def __init__(self, name, config):
    """
    Initializes the classifier: stream keys, input/prediction sizes and the
    stack of linear layers (with optional hidden layers, ReLU and dropout).

    :param name: Name of the component.

    :param config: Dictionary of parameters (read from configuration ``.yaml`` file).
    :type config: ``ptp.configuration.ConfigInterface``

    :raises ConfigurationError: When input/prediction size is a list with more \
        than one dimension, or ``hidden_sizes`` is present but not a list.
    """
    # Call constructors of parent classes.
    Model.__init__(self, name, FeedForwardNetwork, config)

    # Get key mappings.
    self.key_inputs = self.stream_keys["inputs"]
    self.key_predictions = self.stream_keys["predictions"]

    # Retrieve input size from global variables.
    self.input_size = self.globals["input_size"]
    # Unwrap single-element lists; anything longer is a configuration error.
    if isinstance(self.input_size, list):
        if len(self.input_size) == 1:
            self.input_size = self.input_size[0]
        else:
            raise ConfigurationError("SoftmaxClassifier input size '{}' must be a single dimension (current {})".format(self.global_keys["input_size"], self.input_size))

    # Retrieve output (prediction) size from global params.
    self.prediction_size = self.globals["prediction_size"]
    if isinstance(self.prediction_size, list):
        if len(self.prediction_size) == 1:
            self.prediction_size = self.prediction_size[0]
        else:
            raise ConfigurationError("SoftmaxClassifier prediction size '{}' must be a single dimension (current {})".format(self.global_keys["prediction_size"], self.prediction_size))

    self.logger.info("Initializing softmax classifier with input size = {} and prediction size = {}".format(self.input_size, self.prediction_size))

    # Create the model.
    self.layers = torch.nn.ModuleList()

    # Retrieve number of hidden layers, along with their sizes (numbers of hidden neurons from configuration).
    try:
        hidden_sizes = self.config["hidden_sizes"]
        if isinstance(hidden_sizes, list):
            # Stack linear layers.
            input_dim = self.input_size
            for hidden_dim in hidden_sizes:
                # Add linear layer.
                self.layers.append( torch.nn.Linear(input_dim, hidden_dim) )
                input_dim = hidden_dim

            # Create activation layer.
            self.activation = torch.nn.ReLU()

            # Retrieve dropout rate value - if set, will put dropout between every layer.
            dropout_rate = self.config["dropout_rate"]
            # Create dropout layer.
            self.dropout = torch.nn.Dropout(dropout_rate)

            # Add output layer.
            self.layers.append( torch.nn.Linear(input_dim, self.prediction_size) )

            self.logger.info("Created {} hidden layers".format(len(self.layers)-1))
        else:
            # BUGFIX: was formatting ``self.hidden_sizes`` (an attribute that is
            # never assigned), which raised AttributeError instead of the
            # intended ConfigurationError.
            raise ConfigurationError("SoftmaxClassifier 'hidden_sizes' must contain a list with numbers of neurons in hidden layers (currently {})".format(hidden_sizes))

    except KeyError:
        # Not present, in that case create a simple classifier with 1 linear layer.
        self.layers.append( torch.nn.Linear(self.input_size, self.prediction_size) )

    # Create the final non-linearity.
    self.use_logsoftmax = self.config["use_logsoftmax"]
    if self.use_logsoftmax:
        self.log_softmax = torch.nn.LogSoftmax(dim=1)
def build(self, use_logger=True):
    """
    Method creating the pipeline, consisting of:
        - a list components ordered by the priority (dictionary).
        - task (as a separate "link" to object in the list of components, instance of a class derrived from Task class)
        - models (separate list with link to objects in components dict)
        - losses (separate list with links to objects in components dict)

    :param use_logger: Logs the detected errors (DEFAULT: True)

    :return: number of detected errors.
    """
    errors = 0
    self.__priorities = []

    # Special section names to "skip".
    sections_to_skip = "name load freeze disable".split()

    # BUGFIX: initialize as an empty list, not an empty string. With a string,
    # the later ``c_key in disabled_components`` membership test would be a
    # substring check (only accidentally correct for the empty string).
    disabled_components = []
    # Add components to disable by the ones from configuration file.
    if "disable" in self.config:
        disabled_components = [*disabled_components, *self.config["disable"].replace(" ", "").split(",")]
    # Add components to disable by the ones from command line arguments.
    if (self.app_state.args is not None) and (self.app_state.args.disable != ''):
        disabled_components = [*disabled_components, *self.app_state.args.disable.split(",")]

    # Organize all components according to their priorities.
    for c_key, c_config in self.config.items():
        try:
            # Skip "special" pipeline sections.
            if c_key in sections_to_skip:
                #self.logger.info("Skipping section '{}'".format(c_key))
                continue
            # Skip "disabled" components.
            if c_key in disabled_components:
                self.logger.info("Disabling component '{}'".format(c_key))
                continue
            # Check presence of priority.
            if 'priority' not in c_config:
                raise KeyError("Section '{}' does not contain the key 'priority' defining the pipeline order".format(c_key))
            # Get the priority.
            try:
                c_priority = float(c_config["priority"])
            except ValueError:
                raise ConfigurationError("Priority [{}] in section '{}' is not a floating point number".format(c_config["priority"], c_key))
            # Check uniqueness of the priority.
            if c_priority in self.__components.keys():
                raise ConfigurationError("Found more than one component with the same priority [{}]".format(c_priority))
            # Ok, got the component name with priority. Save it.
            # Later we will "plug" the adequate component in this place.
            self.__components[c_priority] = c_key
        except ConfigurationError as e:
            if use_logger:
                self.logger.error(e)
            errors += 1
            continue
        except KeyError as e:
            if use_logger:
                self.logger.error(e)
            errors += 1
            continue
        # end try/else
    # end for

    if use_logger:
        self.logger.info("Building pipeline with {} components".format(len(self.__components)))

    # Do not continue if found errors.
    if errors > 0:
        return errors

    # Sort priorities.
    self.__priorities = sorted(self.__components.keys())

    for c_priority in self.__priorities:
        try:
            # The section "key" will be used as "component" name.
            c_key = self.__components[c_priority]
            # Get section.
            c_config = self.config[c_key]

            if use_logger:
                self.logger.info("Creating component '{}' ({}) with priority [{}]".format(c_key, c_config["type"], c_priority))

            # Create component.
            component, class_obj = ComponentFactory.build(c_key, c_config)

            # Check if class is derived (even indirectly) from Task.
            if ComponentFactory.check_inheritance(class_obj, ptp.Task.__name__):
                raise ConfigurationError("Object '{}' cannot be instantiated as part of pipeline, as its class type '{}' is derived from Task class!".format(c_key, class_obj.__name__))

            # Add it to dict.
            self.__components[c_priority] = component

            # Check if class is derived (even indirectly) from Model.
            if ComponentFactory.check_inheritance(class_obj, ptp.Model.__name__):
                # Add to list.
                self.models.append(component)

            # Check if class is derived (even indirectly) from Loss.
            if ComponentFactory.check_inheritance(class_obj, ptp.Loss.__name__):
                # Add to list.
                self.losses.append(component)

        except ConfigurationError as e:
            if use_logger:
                self.logger.error("Detected configuration error while creating the component '{}' instance:\n {}".format(c_key, e))
            errors += 1
            continue
        except KeyError as e:
            if use_logger:
                self.logger.error("Detected key error while creating the component '{}' instance: required key '{}' is missing".format(c_key, e))
            errors += 1
            continue
        # end try/else
    # end for

    # Return detected errors.
    return errors
def __init__(self, name, config):
    """
    Initializes task object. Calls base constructor.
    Downloads the dataset if not present and loads the adequate files depending on the mode.

    :param name: Name of the component.

    :param class_type: Class type of the component.

    :param config: Dictionary of parameters (read from configuration ``.yaml`` file).
    """
    # Call constructors of parent classes.
    Task.__init__(self, name, GQA, config)

    # Get key mappings of all output streams.
    self.key_sample_ids = self.stream_keys["sample_ids"]
    self.key_images = self.stream_keys["images"]
    self.key_image_ids = self.stream_keys["image_ids"]
    self.key_questions = self.stream_keys["questions"]
    self.key_answers = self.stream_keys["answers"]
    self.key_full_answers = self.stream_keys["full_answers"]

    # Get flag informing whether we want to stream images or not.
    self.stream_images = self.config['stream_images']

    # Check the resize image option.
    # NOTE(review): unlike the CLEVR task, 'resize_image' is mandatory here;
    # a malformed value terminates the process instead of raising.
    if len(self.config['resize_image']) != 2:
        self.logger.error("'resize_image' field must contain 2 values: the desired height and width")
        exit(-1)

    # Output image dimensions.
    self.height = self.config['resize_image'][0]
    self.width = self.config['resize_image'][1]
    self.depth = 3  # RGB channels.
    self.logger.info("Setting image size to [D x H x W]: {} x {} x {}".format(self.depth, self.height, self.width))

    # Set global variables - all dimensions ASIDE OF BATCH.
    self.globals["image_height"] = self.height
    self.globals["image_width"] = self.width
    self.globals["image_depth"] = self.depth

    # Get image preprocessing: validated against the allowed option list.
    self.image_preprocessing = get_value_list_from_dictionary(
        "image_preprocessing", self.config,
        'none | normalize | all'.split(" | "))
    if 'none' in self.image_preprocessing:
        self.image_preprocessing = []
    if 'all' in self.image_preprocessing:
        self.image_preprocessing = ['normalize']
    # Add resize as transformation - always applied, since 'resize_image' is mandatory.
    self.image_preprocessing = ["resize"] + self.image_preprocessing
    self.logger.info("Applied image preprocessing: {}".format(self.image_preprocessing))

    # Get the absolute path.
    self.data_folder = os.path.expanduser(self.config['data_folder'])

    # Get split.
    split = get_value_from_dictionary(
        'split', self.config,
        "training_0 | training | validation | test_dev | test".split(" | "))
    self.split_image_folder = os.path.join(self.data_folder, "images")

    # Set split-dependent data.
    if split == 'training':
        # Training split folder and file with data question.
        # GQA ships the full training set sharded into 10 JSON files.
        data_files = []
        for i in range(10):
            data_files.append(
                os.path.join(self.data_folder, "questions1.2",
                             "train_all_questions",
                             "train_all_questions_{}.json".format(i)))
    elif split == 'training_0':
        # Validation split folder and file with data question.
        # Only the first shard, i.e. ~10% of the training set.
        data_files = [
            os.path.join(self.data_folder, "questions1.2",
                         "train_all_questions", "train_all_questions_0.json")
        ]
        self.logger.warning("Please remember that this split constitutes only 10 percent of the whole training set!")
    elif split == 'validation':
        # Validation split folder and file with data question.
        data_files = [
            os.path.join(self.data_folder, "questions1.2",
                         "val_all_questions.json")
        ]
        self.logger.warning("Please use 'test_dev' split for validation!")
    elif split == 'test_dev':
        # Validation split folder and file with data question.
        data_files = [
            os.path.join(self.data_folder, "questions1.2",
                         "testdev_all_questions.json")
        ]
    elif split == 'test':
        # Test split folder and file with data question.
        data_files = [
            os.path.join(self.data_folder, "questions1.2",
                         "test_all_questions.json")
        ]
    else:
        raise ConfigurationError("Split {} not supported yet".format(split))

    # Load dataset.
    self.dataset = self.load_dataset(data_files)

    # Display exemplary sample.
    i = 0
    sample = self.dataset[i]
    # Check if this is a test set.
    self.logger.info(
        "Exemplary sample {} ({}):\n  image_ids: {}\n  question: {}\n  answer: {} ({})"
        .format(i, sample[self.key_sample_ids], sample[self.key_image_ids],
                sample[self.key_questions], sample[self.key_answers],
                sample[self.key_full_answers]))
def build(self, log=True):
    """
    Method creates a problem on the basis of configuration section.

    Instantiates the problem component, an (optional) sampler and the
    DataLoader wrapping them; stores all three on ``self``.

    :param log: Logs information and the detected errors (DEFAULT: TRUE)

    :return: number of detected errors
    """
    try:
        # Create component.
        component, class_obj = ComponentFactory.build(
            "problem", self.config["problem"])

        # Check if class is derived (even indirectly) from Problem.
        if not ComponentFactory.check_inheritance(class_obj, ptp.Problem.__name__):
            raise ConfigurationError(
                "Class '{}' is not derived from the Problem class!".format(
                    class_obj.__name__))

        # Set problem.
        self.problem = component

        # Try to build the sampler.
        self.sampler = SamplerFactory.build(self.problem, self.config["sampler"])

        if self.sampler is not None:
            # Set shuffle to False - REQUIRED as those two are exclusive.
            self.config['dataloader'].add_config_params({'shuffle': False})

        # build the DataLoader on top of the validation problem
        self.dataloader = DataLoader(
            dataset=self.problem,
            batch_size=self.config['problem']['batch_size'],
            shuffle=self.config['dataloader']['shuffle'],
            sampler=self.sampler,
            batch_sampler=self.config['dataloader']['batch_sampler'],
            num_workers=self.config['dataloader']['num_workers'],
            collate_fn=self.problem.collate_fn,
            pin_memory=self.config['dataloader']['pin_memory'],
            drop_last=self.config['dataloader']['drop_last'],
            timeout=self.config['dataloader']['timeout'],
            worker_init_fn=self.worker_init_fn)

        # Display sizes.
        if log:
            self.logger.info("Problem for '{}' loaded (size: {})".format(
                self.name, len(self.problem)))
            if (self.sampler is not None):
                self.logger.info(
                    "Sampler for '{}' created (size: {})".format(
                        self.name, len(self.sampler)))
        # Ok, success.
        return 0
    except ConfigurationError as e:
        if log:
            self.logger.error(
                "Detected configuration error while creating the problem instance:\n {}"
                .format(e))
        # Return error.
        return 1
    except KeyError as e:
        if log:
            self.logger.error(
                "Detected key error while creating the problem instance: required key {} is missing"
                .format(e))
        # Return error.
        return 1
def __init__(self, name, config):
    """
    Initializes task object. Calls base constructor.
    Downloads the dataset if not present and loads the adequate files depending on the mode.

    :param name: Name of the component.

    :param class_type: Class type of the component.

    :param config: Dictionary of parameters (read from configuration ``.yaml`` file).
    """
    # Call constructors of parent classes.
    Task.__init__(self, name, CLEVR, config)

    # Get key mappings of all output streams.
    self.key_images = self.stream_keys["images"]
    self.key_image_ids = self.stream_keys["image_ids"]
    self.key_questions = self.stream_keys["questions"]
    self.key_answers = self.stream_keys["answers"]
    self.key_question_type_ids = self.stream_keys["question_type_ids"]
    self.key_question_type_names = self.stream_keys["question_type_names"]

    # Get flag informing whether we want to stream images or not.
    self.stream_images = self.config['stream_images']

    # Check the resize image option.
    if "resize_image" in self.config:
        if len(self.config['resize_image']) != 2:
            self.logger.error("'resize_image' field must contain 2 values: the desired height and width")
            exit(-1)

        # Output image dimensions.
        self.height = self.config['resize_image'][0]
        self.width = self.config['resize_image'][1]
        self.depth = 3
        resize = True
    else:
        # Use original image dimensions.
        # NOTE(review): CLEVR renders are 480x320; here height=480/width=320 —
        # verify the intended [H x W] order against the loader.
        self.height = 480
        self.width = 320
        self.depth = 3
        resize = False
    self.logger.info(
        "Setting image size to [D x H x W]: {} x {} x {}".format(
            self.depth, self.height, self.width))

    # Set global variables - all dimensions ASIDE OF BATCH.
    self.globals["image_height"] = self.height
    self.globals["image_width"] = self.width
    self.globals["image_depth"] = self.depth

    # Get image preprocessing: validated against the allowed option list.
    self.image_preprocessing = get_value_list_from_dictionary(
        "image_preprocessing", self.config,
        'none | normalize | all'.split(" | "))
    if 'none' in self.image_preprocessing:
        self.image_preprocessing = []
    if 'all' in self.image_preprocessing:
        self.image_preprocessing = ['normalize']
    if resize:
        # Add resize as transformation.
        self.image_preprocessing = ["resize"] + self.image_preprocessing
    self.logger.info("Applied image preprocessing: {}".format(
        self.image_preprocessing))

    # Mapping of question subtypes to types (not used, but keeping it just in case).
    #self.question_subtype_to_type_mapping = {
    #    'query_size': 'query_attribute',
    #    'equal_size': 'compare_attribute',
    #    'query_shape': 'query_attribute',
    #    'query_color': 'query_attribute',
    #    'greater_than': 'compare_integer',
    #    'equal_material': 'compare_attribute',
    #    'equal_color': 'compare_attribute',
    #    'equal_shape': 'compare_attribute',
    #    'less_than': 'compare_integer',
    #    'count': 'count',
    #    'exist': 'exist',
    #    'equal_integer': 'compare_integer',
    #    'query_material': 'query_attribute'}

    # Mapping of question subtypes to types.
    self.question_subtype_to_id_mapping = {
        'query_size': 0,
        'equal_size': 1,
        'query_shape': 2,
        'query_color': 3,
        'greater_than': 4,
        'equal_material': 5,
        'equal_color': 6,
        'equal_shape': 7,
        'less_than': 8,
        'count': 9,
        'exist': 10,
        'equal_integer': 11,
        'query_material': 12
    }

    # Mapping of question families to subtypes.
    self.question_family_id_to_subtype_mapping = {
        0: "equal_integer", 1: "less_than", 2: "greater_than",
        3: "equal_integer", 4: "less_than", 5: "greater_than",
        6: "equal_integer", 7: "less_than", 8: "greater_than",
        9: "equal_size", 10: "equal_color", 11: "equal_material",
        12: "equal_shape", 13: "equal_size", 14: "equal_size",
        15: "equal_size", 16: "equal_color", 17: "equal_color",
        18: "equal_color", 19: "equal_material", 20: "equal_material",
        21: "equal_material", 22: "equal_shape", 23: "equal_shape",
        24: "equal_shape", 25: "count", 26: "exist",
        27: "query_size", 28: "query_shape", 29: "query_color",
        30: "query_material", 31: "count", 32: "query_size",
        33: "query_color", 34: "query_material", 35: "query_shape",
        36: "exist", 37: "exist", 38: "exist",
        39: "exist", 40: "count", 41: "count",
        42: "count", 43: "count", 44: "exist",
        45: "exist", 46: "exist", 47: "exist",
        48: "count", 49: "count", 50: "count",
        51: "count", 52: "query_color", 53: "query_material",
        54: "query_shape", 55: "query_size", 56: "query_material",
        57: "query_shape", 58: "query_size", 59: "query_color",
        60: "query_shape", 61: "query_size", 62: "query_color",
        63: "query_material", 64: "count", 65: "count",
        66: "count", 67: "count", 68: "count",
        69: "count", 70: "count", 71: "count",
        72: "count", 73: "exist", 74: "query_size",
        75: "query_color", 76: "query_material", 77: "query_shape",
        78: "count", 79: "exist", 80: "query_size",
        81: "query_color", 82: "query_material", 83: "query_shape",
        84: "count", 85: "exist", 86: "query_shape",
        87: "query_material", 88: "query_color", 89: "query_size"
    }

    # Finally, "merge" those two: family id -> subtype id.
    self.question_family_id_to_subtype_id_mapping = {
        key: self.question_subtype_to_id_mapping[value]
        for key, value in self.question_family_id_to_subtype_mapping.items()
    }

    # Get the absolute path.
    self.data_folder = os.path.expanduser(self.config['data_folder'])

    # Get split.
    split = get_value_from_dictionary(
        'split', self.config,
        "training | validation | test | cogent_a_training | cogent_a_validation | cogent_b_validation"
        .split(" | "))

    # Set split-dependent data.
    if split == 'training':
        # Training split folder and file with data question.
        data_file = os.path.join(self.data_folder, "questions",
                                 'CLEVR_train_questions.json')
        self.split_image_folder = os.path.join(self.data_folder, "images",
                                               "train")
    elif split == 'validation':
        # Validation split folder and file with data question.
        data_file = os.path.join(self.data_folder, "questions",
                                 'CLEVR_val_questions.json')
        self.split_image_folder = os.path.join(self.data_folder, "images",
                                               "val")
    elif split == 'test':
        # Test split folder and file with data question.
        data_file = os.path.join(self.data_folder, "questions",
                                 'CLEVR_test_questions.json')
        self.split_image_folder = os.path.join(self.data_folder, "images",
                                               "test")
    else:  # cogent
        # CoGenT splits accepted above are not wired up yet.
        raise ConfigurationError(
            "Split {} not supported yet".format(split))

    # Load dataset.
    self.dataset = self.load_dataset(data_file)

    # Display exemplary sample.
    i = 0
    sample = self.dataset[i]
    # Check if this is a test set - test questions carry no answers/families.
    if "answer" not in sample.keys():
        sample["answer"] = "<UNK>"
        sample[self.key_question_type_ids] = -1
        sample[self.key_question_type_names] = "<UNK>"
    else:
        sample[self.key_question_type_ids] = self.question_family_id_to_subtype_id_mapping[sample["question_family_index"]]
        sample[self.key_question_type_names] = self.question_family_id_to_subtype_mapping[sample["question_family_index"]]

    self.logger.info(
        "Exemplary sample {} ({}):\n  question_type: {} ({})\n  image_ids: {}\n  question: {}\n  answer: {}"
        .format(i, sample["question_index"],
                sample[self.key_question_type_ids],
                sample[self.key_question_type_names],
                sample["image_filename"], sample["question"],
                sample["answer"]))