def get_optimizer_parameters(self, config):
    lr = config.optimizer.params.lr
    param_list = []
    parameters = []
    head_configs = self.config.get("heads", [])
    for name, module in self.named_children():
        # Heads can have different learning rates. This is handled here
        if name == "heads":
            # Parameters in the head which have a separate learning
            # rate are added as a separate param group
            for head_config, head in zip(head_configs, self.heads):
                lr_multiplier = head_config.get("lr_multiplier", 1.0)
                if lr_multiplier != 1.0:
                    parameters += get_bert_configured_parameters(
                        head, lr * lr_multiplier
                    )
                else:
                    # Parameters of heads with the same learning rate as the
                    # trunk go into the shared param group
                    param_list += list(head.named_parameters())
        else:
            # For other modules in the trunk, add to the same param group
            param_list += list(module.named_parameters())

    # Turn the collected trunk (and default-lr head) parameters into
    # BERT-style param groups; heads with a custom lr were added above.
    parameters += get_bert_configured_parameters(param_list)

    return parameters
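# Note: every snippet in this section relies on a `get_bert_configured_parameters`
# helper that splits parameters into BERT-style param groups. The sketch below is an
# assumption of what such a helper typically looks like (grouping by weight-decay
# eligibility, with an optional per-group lr); the exact signature and default weight
# decay are not taken from the source.
from torch import nn


def get_bert_configured_parameters(module_or_named_params, lr=None, weight_decay=0.01):
    # Accept either an nn.Module or a pre-built list of (name, parameter) pairs,
    # since the snippets above and below pass both.
    if isinstance(module_or_named_params, nn.Module):
        named_params = list(module_or_named_params.named_parameters())
    else:
        named_params = list(module_or_named_params)

    # BERT convention: no weight decay for biases and LayerNorm parameters.
    no_decay = ["bias", "LayerNorm.weight", "LayerNorm.bias"]
    groups = [
        {
            "params": [
                p for n, p in named_params if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": weight_decay,
        },
        {
            "params": [
                p for n, p in named_params if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]

    # When an explicit lr is given (e.g. lr * lr_multiplier above), attach it to
    # both groups so they override the optimizer-wide default.
    if lr is not None:
        for group in groups:
            group["lr"] = lr

    return groups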
def get_optimizer_parameters(self, config):
    if hasattr(self.encoder, "get_optimizer_parameters"):
        params = self.encoder.get_optimizer_parameters(config)
    else:
        params = [{"params": self.encoder.parameters()}]
    params += get_bert_configured_parameters(self.text_embeddings)
    params += get_bert_configured_parameters(self.heads_dict)
    params += [{"params": self.image_embeddings.parameters()}]
    return params
def get_optimizer_parameters(self, config):
    # For the finetuning setup, we have a classifier on top of the
    # pretrained base model
    lr = config.optimizer.params.lr
    model_config = config.model_config.get(config.model, {})
    finetune_lr_multiplier = model_config.get("finetune_lr_multiplier", 1)
    # Finetune the pretrained BERT part with finetune_lr_multiplier if it is set
    parameters = get_bert_configured_parameters(
        self.base, lr * finetune_lr_multiplier
    )
    parameters += get_bert_configured_parameters(self.classifier, lr)
    return parameters
def get_optimizer_parameters(self, config):
    base_lr = config.optimizer.params.lr
    bert_params = get_bert_configured_parameters(self.text_encoder, base_lr * 0.1)
    backbone_params = [
        {
            "params": filter_grads(self.image_encoder.parameters()),
            "lr": base_lr * 0.1,
        }
    ]
    rest_params = [
        {"params": filter_grads(self.image_proj.parameters()), "lr": base_lr},
        {"params": filter_grads(self.text_proj.parameters()), "lr": base_lr},
        {"params": filter_grads(self.image_pool.parameters()), "lr": base_lr},
        {"params": filter_grads(self.text_pool.parameters()), "lr": base_lr},
        {
            "params": filter_grads(self.shared_transformer.parameters()),
            "lr": base_lr,
        },
    ]
    training_parameters = bert_params + backbone_params + rest_params

    return training_parameters
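# `filter_grads` above is assumed to be a small helper that drops frozen parameters
# so the optimizer only sees tensors that will actually receive gradients; a minimal
# sketch under that assumption:
def filter_grads(parameters):
    # Keep only parameters with requires_grad=True.
    return [param for param in parameters if param.requires_grad]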
def set_lr_for_parameters(
    self, config, module_name, base_lr, module, parameters, param_list
):
    lr_multiplier = config.get("lr_multiplier", 1.0)
    if lr_multiplier != 1.0:
        logger.info(
            f"Setting learning rate of {module_name} to be "
            f"{base_lr} * {lr_multiplier}."
        )
        parameters += get_bert_configured_parameters(module, base_lr * lr_multiplier)
    else:
        # Parameters for modules with the same learning rate as the
        # trunk are added to the shared param group
        param_list += list(module.named_parameters())
    return parameters, param_list
def get_optimizer_parameters(self, config):
    lr = config.optimizer.params.lr
    param_list = []
    parameters = []
    head_configs = self.config.get("heads", [])
    for name, module in self.named_children():
        # Heads can have different learning rates. This is handled here
        if name == "heads":
            # Parameters in the head which have a separate learning
            # rate are added as a separate param group
            for head_config, head in zip(head_configs, self.heads):
                parameters, param_list = self.set_lr_for_parameters(
                    config=head_config,
                    module_name="{} head".format(head_config.get("type", "MLP")),
                    base_lr=lr,
                    module=head,
                    parameters=parameters,
                    param_list=param_list,
                )
        elif name == "encoders":
            # Encoders can also carry their own lr_multiplier via the
            # modality config they belong to
            for key in module:
                for modality in self.config.modalities:
                    if key == modality.key:
                        modality_config = modality
                parameters, param_list = self.set_lr_for_parameters(
                    config=modality_config,
                    module_name=f"{key} encoder",
                    base_lr=lr,
                    module=module[key],
                    parameters=parameters,
                    param_list=param_list,
                )
        else:
            # For other modules in the trunk, add to the same param group
            param_list += list(module.named_parameters())

    parameters += get_bert_configured_parameters(param_list)

    return parameters
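# Usage sketch (assumed, not taken from the source): the param groups returned by
# get_optimizer_parameters can be passed directly to a torch optimizer. Groups that
# carry their own "lr" keep it; the rest fall back to the optimizer-wide default.
# `build_optimizer`, `model`, and `config` are hypothetical names for illustration.
import torch


def build_optimizer(model, config):
    # `model` is any module exposing get_optimizer_parameters as in the snippets above.
    return torch.optim.AdamW(
        model.get_optimizer_parameters(config),
        lr=config.optimizer.params.lr,
        weight_decay=0.01,
    )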