# Module-level imports these methods rely on (both __init__ excerpts below
# come from the same class-based model file; VisualBERTBase is defined
# elsewhere in that file). The transformers import path may differ across
# library versions.
import os
from copy import deepcopy

from omegaconf import OmegaConf
from torch import nn
from transformers.modeling_bert import (
    BertConfig,
    BertForPreTraining,
    BertPredictionHeadTransform,
)

from mmf.utils.configuration import get_mmf_cache_dir


def __init__(self, config):
    super().__init__()
    self.config = config
    self.output_attentions = self.config.output_attentions
    self.output_hidden_states = self.config.output_hidden_states

    # If bert_model_name is not specified, you will need to specify
    # all of the required parameters for BertConfig, and a pretrained
    # model won't be loaded.
    self.bert_model_name = getattr(self.config, "bert_model_name", None)
    self.bert_config = BertConfig.from_dict(
        OmegaConf.to_container(self.config, resolve=True)
    )
    if self.bert_model_name is None:
        # Build the visual-BERT backbone from scratch with the given config.
        self.bert = VisualBERTBase(
            self.bert_config,
            visual_embedding_dim=self.config.visual_embedding_dim,
            embedding_strategy=self.config.embedding_strategy,
            bypass_transformer=self.config.bypass_transformer,
            output_attentions=self.config.output_attentions,
            output_hidden_states=self.config.output_hidden_states,
        )
    else:
        # Load pretrained BERT weights into the visual-BERT backbone.
        self.bert = VisualBERTBase.from_pretrained(
            self.config.bert_model_name,
            config=self.bert_config,
            cache_dir=os.path.join(
                get_mmf_cache_dir(), "distributed_{}".format(-1)
            ),
            visual_embedding_dim=self.config.visual_embedding_dim,
            embedding_strategy=self.config.embedding_strategy,
            bypass_transformer=self.config.bypass_transformer,
            output_attentions=self.config.output_attentions,
            output_hidden_states=self.config.output_hidden_states,
        )

    self.vocab_size = self.bert.config.vocab_size

    # TODO: Once omegaconf fixes int keys issue, bring this back
    # See https://github.com/omry/omegaconf/issues/149
    # with omegaconf.open_dict(self.config):
    #     # Add bert config such as hidden_state to our main config
    #     self.config.update(self.bert.config.to_dict())

    # Reuse the (optionally pretrained) masked-LM and next-sentence heads
    # from BertForPreTraining; only its `cls` head is kept.
    if self.bert_model_name is None:
        bert_masked_lm = BertForPreTraining(self.bert.config)
    else:
        bert_masked_lm = BertForPreTraining.from_pretrained(
            self.config.bert_model_name,
            config=self.bert.config,
            cache_dir=os.path.join(
                get_mmf_cache_dir(), "distributed_{}".format(-1)
            ),
        )
    self.cls = deepcopy(bert_masked_lm.cls)
    self.loss_fct = nn.CrossEntropyLoss(ignore_index=-1)
    self.init_weights()
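
# A minimal usage sketch (an addition, not part of the original source):
# constructing the pretraining module from a plain OmegaConf config. The
# enclosing class is assumed to be MMF's VisualBERTForPretraining; key names
# mirror what the __init__ above reads, and the concrete values are
# illustrative assumptions rather than MMF's shipped defaults.
#
#     from omegaconf import OmegaConf
#
#     config = OmegaConf.create(
#         {
#             "bert_model_name": "bert-base-uncased",  # pretrained weights to load
#             "visual_embedding_dim": 2048,  # width of the input region features
#             "embedding_strategy": "plain",
#             "bypass_transformer": False,
#             "output_attentions": False,
#             "output_hidden_states": False,
#         }
#     )
#     model = VisualBERTForPretraining(config)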
def __init__(self, config):
    super().__init__()
    self.config = config
    self.output_attentions = self.config.output_attentions
    self.output_hidden_states = self.config.output_hidden_states
    self.pooler_strategy = self.config.get("pooler_strategy", "default")

    # If bert_model_name is not specified, you will need to specify
    # all of the required parameters for BertConfig, and a pretrained
    # model won't be loaded.
    self.bert_model_name = getattr(self.config, "bert_model_name", None)
    self.bert_config = BertConfig.from_dict(
        OmegaConf.to_container(self.config, resolve=True)
    )
    if self.bert_model_name is None:
        self.bert = VisualBERTBase(
            self.bert_config,
            visual_embedding_dim=self.config.visual_embedding_dim,
            embedding_strategy=self.config.embedding_strategy,
            bypass_transformer=self.config.bypass_transformer,
            output_attentions=self.config.output_attentions,
            output_hidden_states=self.config.output_hidden_states,
        )
    else:
        self.bert = VisualBERTBase.from_pretrained(
            self.config.bert_model_name,
            config=self.bert_config,
            cache_dir=os.path.join(
                get_mmf_cache_dir(), "distributed_{}".format(-1)
            ),
            visual_embedding_dim=self.config.visual_embedding_dim,
            embedding_strategy=self.config.embedding_strategy,
            bypass_transformer=self.config.bypass_transformer,
            output_attentions=self.config.output_attentions,
            output_hidden_states=self.config.output_hidden_states,
        )

    self.training_head_type = self.config.training_head_type
    self.num_labels = self.config.num_labels
    self.dropout = nn.Dropout(self.bert.config.hidden_dropout_prob)

    # NLVR2 feeds two images per example; their pooled representations are
    # concatenated, so the classifier input width doubles.
    if self.config.training_head_type == "nlvr2":
        self.bert.config.hidden_size *= 2

    self.classifier = nn.Sequential(
        BertPredictionHeadTransform(self.bert.config),
        nn.Linear(self.bert.config.hidden_size, self.config.num_labels),
    )

    self.init_weights()
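
# A minimal usage sketch (an addition, not part of the original source): the
# classification variant additionally requires `training_head_type` and
# `num_labels`. The enclosing class is assumed to be MMF's
# VisualBERTForClassification, and the concrete values below are illustrative.
#
#     from omegaconf import OmegaConf
#
#     config = OmegaConf.create(
#         {
#             "bert_model_name": "bert-base-uncased",
#             "visual_embedding_dim": 2048,
#             "embedding_strategy": "plain",
#             "bypass_transformer": False,
#             "output_attentions": False,
#             "output_hidden_states": False,
#             "training_head_type": "vqa2",
#             "num_labels": 3129,  # answer-vocabulary size; illustrative
#         }
#     )
#     model = VisualBERTForClassification(config)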