Ejemplo n.º 1
0
class BertPretrainingLoss(BertPreTrainedModel):
    """Pair an externally built BERT encoder with the pre-training heads.

    ``forward`` returns the summed masked-LM and next-sentence loss when both
    label tensors are supplied, and the raw head scores otherwise.
    """

    def __init__(self, bert_encoder, config):
        """Attach pre-training heads to ``bert_encoder``.

        Args:
            bert_encoder: Encoder module; must expose
                ``embeddings.word_embeddings.weight``.
            config: Model configuration, forwarded to the base class and to
                ``BertPreTrainingHeads``.
        """
        super(BertPretrainingLoss, self).__init__(config)
        self.bert = bert_encoder
        self.cls = BertPreTrainingHeads(config)
        # Initialise the heads BEFORE tying the decoder to the embeddings.
        # ``apply`` visits the decoder ``nn.Linear`` too, so running it after
        # the tie would re-draw the shared parameter in place and overwrite
        # the encoder's word embeddings.
        self.cls.apply(self.init_bert_weights)
        self.cls.predictions.decoder.weight = self.bert.embeddings.word_embeddings.weight

    def init_bert_weights(self, module):
        """Initialise the weights of a single sub-module.

        ``nn.Linear`` / ``nn.Embedding`` weights are drawn from a normal
        distribution with std ``self.config.initializer_range``;
        ``BertLayerNorm`` is reset to weight 1 and bias 0; ``nn.Linear``
        biases are zeroed. Intended to be passed to ``Module.apply``.
        """
        if isinstance(module, (nn.Linear, nn.Embedding)):
            # Slightly different from the TF version which uses truncated_normal for initialization
            # cf https://github.com/pytorch/pytorch/pull/5617
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
        elif isinstance(module, BertLayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
        if isinstance(module, nn.Linear) and module.bias is not None:
            module.bias.data.zero_()

    def forward(self, input_ids, token_type_ids=None, attention_mask=None, masked_lm_labels=None, next_sentence_label=None):
        """Run the encoder and heads; return the loss or the raw scores.

        Returns:
            The sum of the masked-LM and next-sentence cross-entropy losses
            when both ``masked_lm_labels`` and ``next_sentence_label`` are
            given; otherwise the tuple
            ``(prediction_scores, seq_relationship_score)``.
        """
        # NOTE(review): the encoder is called positionally as
        # (input_ids, attention_mask, token_type_ids), which differs from the
        # parameter order of this method -- confirm it matches the encoder's
        # forward signature.
        sequence_output, pooled_output = self.bert(input_ids, attention_mask, token_type_ids)
        prediction_scores, seq_relationship_score = self.cls(sequence_output, pooled_output)

        if masked_lm_labels is not None and next_sentence_label is not None:
            # Targets equal to -1 are skipped by the loss.
            loss_fct = CrossEntropyLoss(ignore_index=-1)
            next_sentence_loss = loss_fct(
                seq_relationship_score.view(-1, 2), next_sentence_label.view(-1))
            masked_lm_loss = loss_fct(
                prediction_scores.view(-1, self.config.vocab_size), masked_lm_labels.view(-1))
            total_loss = masked_lm_loss + next_sentence_loss
            return total_loss
        else:
            return prediction_scores, seq_relationship_score
Ejemplo n.º 2
0
 def __init__(self, config):
     """Build a ``UnilmModel`` encoder and ``BertPreTrainingHeads`` from ``config``.

     Also stores ``config`` on the instance and caches ``hist_index``, which
     is derived from the ``output_hidden_states`` and ``output_attentions``
     flags of ``config``.
     """
     super().__init__(config)
     self.config = config
     self.bert = UnilmModel(config)
     self.cls = BertPreTrainingHeads(config)
     # Base offset of 2, plus 1 for each optional output flag that is enabled.
     # NOTE(review): presumably an index into the encoder's output tuple; the
     # trailing comment points at a line of the full file not visible here --
     # confirm.
     self.hist_index = int(config.output_hidden_states) + int(
         config.output_attentions) + 2  # refer to line 600
Ejemplo n.º 3
0
    def __init__(self, config):
        """Build a ``BertModel`` encoder, pre-training heads and a QA output layer.

        ``qa_outputs`` maps ``config.hidden_size`` features to 2 values.
        ``init_weights()`` is called last, after all sub-modules exist.
        """
        super().__init__(config)
        self.bert = BertModel(config)
        self.cls = BertPreTrainingHeads(config)
        # Two outputs per position.
        # NOTE(review): presumably span start/end logits -- confirm against forward().
        self.qa_outputs = torch.nn.Linear(config.hidden_size, 2)

        self.init_weights()
Ejemplo n.º 4
0
 def _init_classifier(self, hidden_size):
     """Attach the output head selected by ``self.config.training_head_type``.

     A head type containing ``"pretraining"`` gets ``BertPreTrainingHeads``.
     Independently of that, a head type containing ``"vqa"`` or ``"vizwiz"``,
     or equal to ``"visual_entailment"``, gets a dropout layer and a
     transform + linear classifier with 3129, 7371 or 3 outputs respectively.
     ``answer_space_size`` is recorded only for the ``vqa`` / ``vizwiz``
     heads. ``hidden_size`` is accepted but not used.
     """
     head_type = self.config.training_head_type
     if "pretraining" in head_type:
         self.classifier = BertPreTrainingHeads(self.bert_config)

     # Choose the classifier width; the flag says whether the width is also
     # stored on the instance as ``answer_space_size``.
     if "vqa" in head_type:
         num_outputs, record_size = 3129, True
     elif "vizwiz" in head_type:
         num_outputs, record_size = 7371, True
     elif head_type == "visual_entailment":
         num_outputs, record_size = 3, False
     else:
         # No classification head requested beyond the optional pretraining one.
         return

     bert_cfg = self.bert_config
     self.dropout = nn.Dropout(bert_cfg.hidden_dropout_prob)
     if record_size:
         self.answer_space_size = num_outputs
     self.classifier = nn.Sequential(
         BertPredictionHeadTransform(bert_cfg),
         nn.Linear(bert_cfg.hidden_size, num_outputs),
     )
    def __init__(self, config):
        """Build a ``BertModel`` encoder and ``BertPreTrainingHeads`` from ``config``.

        ``init_weights()`` is called last, after both sub-modules exist.
        """
        super().__init__(config)

        self.bert = BertModel(config)
        self.cls = BertPreTrainingHeads(config)

        self.init_weights()
Ejemplo n.º 6
0
    def __init__(self, config):
        """Build a ``TrelmRobertaModel`` encoder and ``BertPreTrainingHeads`` from ``config``.

        ``init_weights()`` is called last, after both sub-modules exist.
        """
        super().__init__(config)

        self.trelm_roberta = TrelmRobertaModel(config)
        self.cls = BertPreTrainingHeads(config)

        self.init_weights()
Ejemplo n.º 7
0
 def __init__(self, config, fit_size=768):
     """Build the encoder, pre-training heads and a projection layer.

     Args:
         config: Model configuration. It is mutated in place:
             ``output_attentions`` and ``output_hidden_states`` are both
             set to ``True`` before the encoder is built.
         fit_size: Output width of ``fit_dense``, which projects from
             ``config.hidden_size``.
     """
     super(TinyBertForPreTraining, self).__init__(config)
     # Force both optional outputs on; note this changes the caller's config object.
     config.output_attentions = True
     config.output_hidden_states = True
     self.bert = BertModel(config)
     self.cls = BertPreTrainingHeads(config)
     # NOTE(review): presumably maps hidden states to a teacher model's width -- confirm.
     self.fit_dense = nn.Linear(config.hidden_size, fit_size)
     self.init_weights()
Ejemplo n.º 8
0
 def __init__(self, config):
     """Build the encoder, pre-training heads and a linear classifier.

     The classifier maps ``config.hidden_size`` features to
     ``config.num_labels`` outputs; a dropout layer using
     ``config.hidden_dropout_prob`` is created alongside it.
     ``init_weights()`` is called last.
     """
     super().__init__(config)
     self.num_labels = config.num_labels
     self.bert = BertModel(config)
     self.cls = BertPreTrainingHeads(config)
     self.dropout = nn.Dropout(config.hidden_dropout_prob)
     self.classifier = nn.Linear(config.hidden_size, config.num_labels)
     self.init_weights()
Ejemplo n.º 9
0
 def __init__(self, config):
     """Build the stage: 12 ``BertLayer`` blocks, a pooler and the pre-training heads.

     The layer count is fixed at 12 and does not read
     ``config.num_hidden_layers``. ``init_bert_weights`` is applied to every
     sub-module once construction is complete.
     """
     super(Stage1, self).__init__()
     self.layers = torch.nn.ModuleList(
         [BertLayer(config) for _ in range(12)])  # not config.num_hidden_layers
     self.pooling_layer = BertPooler(config)
     self.pre_training_heads_layer = BertPreTrainingHeads(config)
     self.config = config
     self.apply(self.init_bert_weights)
 def __init__(self, config):
     """Build the dialog encoder, pre-training heads and the video layers.

     Creates two linear maps between a 4224-wide input and
     ``config.hidden_size`` (one in each direction) plus a
     ``SimpleClassifier`` stored as ``vid_cls``. ``init_weights()`` and then
     ``tie_weights()`` are called once all sub-modules exist.
     """
     super(BertForPretrainingDialog, self).__init__(config)
     self.bert = BertModelDialog(config)
     self.cls = BertPreTrainingHeads(config)
     # NOTE(review): 4224 is presumably the video feature width -- confirm.
     self.video_ff = nn.Linear(4224, config.hidden_size)
     self.video_inverse_ff = nn.Linear(config.hidden_size, 4224)
     # NOTE(review): 157 and 0.5 are presumably the class count and dropout
     # rate of SimpleClassifier -- confirm against its signature.
     self.vid_cls = SimpleClassifier(config.hidden_size,
                                     config.hidden_size * 2, 157, 0.5)
     self.init_weights()
     self.tie_weights()
Ejemplo n.º 11
0
    def __init__(self, config):
        """Build a ``KnowBertModel`` encoder with pre-training heads.

        After weight initialisation, ``KnowBertHelper.__init__`` is invoked
        explicitly on this instance with the encoder's ``encoder`` attribute.
        """
        # Deliberately skip BertForPreTraining's own constructor and
        # call its parent's constructor instead.
        super(BertForPreTraining, self).__init__(config)
        # create model and heads
        self.bert = KnowBertModel(config)
        self.cls = BertPreTrainingHeads(config)
        # initialize weights
        self.init_weights()

        # initialize helper
        KnowBertHelper.__init__(self, self.bert.encoder)
Ejemplo n.º 12
0
    def __init__(self, config: BertConfig):
        """Build the encoder, pre-training heads and a linear classifier.

        The classifier maps ``config.hidden_size`` features to
        ``config.num_labels`` outputs. ``init_weights()`` is called last,
        after all sub-modules exist.
        """
        super().__init__(config)

        self.num_labels = config.num_labels

        # BERT model that we want to train
        self.bert = BertModel(config)

        # For PreTraining
        self.cls = BertPreTrainingHeads(config)

        # For Morphological guessing
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.classifier = nn.Linear(config.hidden_size, self.num_labels)

        # Initialize weights
        self.init_weights()
Ejemplo n.º 13
0
 def __init__(self, bert_encoder, config):
     """Attach pre-training heads to an externally built encoder.

     Args:
         bert_encoder: Encoder module; must expose
             ``embeddings.word_embeddings.weight``.
         config: Model configuration, forwarded to the base class and to
             ``BertPreTrainingHeads``.
     """
     super(BertPretrainingLoss, self).__init__(config)
     self.bert = bert_encoder
     self.cls = BertPreTrainingHeads(config)
     # Initialise the heads BEFORE tying the decoder to the embeddings.
     # ``apply`` visits the decoder module too, so running it after the tie
     # would re-draw the shared parameter in place and overwrite the
     # encoder's word embeddings.
     self.cls.apply(self.init_bert_weights)
     self.cls.predictions.decoder.weight = self.bert.embeddings.word_embeddings.weight