Example #1
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.bert = BertModel(config, add_pooling_layer=False)
        self.cls = BertOnlyMLMHead(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.activation = nn.Tanh()
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.init_weights()
        # MLM head is not trained
        for param in self.cls.parameters():
            param.requires_grad = False
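Example #1 freezes the MLM head and keeps it only for prediction. A minimal sketch of what the head computes, assuming the Hugging Face transformers package (not shown in the snippet above): BertOnlyMLMHead maps encoder hidden states of shape (batch, seq_len, hidden_size) to vocabulary logits of shape (batch, seq_len, vocab_size).

# Illustrative sketch only; assumes torch and transformers are installed.
import torch
from transformers import BertConfig
from transformers.models.bert.modeling_bert import BertOnlyMLMHead

config = BertConfig()                           # default BERT-base sizes
head = BertOnlyMLMHead(config)
hidden = torch.randn(2, 8, config.hidden_size)  # stand-in for bert(...)[0]
logits = head(hidden)
print(logits.shape)                             # torch.Size([2, 8, 30522])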
Example #2
    def __init__(self, config, args):
        super(ArabicDialectBERTMaskedLM, self).__init__(config)
        self.args = args
        self.bert = BertModel(config, add_pooling_layer=False)
        self.masking_perc = args["masking_percentage"]
        self.mask_id = args["mask_id"]
        self.device_name = args["device"]
        if args["use_adapters"]:
            if args["adapter_type"] == "Fusion":
                self.bert.encoder.layer = nn.ModuleList([
                    BertLayer_w_Adapters(config, args["bottleneck_dim"],
                                         args["current_adapter_to_train"],
                                         args["no_total_adapters"],
                                         args["stage_2_training"],
                                         args["use_adapt_after_fusion"])
                    for _ in range(config.num_hidden_layers)
                ])
                # self.bert.encoder.layer = nn.ModuleList([BertLayer(config) for _ in range(11)] + [BertLayer_w_Adapters(config, args["bottleneck_dim"], args["current_adapter_to_train"], args["no_total_adapters"], args["stage_2_training"], args["use_adapt_after_fusion"]) for _ in range(1)])
            elif args["adapter_type"] == "plain_adapter":
                self.bert.encoder.layer = nn.ModuleList([
                    BertLayer_w_PlainAdapters(config, args["bottleneck_dim"],
                                              args["current_adapter_to_train"],
                                              args["no_total_adapters"],
                                              args["stage_2_training"],
                                              args["use_adapt_after_fusion"])
                    for _ in range(config.num_hidden_layers)
                ])
            # Freeze everything except the adapter layers; the MLM head below stays trainable
            for name, param in self.bert.encoder.layer.named_parameters():
                if "adapter_layer" not in name:
                    param.requires_grad = False

        self.cls = BertOnlyMLMHead(config)
        self.init_weights()
Example #3
    def __init__(self, config):
        super(BertForMaskedLM, self).__init__(config)

        self.bert = BertModel(config)
        self.cls = BertOnlyMLMHead(config)

        self.init_weights()
Example #4
    def __init__(self, config, tokenizer, device):
        super().__init__()
        self.config = config
        self.tokenizer = tokenizer
        self.embeddings = BertEmbeddings(self.config)
        self.corrector = BertEncoder(self.config)
        self.mask_token_id = self.tokenizer.mask_token_id
        self.cls = BertOnlyMLMHead(self.config)
        self._device = device
Example #5
    def __init__(self, config: Config, *args, **kwargs):
        super().__init__(config, *args, **kwargs)

        # Head modules
        self.cls = BertOnlyMLMHead(self.config)
        self.vocab_size = self.config.vocab_size

        # Loss
        self.ce_loss = torch.nn.CrossEntropyLoss(
            ignore_index=self.config.ignore_index)
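Example #5 pairs the head with a CrossEntropyLoss whose ignore_index drops non-target positions. The sketch below shows the usual flattening of MLM logits and labels for that loss; the tensor shapes, label values, and ignore_index of -100 are illustrative assumptions, not taken from this model's forward pass.

# Illustrative only: standard flattening of MLM logits/labels for the loss.
import torch

batch, seq_len, vocab_size, ignore_index = 2, 8, 30522, -100
logits = torch.randn(batch, seq_len, vocab_size)     # e.g. self.cls(sequence_output)
labels = torch.full((batch, seq_len), ignore_index)  # non-masked positions are ignored
labels[0, 3] = 42                                    # one masked position, gold token id 42

ce_loss = torch.nn.CrossEntropyLoss(ignore_index=ignore_index)
loss = ce_loss(logits.view(-1, vocab_size), labels.view(-1))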
Example #6
    def __init__(self, config):
        super(UnilmForSeq2Seq, self).__init__(config)
        self.bert = UnilmModel(config)
        self.cls = BertOnlyMLMHead(config)
        self.crit_mask_lm = nn.CrossEntropyLoss(reduction='none')
        if hasattr(config, 'label_smoothing') and config.label_smoothing:
            self.crit_mask_lm_smoothed = LabelSmoothingLoss(
                config.label_smoothing, config.vocab_size, ignore_index=0, reduction='none')
        else:
            self.crit_mask_lm_smoothed = None
        self.init_weights()
        self.tie_weights()
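Examples #6 and #8 call self.tie_weights() right after building the head. In the Hugging Face implementation this ties the decoder matrix inside BertOnlyMLMHead to the input word embeddings so the two share one parameter tensor. A quick check on the stock BertForMaskedLM, used here only as an illustration; the UniLM classes above follow the same convention.

# Sketch: after tying, the MLM decoder and the input embeddings share weights.
from transformers import BertConfig, BertForMaskedLM

model = BertForMaskedLM(BertConfig())
decoder_w = model.cls.predictions.decoder.weight
embedding_w = model.bert.embeddings.word_embeddings.weight
print(decoder_w is embedding_w)   # True when weight tying is enabled (the default)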
Example #7
        def __init__(self, config):

            # Call the init one parent class up.
            # Otherwise, the model will be defined twice.
            BertPreTrainedModel.__init__(self, config)

            if config.is_decoder:
                logging.warning(
                    # This warning was included with the original BertForMaskedLM.
                    f"If you want to use `{name_prefix}BertForMaskedLM` make sure "
                    "`config.is_decoder=False` for bi-directional self-attention."
                )

            self.bert = bert_cls(config, add_pooling_layer=False)
            self.cls = BertOnlyMLMHead(config)

            self.init_weights()
Example #8
    def __init__(self, config, mask_word_id=0,
                 search_beam_size=1, length_penalty=1.0, eos_id=0, sos_id=0,
                 forbid_duplicate_ngrams=False, forbid_ignore_set=None, ngram_size=3, min_len=0):
        super(UnilmForSeq2SeqDecode, self).__init__(config)
        self.bert = UnilmModelIncr(config)
        self.cls = BertOnlyMLMHead(config)
        self.crit_mask_lm = nn.CrossEntropyLoss(reduction='none')
        self.mask_word_id = mask_word_id
        self.search_beam_size = search_beam_size
        self.length_penalty = length_penalty
        self.eos_id = eos_id
        self.sos_id = sos_id
        self.forbid_duplicate_ngrams = forbid_duplicate_ngrams
        self.forbid_ignore_set = forbid_ignore_set
        self.ngram_size = ngram_size
        self.min_len = min_len
        self.init_weights()
        self.tie_weights()
Example #9
class LOTClassModel(BertPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.bert = BertModel(config, add_pooling_layer=False)
        self.cls = BertOnlyMLMHead(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.activation = nn.Tanh()
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.init_weights()
        # MLM head is not trained
        for param in self.cls.parameters():
            param.requires_grad = False

    def forward(self,
                input_ids,
                pred_mode,
                attention_mask=None,
                token_type_ids=None,
                position_ids=None,
                head_mask=None,
                inputs_embeds=None):
        bert_outputs = self.bert(input_ids,
                                 attention_mask=attention_mask,
                                 token_type_ids=token_type_ids,
                                 position_ids=position_ids,
                                 head_mask=head_mask,
                                 inputs_embeds=inputs_embeds)
        last_hidden_states = bert_outputs[0]
        if pred_mode == "classification":
            trans_states = self.dense(last_hidden_states)
            trans_states = self.activation(trans_states)
            trans_states = self.dropout(trans_states)
            logits = self.classifier(trans_states)
        elif pred_mode == "mlm":
            logits = self.cls(last_hidden_states)
        else:
            sys.exit("Wrong pred_mode!")
        return logits
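Example #9 switches between the trainable classification head and the frozen MLM head through pred_mode. A hedged usage sketch; the checkpoint name, num_labels, and input text are placeholders, and it assumes LOTClassModel is importable as defined above.

# Illustrative call pattern for the two prediction modes.
import torch
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")
model = LOTClassModel.from_pretrained("bert-base-uncased", num_labels=4)

enc = tokenizer("the movie was [MASK]", return_tensors="pt")
with torch.no_grad():
    mlm_logits = model(enc["input_ids"], pred_mode="mlm",
                       attention_mask=enc["attention_mask"])   # (1, seq_len, vocab_size)
    cls_logits = model(enc["input_ids"], pred_mode="classification",
                       attention_mask=enc["attention_mask"])   # (1, seq_len, num_labels)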
Example #10
    def __init__(self, config):
        super().__init__(config)
        self.bert = BertModel(config=config, add_pooling_layer=False)
        self.cls = BertOnlyMLMHead(config)
Example #11
    def __init__(self, config, name):
        super().__init__(config)
        self.bert = XLNetModel(config)
        self.cls = BertOnlyMLMHead(config)
Example #12
    def __init__(self, config):
        super().__init__(config)
        self.bert = NeZhaModel(config)
        self.cls = BertOnlyMLMHead(config)
        self.init_weights()
Example #13
    def __lm_head__(self):
        return BertOnlyMLMHead(self.lm_config)