def __init__(self, config, args):
    super(ArabicDialectBERTMaskedLM, self).__init__(config)
    self.args = args
    self.bert = BertModel(config, add_pooling_layer=False)
    self.masking_perc = args["masking_percentage"]
    self.mask_id = args["mask_id"]
    self.device_name = args["device"]
    if args["use_adapters"]:
        if args["adapter_type"] == "Fusion":
            self.bert.encoder.layer = nn.ModuleList([
                BertLayer_w_Adapters(config, args["bottleneck_dim"],
                                     args["current_adapter_to_train"],
                                     args["no_total_adapters"],
                                     args["stage_2_training"],
                                     args["use_adapt_after_fusion"])
                for _ in range(config.num_hidden_layers)
            ])
            # self.bert.encoder.layer = nn.ModuleList(
            #     [BertLayer(config) for _ in range(11)]
            #     + [BertLayer_w_Adapters(config, args["bottleneck_dim"], args["current_adapter_to_train"],
            #                             args["no_total_adapters"], args["stage_2_training"],
            #                             args["use_adapt_after_fusion"]) for _ in range(1)])
        elif args["adapter_type"] == "plain_adapter":
            self.bert.encoder.layer = nn.ModuleList([
                BertLayer_w_PlainAdapters(config, args["bottleneck_dim"],
                                          args["current_adapter_to_train"],
                                          args["no_total_adapters"],
                                          args["stage_2_training"],
                                          args["use_adapt_after_fusion"])
                for _ in range(config.num_hidden_layers)
            ])
        # Freeze all except adapters and head
        for name, param in self.bert.encoder.layer.named_parameters():
            if "adapter_layer" not in name:
                param.requires_grad = False
    self.cls = BertOnlyMLMHead(config)
    self.init_weights()
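# The snippet above stores masking_percentage and mask_id but does not show how they are
# applied. A minimal, simplified sketch of MLM-style random masking using those two
# fields (the helper name `mask_inputs` and the 15%-style single-step masking are
# assumptions for illustration; the real class may mask differently):
import torch

def mask_inputs(input_ids, masking_perc, mask_id, ignore_index=-100):
    labels = input_ids.clone()
    # Sample positions to mask with probability masking_perc.
    mask = torch.rand(input_ids.shape, device=input_ids.device) < masking_perc
    labels[~mask] = ignore_index           # compute the loss only on masked positions
    masked_inputs = input_ids.clone()
    masked_inputs[mask] = mask_id          # replace selected tokens with the [MASK] id
    return masked_inputs, labels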
def __init__(self, config):
    super(BertForMaskedLM, self).__init__(config)
    self.bert = BertModel(config)
    self.cls = BertOnlyMLMHead(config)
    self.init_weights()
def __init__(self, config, tokenizer, device):
    super().__init__()
    self.config = config
    self.tokenizer = tokenizer
    self.embeddings = BertEmbeddings(self.config)
    self.corrector = BertEncoder(self.config)
    self.mask_token_id = self.tokenizer.mask_token_id
    self.cls = BertOnlyMLMHead(self.config)
    self._device = device
def __init__(self, config: Config, *args, **kwargs):
    super().__init__(config, *args, **kwargs)
    # Head modules
    self.cls = BertOnlyMLMHead(self.config)
    self.vocab_size = self.config.vocab_size
    # Loss
    self.ce_loss = torch.nn.CrossEntropyLoss(
        ignore_index=self.config.ignore_index)
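# The forward pass is not shown in the snippet above. A hedged sketch of how vocabulary
# logits produced by a head like self.cls are typically fed to such a cross-entropy loss
# (batch and sequence dimensions are flattened; positions set to the ignore index do not
# contribute to the loss). Shapes and the -100 ignore index are illustrative assumptions:
import torch

batch, seq_len, vocab_size = 2, 16, 30522
logits = torch.randn(batch, seq_len, vocab_size)   # stand-in for self.cls(hidden_states)
labels = torch.full((batch, seq_len), -100)        # -100 = position ignored by the loss
labels[:, 3] = 42                                  # pretend one masked token per sequence
ce_loss = torch.nn.CrossEntropyLoss(ignore_index=-100)
loss = ce_loss(logits.view(-1, vocab_size), labels.view(-1))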
def __init__(self, config):
    super(UnilmForSeq2Seq, self).__init__(config)
    self.bert = UnilmModel(config)
    self.cls = BertOnlyMLMHead(config)
    self.crit_mask_lm = nn.CrossEntropyLoss(reduction='none')
    if hasattr(config, 'label_smoothing') and config.label_smoothing:
        self.crit_mask_lm_smoothed = LabelSmoothingLoss(
            config.label_smoothing, config.vocab_size,
            ignore_index=0, reduction='none')
    else:
        self.crit_mask_lm_smoothed = None
    self.init_weights()
    self.tie_weights()
def __init__(self, config):
    # Call the init one parent class up.
    # Otherwise, the model will be defined twice.
    BertPreTrainedModel.__init__(self, config)

    if config.is_decoder:
        logging.warning(
            # This warning was included with the original BertForMaskedLM.
            f"If you want to use `{name_prefix}BertForMaskedLM` make sure "
            " `config.is_decoder=False` for bi-directional self-attention."
        )

    self.bert = bert_cls(config, add_pooling_layer=False)
    self.cls = BertOnlyMLMHead(config)
    self.init_weights()
def __init__(self, config, mask_word_id=0, search_beam_size=1, length_penalty=1.0,
             eos_id=0, sos_id=0, forbid_duplicate_ngrams=False, forbid_ignore_set=None,
             ngram_size=3, min_len=0):
    super(UnilmForSeq2SeqDecode, self).__init__(config)
    self.bert = UnilmModelIncr(config)
    self.cls = BertOnlyMLMHead(config)
    self.crit_mask_lm = nn.CrossEntropyLoss(reduction='none')
    self.mask_word_id = mask_word_id
    self.search_beam_size = search_beam_size
    self.length_penalty = length_penalty
    self.eos_id = eos_id
    self.sos_id = sos_id
    self.forbid_duplicate_ngrams = forbid_duplicate_ngrams
    self.forbid_ignore_set = forbid_ignore_set
    self.ngram_size = ngram_size
    self.min_len = min_len
    self.init_weights()
    self.tie_weights()
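# Both Unilm snippets end with tie_weights(). In standard Hugging Face MLM models this
# shares the BertOnlyMLMHead decoder weight with the input word embeddings, because the
# model exposes the head's decoder (cls.predictions.decoder) as its output embedding.
# A small demonstration with the stock BertForMaskedLM; the Unilm classes above are
# assumed to follow the same convention, but their tying code is not shown here:
import torch
from transformers import BertConfig, BertForMaskedLM

model = BertForMaskedLM(BertConfig())        # randomly initialized, no download needed
decoder = model.get_output_embeddings()      # the MLM head's final vocab projection
embeddings = model.get_input_embeddings()    # the word embedding matrix
print(decoder.weight.data_ptr() == embeddings.weight.data_ptr())   # True: weights shared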
class LOTClassModel(BertPreTrainedModel):

    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        self.bert = BertModel(config, add_pooling_layer=False)
        self.cls = BertOnlyMLMHead(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.activation = nn.Tanh()
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        self.init_weights()
        # MLM head is not trained
        for param in self.cls.parameters():
            param.requires_grad = False

    def forward(self, input_ids, pred_mode, attention_mask=None, token_type_ids=None,
                position_ids=None, head_mask=None, inputs_embeds=None):
        bert_outputs = self.bert(input_ids,
                                 attention_mask=attention_mask,
                                 token_type_ids=token_type_ids,
                                 position_ids=position_ids,
                                 head_mask=head_mask,
                                 inputs_embeds=inputs_embeds)
        last_hidden_states = bert_outputs[0]
        if pred_mode == "classification":
            trans_states = self.dense(last_hidden_states)
            trans_states = self.activation(trans_states)
            trans_states = self.dropout(trans_states)
            logits = self.classifier(trans_states)
        elif pred_mode == "mlm":
            logits = self.cls(last_hidden_states)
        else:
            sys.exit("Wrong pred_mode!")
        return logits
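# A hedged usage sketch for the class above: the same encoder output is routed either
# through the frozen BertOnlyMLMHead or through the trainable per-token classification
# head, depending on pred_mode. The randomly initialized config, dummy token ids, and
# shapes here are assumptions for illustration only:
import torch
from transformers import BertConfig

config = BertConfig(num_labels=4)
model = LOTClassModel(config)
model.eval()                                              # disable dropout for the sketch

input_ids = torch.randint(0, config.vocab_size, (2, 16))  # dummy token ids
attention_mask = torch.ones_like(input_ids)

mlm_logits = model(input_ids, pred_mode="mlm", attention_mask=attention_mask)
# -> (2, 16, vocab_size): per-token vocabulary logits from the frozen MLM head
cls_logits = model(input_ids, pred_mode="classification", attention_mask=attention_mask)
# -> (2, 16, num_labels): per-token label logits from the trainable classification head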
def __init__(self, config):
    super().__init__(config)
    self.bert = BertModel(config=config, add_pooling_layer=False)
    self.cls = BertOnlyMLMHead(config)
def __init__(self, config, name):
    super().__init__(config)
    self.bert = XLNetModel(config)
    self.cls = BertOnlyMLMHead(config)
def __init__(self, config):
    super().__init__(config)
    self.bert = NeZhaModel(config)
    self.cls = BertOnlyMLMHead(config)
    self.init_weights()
def __lm_head__(self):
    return BertOnlyMLMHead(self.lm_config)
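# For reference, a minimal standalone sketch of what the head itself computes: it maps
# encoder hidden states of shape (batch, seq_len, hidden_size) to vocabulary logits of
# shape (batch, seq_len, vocab_size). The import path below is for recent transformers
# releases; older versions exposed BertOnlyMLMHead from transformers.modeling_bert.
import torch
from transformers import BertConfig
from transformers.models.bert.modeling_bert import BertOnlyMLMHead

config = BertConfig()                                    # hidden_size=768, vocab_size=30522
head = BertOnlyMLMHead(config)
hidden_states = torch.randn(2, 16, config.hidden_size)   # stand-in for encoder output
logits = head(hidden_states)
print(logits.shape)                                      # torch.Size([2, 16, 30522])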