def __init__(self, config):
    super(BertForMaskedLM, self).__init__(config)
    self.bert = BertModel(config)
    self.cls = BertOnlyMLMHead(config, self.bert.embeddings.word_embeddings.weight)
    self.apply(self.init_bert_weights)
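A minimal sketch of the weight tying set up above, assuming these snippets target the pytorch_pretrained_bert package (where BertOnlyMLMHead assigns the passed embedding matrix to its decoder); the config values are arbitrary small numbers chosen only to keep the model tiny:

from pytorch_pretrained_bert.modeling import BertConfig, BertForMaskedLM

config = BertConfig(vocab_size_or_config_json_file=99, hidden_size=36,
                    num_hidden_layers=2, num_attention_heads=4,
                    intermediate_size=48)
model = BertForMaskedLM(config)
# The MLM decoder weight and the input word-embedding weight are the same
# Parameter object, so updating one updates the other.
assert model.cls.predictions.decoder.weight is model.bert.embeddings.word_embeddings.weight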
def __init__(self, config, label_size, vocab):
    super(BertForMaskedLM, self).__init__(config, label_size)
    self.vocab = vocab
    self.bert = BertModel(config, label_size)
    self.dropout = nn.Dropout(config.hidden_dropout_prob)
    self.cls = BertOnlyMLMHead(config, self.bert.embeddings.word_embeddings.weight)
    self.apply(self.init_bert_weights)
def init_word_embedding(self, num_special_tokens):
    orig_word_num = self.bert.embeddings.word_embeddings.weight.size(0)
    new_emb = nn.Embedding(
        orig_word_num + num_special_tokens, self.bert.config.hidden_size)
    new_emb.apply(self.init_bert_weights)
    emb = self.bert.embeddings.word_embeddings.weight.data
    new_emb.weight.data[:orig_word_num, :].copy_(emb)
    self.bert.embeddings.word_embeddings = new_emb
    # Rebuild the MLM head so its decoder stays tied to the enlarged embedding matrix.
    self.cls = BertOnlyMLMHead(
        self.bert.config, self.bert.embeddings.word_embeddings.weight)
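A self-contained sketch of the same resize-and-retie pattern applied to a plain BertForMaskedLM; it assumes the pytorch_pretrained_bert package, and the tiny config and token count are illustrative only:

import torch.nn as nn
from pytorch_pretrained_bert.modeling import BertConfig, BertForMaskedLM, BertOnlyMLMHead

config = BertConfig(vocab_size_or_config_json_file=99, hidden_size=36,
                    num_hidden_layers=2, num_attention_heads=4,
                    intermediate_size=48)
model = BertForMaskedLM(config)

num_special_tokens = 2
old = model.bert.embeddings.word_embeddings
new_emb = nn.Embedding(old.num_embeddings + num_special_tokens, config.hidden_size)
new_emb.apply(model.init_bert_weights)                                # random init for all rows
new_emb.weight.data[:old.num_embeddings, :].copy_(old.weight.data)    # restore the original rows
model.bert.embeddings.word_embeddings = new_emb
model.cls = BertOnlyMLMHead(config, new_emb.weight)                   # re-tie the decoder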
def test_BertOnlyMLMHead():
    input_ids = torch.LongTensor([[31, 51, 99], [15, 5, 0]])
    input_mask = torch.LongTensor([[1, 1, 1], [1, 1, 0]])
    token_type_ids = torch.LongTensor([[0, 0, 1], [0, 1, 0]])
    config = BertConfig(vocab_size_or_config_json_file=32000,
                        hidden_size=768,
                        num_hidden_layers=12,
                        num_attention_heads=12,
                        intermediate_size=3072)
    embeddings = BertEmbeddings(config)
    model = BertOnlyMLMHead(config, embeddings.word_embeddings.weight)
    embedding_output = embeddings(input_ids, token_type_ids)
    print(model(embedding_output))
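The test above needs the following imports to run; the module path is an assumption based on the snippets' use of the pytorch_pretrained_bert API, where BertConfig, BertEmbeddings, and BertOnlyMLMHead are all defined in modeling.py:

import torch
from pytorch_pretrained_bert.modeling import BertConfig, BertEmbeddings, BertOnlyMLMHead

test_BertOnlyMLMHead()  # prints a (2, 3, 32000) tensor of per-token vocabulary scores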
def load_fine_tuned_model(bert_model, text_encoder, path):
    """Load a fine-tuned BERT model given a text encoder and a checkpoint path."""
    bert_text_encoder = BertLikeSentencePieceTextEncoder(text_encoder)
    model = BertForMaskedLM.from_pretrained(bert_model)
    model.bert_text_encoder = bert_text_encoder
    # Resize the word embeddings to the new vocabulary and re-tie the MLM head
    # before loading the fine-tuned weights.
    model.bert.embeddings.word_embeddings = nn.Embedding(
        bert_text_encoder.vocab_size,
        model.bert.embeddings.word_embeddings.weight.shape[1])
    model.config.vocab_size = bert_text_encoder.vocab_size
    model.cls = BertOnlyMLMHead(model.config,
                                model.bert.embeddings.word_embeddings.weight)
    model.load_state_dict(torch.load(path))
    return model
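A hypothetical call of the loader above; the model name, text-encoder argument, and checkpoint path are placeholders, and BertLikeSentencePieceTextEncoder is the project's own wrapper:

# Hypothetical usage; the arguments below are placeholders.
model = load_fine_tuned_model(bert_model='bert-base-uncased',
                              text_encoder='spm.model',
                              path='finetuned_mlm.pt')
model.eval()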
def __init__(self, config, hidden=100, gpu=True, dropout_prob=0.3,
             bert_cache_dir=None, version='large', focal=False):
    super(OpinioNet, self).__init__(config)
    self.version = version
    if self.version == 'tiny':
        self._tiny_version_init(hidden)
    self.focal = focal
    self.bert_cache_dir = bert_cache_dir
    self.bert = BertModel(config)
    self.apply(self.init_bert_weights)
    self.bert_hidden_size = self.config.hidden_size
    # Intermediate projections from the BERT hidden states.
    self.w_as11 = nn.Linear(self.bert_hidden_size, hidden)
    self.w_as12 = nn.Linear(self.bert_hidden_size, hidden)
    self.w_ae11 = nn.Linear(self.bert_hidden_size, hidden)
    self.w_ae12 = nn.Linear(self.bert_hidden_size, hidden)
    self.w_os11 = nn.Linear(self.bert_hidden_size, hidden)
    self.w_os12 = nn.Linear(self.bert_hidden_size, hidden)
    self.w_oe11 = nn.Linear(self.bert_hidden_size, hidden)
    self.w_oe12 = nn.Linear(self.bert_hidden_size, hidden)
    # Scalar score heads over the intermediate space.
    self.w_as2 = nn.Linear(hidden, 1)
    self.w_ae2 = nn.Linear(hidden, 1)
    self.w_os2 = nn.Linear(hidden, 1)
    self.w_oe2 = nn.Linear(hidden, 1)
    # Label classification heads.
    self.w_obj = nn.Linear(self.bert_hidden_size, 1)
    self.w_common = nn.Linear(self.bert_hidden_size, len(ID2COMMON))
    self.w_makeup = nn.Linear(self.bert_hidden_size, len(ID2MAKUP) - len(ID2COMMON))
    self.w_laptop = nn.Linear(self.bert_hidden_size, len(ID2LAPTOP) - len(ID2COMMON))
    self.w_p = nn.Linear(self.bert_hidden_size, len(ID2P))
    # MLM head sharing the encoder's word-embedding weights.
    self.cls = BertOnlyMLMHead(config, self.bert.embeddings.word_embeddings.weight)
    # self.w_num = nn.Linear(self.bert_hidden_size, 8)
    self.dropout = nn.Dropout(dropout_prob)
    self.softmax = nn.Softmax(dim=-1)
    self.log_softmax = nn.LogSoftmax(dim=-1)
    self.kl_loss = nn.KLDivLoss(reduction='batchmean')
    if gpu:
        self.cuda()