def init_weights(self):
    if self.emb_matrix is None:
        self.emb.weight.data[1:, :].uniform_(-1.0, 1.0)  # keep the padding row (index 0) at zero
    else:
        self.emb_matrix = torch.from_numpy(self.emb_matrix)
        self.emb.weight.data.copy_(self.emb_matrix)
    if self.opt['pos_dim'] > 0:
        self.pos_emb.weight.data[1:, :].uniform_(-1.0, 1.0)
    if self.opt['ner_dim'] > 0:
        self.ner_emb.weight.data[1:, :].uniform_(-1.0, 1.0)

    self.linear.bias.data.fill_(0)
    init.xavier_uniform_(self.linear.weight, gain=1)  # initialize linear layer
    if self.opt['attn']:
        self.pe_emb.weight.data.uniform_(-1.0, 1.0)

    # decide finetuning
    if self.topn <= 0:
        print("Do not finetune word embedding layer.")
        self.emb.weight.requires_grad = False
    elif self.topn < self.opt['vocab_size']:
        print("Finetune top {} word embeddings.".format(self.topn))
        self.emb.weight.register_hook(lambda x: torch_utils.keep_partial_grad(x, self.topn))
    else:
        print("Finetune all embeddings.")

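# The snippets in this section all rely on torch_utils.keep_partial_grad to restrict
# finetuning to the first `topn` embedding rows. A minimal sketch of such a helper is
# given below; it is an assumption about the project's torch_utils module, not its
# actual implementation.
def keep_partial_grad(grad, topk):
    """Zero the gradient of every embedding row from index `topk` onward, so only the
    top-k (typically the most frequent) word vectors are updated during backprop."""
    assert topk < grad.size(0)
    grad.data[topk:].zero_()
    return grad

# Illustrative usage: emb.weight.register_hook(lambda g: keep_partial_grad(g, topn))
# leaves emb.weight.grad[topn:] at zero after every backward pass.
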
def init_weights(self):
    # use embeddings from BERT
    bert = BertModel.from_pretrained(self.model_name)
    bert.resize_token_embeddings(len(constant.tokenizer))
    self.emb.weight.data.copy_(bert.embeddings.word_embeddings.weight)
    if self.opt['pos_dim'] > 0:
        self.pos_emb.weight.data[1:, :].uniform_(-1.0, 1.0)
    if self.opt['ner_dim'] > 0:
        self.ner_emb.weight.data[1:, :].uniform_(-1.0, 1.0)

    self.linear.bias.data.fill_(0)
    init.xavier_uniform_(self.linear.weight, gain=1)  # initialize linear layer
    if self.opt['attn']:
        self.pe_emb.weight.data.uniform_(-1.0, 1.0)

    # decide finetuning
    if self.topn <= 0:
        print("Do not finetune word embedding layer.")
        self.emb.weight.requires_grad = False
    elif self.topn < self.opt['vocab_size']:
        print("Finetune top {} word embeddings.".format(self.topn))
        self.emb.weight.register_hook(lambda x: torch_utils.keep_partial_grad(x, self.topn))
    else:
        print("Finetune all embeddings.")

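# The BERT-based variant above seeds a plain nn.Embedding with a pretrained word-embedding
# table. A self-contained sketch of the same idea with the HuggingFace `transformers` API
# follows; the checkpoint name and tokenizer are illustrative stand-ins for self.model_name
# and constant.tokenizer (which may carry extra special tokens).
import torch.nn as nn
from transformers import BertModel, BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert = BertModel.from_pretrained('bert-base-uncased')
bert.resize_token_embeddings(len(tokenizer))  # match vocab size after any added special tokens

emb = nn.Embedding(len(tokenizer), bert.config.hidden_size, padding_idx=0)
emb.weight.data.copy_(bert.embeddings.word_embeddings.weight)
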
def init_pretrained_embeddings_from_numpy(self, pretrained_word_vectors):
    self.embedding.weight = nn.Parameter(
        torch.from_numpy(pretrained_word_vectors).float())
    if self.config.tune_topk <= 0:
        print("Do not fine tune word embedding layer")
        self.embedding.weight.requires_grad = False
    elif self.config.tune_topk < self.vocab.n_words:
        print(f"Finetune top {self.config.tune_topk} word embeddings")
        self.embedding.weight.register_hook(
            lambda x: torch_utils.keep_partial_grad(x, self.config.tune_topk))
    else:
        print("Finetune all word embeddings")

def init_embeddings(self):
    if self.emb_matrix is None:
        self.emb.weight.data[1:, :].uniform_(-1.0, 1.0)
    else:
        self.emb_matrix = torch.from_numpy(self.emb_matrix)
        self.emb.weight.data.copy_(self.emb_matrix)

    # decide finetuning
    if self.opt['topn'] <= 0:
        print("Do not finetune word embedding layer.")
        self.emb.weight.requires_grad = False
    elif self.opt['topn'] < self.opt['vocab_size']:
        print("Finetune top {} word embeddings.".format(self.opt['topn']))
        self.emb.weight.register_hook(lambda x: torch_utils.keep_partial_grad(x, self.opt['topn']))
    else:
        print("Finetune all embeddings.")

def init_weights(self):
    if self.emb_matrix is not None:
        if isinstance(self.emb_matrix, np.ndarray):
            self.emb_matrix = torch.from_numpy(self.emb_matrix)
        assert self.emb_matrix.size() == (self.vocab_size, self.emb_dim), \
            "Input embedding matrix must match size: {} x {}".format(self.vocab_size, self.emb_dim)
        self.embedding.weight.data.copy_(self.emb_matrix)
    else:
        init_range = constant.EMB_INIT_RANGE
        self.embedding.weight.data.uniform_(-init_range, init_range)

    if self.top <= 0:
        print("Do not finetune embedding layer.")
        self.embedding.weight.requires_grad = False
    elif self.top < self.vocab_size:
        print("Finetune top {} embeddings.".format(self.top))
        self.embedding.weight.register_hook(
            lambda x: torch_utils.keep_partial_grad(x, self.top))
    else:
        print("Finetune all embeddings.")

def init_embeddings(self): """ Initiliaze word embedding :return: """ if self.emb_matrix is None: self.emb.weight.data[1:, :].uniform_(-1.0, 1.0) else: self.emb.weight.data.copy_(self.emb_matrix) # decide finetuning if self.opt['topn'] <= 0: print('Do not finetune word embedding layer.') self.emb.weight.requires_grad = False elif self.opt['topn'] < self.opt['vocab_size']: print('Finetune top {} word embeddings.'.format(self.opt['topn'])) self.emb.weight.register_hook(lambda grad : torch_utils.keep_partial_grad(grad, self.opt['topn'])) else: print('Finetune all embeddings.')
def init_weights(self):
    if self.emb_matrix is None:
        self.word_emb.weight.data[1:, :].uniform_(-1.0, 1.0)  # keep the padding row (index 0) at zero
    else:
        self.emb_matrix = torch.from_numpy(self.emb_matrix)
        self.word_emb.weight.data.copy_(self.emb_matrix)
    self.char_emb.weight.data[1:, :].uniform_(-1.0, 1.0)
    self.pos_emb.weight.data[1:, :].uniform_(-1.0, 1.0)

    # decide finetuning
    if self.topn <= 0:
        print("Do not finetune word embedding layer.")
        self.word_emb.weight.requires_grad = False
    elif self.topn < self.opt['word_vocab_size']:
        print("Finetune top {} word embeddings.".format(self.topn))
        self.word_emb.weight.register_hook(lambda x: torch_utils.keep_partial_grad(x, self.topn))
    else:
        print("Finetune all embeddings.")

def init_weights(self):
    if self.emb_matrix is None:
        # if emb_matrix is None, randomly initialize word vectors and keep the padding row at zero
        self.emb.weight.data[1:, :].uniform_(-1.0, 1.0)
    else:
        self.emb.weight.data.copy_(self.emb_matrix)

    self.linear.bias.data.fill_(0)
    init.xavier_uniform_(self.linear.weight, gain=1)  # initialize linear layer
    init.xavier_uniform_(self.predict.weight, gain=1)

    # decide finetuning
    if self.topn <= 0:
        print("Do not finetune word embedding layer.")
        self.emb.weight.requires_grad = False
    elif self.topn < self.opt['vocab_size']:
        print("Finetune top {} word embeddings.".format(self.topn))
        self.emb.weight.register_hook(lambda x: torch_utils.keep_partial_grad(x, self.topn))
    else:
        print("Finetune all embeddings.")
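
# Quick check (illustrative only, not part of any snippet above): a gradient hook of the
# kind registered in these init functions freezes every embedding row past `topn` during
# backprop, while rows below `topn` keep their gradients. Uses the keep_partial_grad
# sketch defined earlier in this section.
import torch
import torch.nn as nn

topn = 2
emb = nn.Embedding(5, 4, padding_idx=0)
emb.weight.register_hook(lambda g: keep_partial_grad(g, topn))

emb(torch.tensor([1, 3, 4])).sum().backward()
print(emb.weight.grad[topn:].abs().sum().item())  # 0.0 -- rows past topn receive no update
print(emb.weight.grad[1].abs().sum().item() > 0)  # True -- row 1 is still trainable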