# These snippets appear to target the older pytorch-pretrained-bert style API, in which
# calling BertModel returns (all_encoded_layers, pooled_output); the usual imports would be
# torch, torch.nn as nn, and BertModel / BertConfig / BertTokenizer from that package.
class BertNER(nn.Module):
    def __init__(self, vocab_size=None, device='cpu', training=False):
        super().__init__()
        bert_vocab_size = 30522
        # Note: this config is constructed but not used by from_pretrained below.
        config = BertConfig(bert_vocab_size, max_position_embeddings=512)
        # from_pretrained is a classmethod, so call it on the class rather than on a
        # freshly constructed (and immediately discarded) BertModel(config) instance.
        self.bert = BertModel.from_pretrained('bert-base-cased').to(device)
        self.classifier = nn.Linear(768, vocab_size)  # vocab_size = number of NER tags
        self.device = device
        # Caution: this overwrites nn.Module's built-in `training` flag.
        self.training = training
        self.bert.eval()

    def forward(self, x):
        x = x.to(self.device)
        if self.training:
            self.bert.train()
            layers_out, _ = self.bert(x)
            last_layer = layers_out[-1]
        else:
            with torch.no_grad():
                layers_out, _ = self.bert(x)
                last_layer = layers_out[-1]
        logits = self.classifier(last_layer)
        preds = logits.argmax(-1)
        return logits, preds
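# --- Hedged usage sketch (added; not from the original source) ---
# Drives the BertNER module above on a toy batch of WordPiece ids. The tag count (9)
# and the random token ids are illustrative assumptions.
import torch

ner = BertNER(vocab_size=9, device='cpu', training=False)
token_ids = torch.randint(0, 30522, (2, 16))  # (batch=2, seq_len=16) token ids
logits, preds = ner(token_ids)                # logits: (2, 16, 9), preds: (2, 16)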
class Net(nn.Module):
    def __init__(self, config, bert_state_dict, vocab_len, device='cpu'):
        super().__init__()
        self.bert = BertModel(config)
        if bert_state_dict is not None:
            self.bert.load_state_dict(bert_state_dict)
        self.bert.eval()
        self.rnn = nn.LSTM(bidirectional=True, num_layers=2, input_size=768,
                           hidden_size=768 // 2, batch_first=True)
        self.fc = nn.Linear(768, vocab_len)
        self.device = device

    def forward(self, x, y):
        '''
        x: (N, T). int64
        y: (N, T). int64

        Returns
        enc: (N, T, VOCAB)
        '''
        x = x.to(self.device)
        y = y.to(self.device)
        with torch.no_grad():
            encoded_layers, _ = self.bert(x)
            enc = encoded_layers[-1]
        enc, _ = self.rnn(enc)
        logits = self.fc(enc)
        y_hat = logits.argmax(-1)
        return logits, y, y_hat
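# --- Hedged usage sketch (added; not from the original source) ---
# Shows how the frozen-BERT + BiLSTM tagger above might be instantiated; the config,
# tag-set size, and random inputs are placeholder assumptions.
import torch

config = BertConfig(30522)  # bert-base-sized config
net = Net(config=config, bert_state_dict=None, vocab_len=10, device='cpu')
x = torch.randint(0, 30522, (2, 16))  # (N, T) token ids
y = torch.randint(0, 10, (2, 16))     # (N, T) gold tag ids
logits, y_out, y_hat = net(x, y)      # logits: (2, 16, 10)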
def get_kobert_model(model_file, vocab_file, ctx="cpu"):
    bertmodel = BertModel(config=BertConfig.from_dict(bert_config))
    bertmodel.load_state_dict(torch.load(model_file))
    device = torch.device(ctx)
    bertmodel.to(device)
    bertmodel.eval()
    vocab_b_obj = nlp.vocab.BERTVocab.from_json(open(vocab_file, 'rt').read())
    return bertmodel, vocab_b_obj
def get_kobert_model(model_file, vocab_file, ctx="cpu"):
    bertmodel = BertModel(config=BertConfig.from_dict(bert_config))
    bertmodel.load_state_dict(torch.load(model_file))
    device = torch.device(ctx)
    bertmodel.to(device)
    bertmodel.eval()
    vocab_b_obj = nlp.vocab.BERTVocab.from_sentencepiece(vocab_file,
                                                         padding_token='[PAD]')
    return bertmodel, vocab_b_obj
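# --- Hedged usage sketch (added; not from the original source) ---
# Possible call of the KoBERT loader above; the file paths are placeholders that must
# point to a real checkpoint and SentencePiece vocab, and `bert_config` is assumed to be
# the KoBERT config dict defined elsewhere in the original module.
bertmodel, vocab_b_obj = get_kobert_model(
    model_file='./kobert_model/pytorch_kobert.params',
    vocab_file='./kobert_model/kobert_vocab.spiece',
    ctx='cpu')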
def create_bert_model(self, config, input_ids, token_type_ids, input_mask,
                      sequence_labels, token_labels, choice_labels):
    model = BertModel(config=config)
    model.eval()
    all_encoder_layers, pooled_output = model(input_ids, token_type_ids, input_mask)
    outputs = {
        "sequence_output": all_encoder_layers[-1],
        "pooled_output": pooled_output,
        "all_encoder_layers": all_encoder_layers,
    }
    return outputs
def prepare_bert(params, texts):
    # Pick the BERT variant once, then load the tokenizer and model consistently.
    model_name = 'bert-large-uncased' if params['bert']['large'] else 'bert-base-uncased'
    # Load pre-trained model tokenizer (vocabulary)
    tokenizer = BertTokenizer.from_pretrained(model_name)
    if params['bert']['trained']:
        model = BertModel.from_pretrained(model_name)
    else:
        # Randomly initialised weights with the same architecture (config) as the
        # pre-trained model.
        model = BertModel(BertModel.from_pretrained(model_name).config)
    model.eval()
    params['encoder'] = model
    params['tokenizer'] = tokenizer
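# --- Hedged usage sketch (added; not from the original source) ---
# Minimal params dict for prepare_bert above; the key layout mirrors the lookups the
# function performs ('bert' -> 'large' / 'trained'), everything else is assumed.
params = {'bert': {'large': False, 'trained': True}}
prepare_bert(params, texts=[])
encoder, tokenizer = params['encoder'], params['tokenizer']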
def get_kobert_model(ctx="cpu"):
    model_file = './kobert_model/pytorch_kobert_2439f391a6.params'
    vocab_file = './kobert_model/kobertvocab_f38b8a4d6d.json'
    bertmodel = BertModel(config=BertConfig.from_dict(bert_config))
    bertmodel.load_state_dict(torch.load(model_file))
    device = torch.device(ctx)
    bertmodel.to(device)
    bertmodel.eval()
    vocab_b_obj = nlp.vocab.BERTVocab.from_json(open(vocab_file, 'rt').read())
    return bertmodel, vocab_b_obj
def encode_bert(texts, trained=True, large=False):
    model_name = 'bert-large-uncased' if large else 'bert-base-uncased'
    # Load pre-trained model tokenizer (vocabulary)
    tokenizer = BertTokenizer.from_pretrained(model_name)
    if trained:
        model = BertModel.from_pretrained(model_name)
    else:
        # Randomly initialised weights with the same architecture as the pre-trained model.
        model = BertModel(BertModel.from_pretrained(model_name).config)
    model.eval()
    for text in texts:
        text = "[CLS] {} [SEP]".format(text.lower())
        tokenized_text = tokenizer.tokenize(text)
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        tokens_tensor = torch.tensor([indexed_tokens])
        with torch.no_grad():
            encoded_layers, pooled = model(tokens_tensor)
        yield encoded_layers, pooled
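# --- Hedged usage sketch (added; not from the original source) ---
# encode_bert above is a generator: each yielded item is (encoded_layers, pooled) for one
# sentence, where encoded_layers is the per-layer list of (1, T, 768) tensors.
for encoded_layers, pooled in encode_bert(["The cat sat on the mat."]):
    last_layer = encoded_layers[-1]  # (1, num_tokens, 768)
    print(last_layer.shape, pooled.shape)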
class STS_NET(nn.Module):
    def __init__(self, config, bert_state_dict, device=Param.device):
        super().__init__()
        self.bert = BertModel(config)
        if bert_state_dict is not None:
            self.bert.load_state_dict(bert_state_dict)
        self.bert.eval()
        self.dropout = nn.Dropout(p=Param.p)
        self.rnn = nn.LSTM(bidirectional=True, num_layers=1, input_size=768,
                           hidden_size=768 // 2)
        self.f1 = nn.Linear(768 // 2, 128)
        self.f2 = nn.Linear(128, 32)
        self.out = nn.Linear(32, 1)
        self.device = device

    def init_hidden(self, batch_size):
        return (torch.zeros(2, batch_size, 768 // 2).to(self.device),
                torch.zeros(2, batch_size, 768 // 2).to(self.device))

    def forward(self, x_f, x_r):
        batch_size = x_f.size()[0]
        x_f = x_f.to(self.device)
        x_r = x_r.to(self.device)
        xf_encoded_layers, _ = self.bert(x_f)
        enc_f = xf_encoded_layers[-1]
        enc = enc_f.permute(1, 0, 2)  # (T, N, 768) for the time-major LSTM
        enc = self.dropout(enc)
        self.hidden = self.init_hidden(batch_size)
        rnn_out, self.hidden = self.rnn(enc, self.hidden)
        last_hidden_state, last_cell_state = self.hidden
        rnn_out = self.dropout(last_hidden_state)  # note: not used below
        f1_out = F.relu(self.f1(last_hidden_state[-1]))
        f2_out = F.relu(self.f2(f1_out))
        out = self.out(f2_out)
        return out
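# --- Hedged usage sketch (added; not from the original source) ---
# Assumes the Param namespace referenced above (Param.device, Param.p) is available.
# Config, state dict, and random inputs are placeholders; note that x_r is moved to the
# device but not otherwise used in the forward pass shown above.
import torch

config = BertConfig(30522)  # bert-base-sized config
sts = STS_NET(config=config, bert_state_dict=None, device='cpu')
x_f = torch.randint(0, 30522, (4, 32))  # forward-order token ids, (N, T)
x_r = torch.randint(0, 30522, (4, 32))
scores = sts(x_f, x_r)                  # (4, 1) similarity scores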
def get_bert_word_emb(text_array):
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    # Load pre-trained model (weights); bound to a lowercase name so the BertModel class
    # itself is not shadowed by the instance.
    bert_model = BertModel.from_pretrained('bert-base-uncased').to(device)
    bert_model.eval()
# Fragment of a running-average tracker; the enclosing class definition is not shown here.
def update(self, val, n=1):
    self.sum += val * n
    self.count += n
    self.avg = self.sum / self.count


# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load pre-trained model tokenizer (vocabulary)
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Load pre-trained model (weights); bound to a lowercase name so the BertModel class
# itself is not shadowed by the instance.
bert_model = BertModel.from_pretrained('bert-base-uncased').to(device)
bert_model.eval()

# Load GloVe
glove_vectors = pickle.load(open('glove.6B/glove_words.pkl', 'rb'))
glove_vectors = torch.tensor(glove_vectors)


#####################
# Encoder: ResNet CNN
#####################
class Encoder(nn.Module):
    def __init__(self):
        super(Encoder, self).__init__()
        resnet = models.resnet101(pretrained=True)
        # Drop the final pooling and classification layers, keep the convolutional trunk.
        self.resnet = nn.Sequential(*list(resnet.children())[:-2])
        self.adaptive_pool = nn.AdaptiveAvgPool2d((14, 14))
class Net(nn.Module):
    def __init__(self, config, bert_state_dict, vocab_len, device='cuda'):
        super().__init__()
        self.bert = BertModel(config)
        self.num_layers = 2
        self.input_size = 768
        self.hidden_size = 768
        self.tagset_size = vocab_len
        # The bidirectional LSTM below outputs 2 * (hidden_size // 2) = hidden_size
        # dimensional representations, matching the classifier input.
        if bert_state_dict is not None:
            self.bert.load_state_dict(bert_state_dict)
        self.bert.eval()
        # Each input vector has size 768; each LSTM direction outputs a vector of size 768 // 2.
        self.lstm = nn.LSTM(self.input_size, self.hidden_size // 2, self.num_layers,
                            batch_first=True, bidirectional=True)
        self.fc = nn.Linear(self.hidden_size, vocab_len)
        self.device = device

    def init_hidden(self, batch_size):
        '''Initializes hidden state.'''
        # Create two new tensors of size (num_layers * 2) x batch_size x (hidden_size // 2),
        # Xavier-initialized, for the hidden state and cell state of the LSTM.
        weight = next(self.parameters()).data
        if self.device == 'cuda':
            hidden = (nn.init.xavier_normal_(
                          weight.new(self.num_layers * 2, batch_size,
                                     self.hidden_size // 2).zero_()).cuda(),
                      nn.init.xavier_normal_(
                          weight.new(self.num_layers * 2, batch_size,
                                     self.hidden_size // 2).zero_()).cuda())
        else:
            hidden = (nn.init.xavier_normal_(
                          weight.new(self.num_layers * 2, batch_size,
                                     self.hidden_size // 2).zero_()),
                      nn.init.xavier_normal_(
                          weight.new(self.num_layers * 2, batch_size,
                                     self.hidden_size // 2).zero_()))
        return hidden

    def init_eval_hidden(self, batch_size):
        '''Initializes hidden state for evaluation (effective batch size 1).'''
        weight = next(self.parameters()).data
        hidden = (nn.init.xavier_normal_(
                      weight.new(self.num_layers * 2, 1, self.hidden_size // 2).zero_()),
                  nn.init.xavier_normal_(
                      weight.new(self.num_layers * 2, 1, self.hidden_size // 2).zero_()))
        return hidden

    def forward(self, x, hidden):
        with torch.no_grad():
            encoded_layers, _ = self.bert(x)
            enc = encoded_layers[-1]
        out, hidden = self.lstm(enc, hidden)
        logits = self.fc(out)
        # softmax = torch.nn.Softmax(dim=2)
        # logits = softmax(logits)
        y_hat = logits.argmax(-1)
        return logits, hidden, y_hat
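# --- Hedged usage sketch (added; not from the original source) ---
# Drives the BERT + BiLSTM tagger above on CPU; the hidden state is created per batch
# with init_hidden. The config and tag-set size are placeholder assumptions.
import torch

config = BertConfig(30522)  # bert-base-sized config
net = Net(config=config, bert_state_dict=None, vocab_len=12, device='cpu')
x = torch.randint(0, 30522, (3, 20))    # (batch, seq_len) token ids
hidden = net.init_hidden(batch_size=3)
logits, hidden, y_hat = net(x, hidden)  # logits: (3, 20, 12), y_hat: (3, 20)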