def validate(self, dev_word_lists, dev_tag_lists, word2id, tag2id):
    self.model.eval()
    with torch.no_grad():
        val_losses = 0.
        val_step = 0
        for ind in range(0, len(dev_word_lists), self.batch_size):
            val_step += 1
            # prepare the batch data
            batch_sents = dev_word_lists[ind:ind + self.batch_size]
            batch_tags = dev_tag_lists[ind:ind + self.batch_size]
            tensorized_sents, lengths = tensorized(batch_sents, word2id)
            tensorized_sents = tensorized_sents.to(self.device)
            targets, _ = tensorized(batch_tags, tag2id)  # don't clobber lengths
            targets = targets.to(self.device)

            # forward
            scores = self.model(tensorized_sents, lengths)

            # compute the loss
            loss = self.cal_loss_func(scores, targets, tag2id).to(self.device)
            val_losses += loss.item()
        val_loss = val_losses / val_step

        if val_loss < self._best_val_loss:
            print("Saving model...")
            self.best_model = deepcopy(self.model)
            self._best_val_loss = val_loss

        return val_loss
def validate(self, dev_word_lists, dev_tag_lists, word2id, tag2id):
    self.model.eval()
    # no gradient tracking during evaluation, which saves memory
    with torch.no_grad():
        val_losses = 0.
        val_step = 0
        for ind in range(0, len(dev_word_lists), self.batch_size):
            val_step += 1
            batch_sents = dev_word_lists[ind:ind + self.batch_size]
            batch_tag = dev_tag_lists[ind:ind + self.batch_size]
            tensorized_sent, lengths = tensorized(batch_sents, word2id)
            tensorized_tag, _ = tensorized(batch_tag, tag2id)  # don't clobber lengths
            tensorized_sent = tensorized_sent.to(self.device)
            tensorized_tag = tensorized_tag.to(self.device)
            scores = self.model(tensorized_sent, lengths)
            loss = self.model.loss_cal(scores, lengths, tensorized_tag)
            val_losses += loss.item()
        val_loss = val_losses / val_step  # average loss over the dev batches

        if val_loss < self._best_val_loss:
            print("Improved val loss, saving model")
            self.best_model = deepcopy(self.model)  # deepcopy builds an independent copy of the model
            self._best_val_loss = val_loss

        return val_loss
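# The tensorized() helper called throughout these snippets is not shown in this
# section. Below is a minimal sketch of what it is assumed to do: pad a batch to
# the longest sentence and return per-sentence lengths. The '<pad>'/'<unk>' keys
# and the 0 fallback id are assumptions, not confirmed by the original code.
import torch

def tensorized(batch, maps):
    PAD = maps.get('<pad>', 0)  # assumed padding id
    UNK = maps.get('<unk>', 0)  # assumed unknown-token id
    max_len = max(len(sent) for sent in batch)
    batch_tensor = torch.full((len(batch), max_len), PAD, dtype=torch.long)
    for i, sent in enumerate(batch):
        for j, token in enumerate(sent):
            batch_tensor[i][j] = maps.get(token, UNK)
    lengths = [len(sent) for sent in batch]
    return batch_tensor, lengths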
def test(self, test_word_lists, test_tag_lists, word2id, tag2id):
    test_word_lists, test_tag_lists, indices = sort_by_lengths(
        test_word_lists, test_tag_lists)
    tensorized_sent, lengths = tensorized(test_word_lists, word2id)
    tag_lists = [
        test_tag_list[:lengths[i]]
        for i, test_tag_list in enumerate(test_tag_lists)
    ]
    self.best_model.eval()
    pred_tagid_lists = []
    with torch.no_grad():
        B = self.batch_size
        for ind in range(0, len(test_word_lists), B):
            # slice out the current batch; feeding the full tensor on every
            # iteration would duplicate every prediction once per batch
            batch_sent = tensorized_sent[ind:ind + B].to(self.device)
            batch_lengths = lengths[ind:ind + B]
            batch_tagids = self.best_model.test(batch_sent, batch_lengths,
                                                tag2id)  # [B, L]
            pred_tagid_lists += batch_tagids
    id2tag = dict((id, tag) for tag, id in tag2id.items())
    pred_tag_lists = []  # [B, L]
    for i, ids in enumerate(pred_tagid_lists):
        tag_list = []
        for j in range(lengths[i]):
            tag_list.append(id2tag.get(ids[j].item()))
        pred_tag_lists.append(tag_list)
    # note: indices is unused here, so both returned lists stay in
    # length-sorted order; pred_tag_lists and tag_lists remain aligned
    return pred_tag_lists, tag_lists
def test_no_tag(self, word_lists, word2id, tag2id):
    indices = sorted(range(len(word_lists)),
                     key=lambda k: len(word_lists[k]),
                     reverse=True)
    word_lists = [word_lists[i] for i in indices]
    tensorized_sent, lengths = tensorized(word_lists, word2id)
    tensorized_sent = tensorized_sent.to(self.device)
    self.best_model.eval()
    with torch.no_grad():
        batch_tagids = self.best_model.test(tensorized_sent, lengths,
                                            tag2id)  # [B, L]
    id2tag = dict((id, tag) for tag, id in tag2id.items())  # map ids back to tags
    pred_tag_lists = []
    for i, ids in enumerate(batch_tagids):
        tag_list = []
        if self.crf:
            for j in range(lengths[i] - 1):  # CRF decoding drops the <end> tag
                tag_list.append(id2tag[ids[j].item()])
        else:
            for j in range(lengths[i]):
                tag_list.append(id2tag[ids[j].item()])
        pred_tag_lists.append(tag_list)
    # restore the original sentence order
    ind_maps = sorted(list(enumerate(indices)), key=lambda e: e[1])
    indices, _ = list(zip(*ind_maps))
    pred_tag_lists = [pred_tag_lists[i] for i in indices]
    return pred_tag_lists
def test(self, word_lists, tag_lists, word2id, tag2id):
    """Return the best model's predictions on the test set."""
    # prepare the data
    word_lists, tag_lists, indices = sort_by_lengths(word_lists, tag_lists)
    tensorized_sents, lengths = tensorized(word_lists, word2id)
    tensorized_sents = tensorized_sents.to(self.device)

    self.best_model.eval()
    with torch.no_grad():
        batch_tagids = self.best_model.test(tensorized_sents, lengths, tag2id)

    # map ids back to tags
    pred_tag_lists = []
    id2tag = dict((id_, tag) for tag, id_ in tag2id.items())
    for i, ids in enumerate(batch_tagids):
        tag_list = []
        if self.crf:
            for j in range(lengths[i] - 1):  # CRF decoding drops the <end> tag
                tag_list.append(id2tag[ids[j].item()])
        else:
            for j in range(lengths[i]):
                tag_list.append(id2tag[ids[j].item()])
        pred_tag_lists.append(tag_list)

    # indices records the index mapping produced by sorting by length:
    # e.g. indices = [1, 2, 0] means the element originally at index 1 now
    # sits at index 0, the element originally at index 2 now sits at index 1, ...
    # use indices to put pred_tag_lists and tag_lists back in the original order
    ind_maps = sorted(list(enumerate(indices)), key=lambda e: e[1])
    indices, _ = list(zip(*ind_maps))
    pred_tag_lists = [pred_tag_lists[i] for i in indices]
    tag_lists = [tag_lists[i] for i in indices]

    return pred_tag_lists, tag_lists
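# sort_by_lengths() is also not shown in this section. A minimal sketch of the
# assumed behavior: sort the paired lists by sentence length, longest first (as
# pack_padded_sequence expects), and return the permutation so callers can undo it.
def sort_by_lengths(word_lists, tag_lists):
    indices = sorted(range(len(word_lists)),
                     key=lambda k: len(word_lists[k]),
                     reverse=True)
    word_lists = [word_lists[i] for i in indices]
    tag_lists = [tag_lists[i] for i in indices]
    # indices[new_pos] == old_pos, which is what the reordering code above expects
    return word_lists, tag_lists, indices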
def train_step(self, word_lists, tag_lists, word2id, tag2id):
    self.model.train()
    self.step += 1
    # sents and tags share the same lengths
    tensorized_sents, lengths = tensorized(word_lists, word2id)
    tensorized_tags, _ = tensorized(tag_lists, tag2id)
    tensorized_sents = tensorized_sents.to(self.device)
    tensorized_tags = tensorized_tags.to(self.device)

    scores = self.model(tensorized_sents, lengths)

    # compute the loss and update
    self.optimizer.zero_grad()
    loss = self.model.loss_cal(scores, lengths, tensorized_tags)
    loss.backward()
    self.optimizer.step()
    return loss.item()
def train_step(self, batch_sents, batch_tags, word2id, tag2id):
    self.model.train()
    self.step += 1
    # convert the data to tensors
    tensorized_sents, lengths = tensorized(batch_sents, word2id)
    targets, _ = tensorized(batch_tags, tag2id)
    tensorized_sents, targets = tensorized_sents.to(
        self.device), targets.to(self.device)

    # forward: scores is [batch, seq_len, out_size]
    scores = self.model(tensorized_sents, lengths)

    # compute the loss and backpropagate
    self.model.zero_grad()
    loss = self.cal_loss_func(scores, targets, tag2id)
    loss.backward()
    self.optimizer.step()
    return loss.item()
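# For context, a minimal hypothetical train() driver showing how train_step()
# and validate() above would be called together; the self.epochs attribute and
# the logging format are assumptions, not part of the original code.
def train(self, word_lists, tag_lists, dev_word_lists, dev_tag_lists,
          word2id, tag2id):
    # sort once up front so every sliced batch is ordered longest-first
    word_lists, tag_lists, _ = sort_by_lengths(word_lists, tag_lists)
    dev_word_lists, dev_tag_lists, _ = sort_by_lengths(
        dev_word_lists, dev_tag_lists)
    B = self.batch_size
    for e in range(1, self.epochs + 1):
        self.step = 0
        losses = 0.
        for ind in range(0, len(word_lists), B):
            batch_sents = word_lists[ind:ind + B]
            batch_tags = tag_lists[ind:ind + B]
            losses += self.train_step(batch_sents, batch_tags, word2id, tag2id)
        val_loss = self.validate(dev_word_lists, dev_tag_lists, word2id, tag2id)
        print("Epoch {}, train loss: {:.4f}, val loss: {:.4f}".format(
            e, losses / max(self.step, 1), val_loss))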
def test(self, test_word_lists, test_tag_lists, word2id, tag2id):
    # the sentence order must be restored afterwards
    test_word_lists, test_tag_lists, indices = sort_by_lengths(
        test_word_lists, test_tag_lists)
    tensorized_sent, lengths = tensorized(test_word_lists, word2id)
    tensorized_sent = tensorized_sent.to(self.device)
    self.best_model.eval()
    with torch.no_grad():
        batch_tagids = self.best_model.test(tensorized_sent, lengths,
                                            tag2id)  # [B, L]
    id2tag = dict((id, tag) for tag, id in tag2id.items())
    pred_tag_lists = []  # [B, L]
    for i, ids in enumerate(batch_tagids):
        tag_list = []  # (L,)
        if self.crf:
            for j in range(lengths[i] - 1):
                # item() extracts the value from the tensor; easy to forget
                tag_list.append(id2tag[ids[j].item()])
        else:
            for j in range(lengths[i]):
                tag_list.append(id2tag[ids[j].item()])
        pred_tag_lists.append(tag_list)
    # indices = [1, 2, 0] means the element originally at index 1 is now at index 0;
    # [(0,1), (1,2), (2,0)] sorted by the second field gives [(2,0), (0,1), (1,2)]
    ind_maps = sorted(list(enumerate(indices)), key=lambda e: e[1])
    indices, _ = list(zip(*ind_maps))
    pred_tag_lists = [pred_tag_lists[i] for i in indices]
    tag_lists = [test_tag_lists[i] for i in indices]
    return pred_tag_lists, tag_lists
# xe_loss = FocalLoss(Args.alpha.value, Args.gamma.value).to(DEVICE)
xe_loss = nn.CrossEntropyLoss().to(DEVICE)
model = nn.DataParallel(Bert().to(DEVICE))
classifier = nn.DataParallel(Classifier(xe_loss).to(DEVICE))
optim = BertAdam(model.parameters(), lr=Args.bert_lr.value)
c_optim = torch.optim.Adam(classifier.parameters(), lr=Args.c_lr.value)

accumulation_steps = Args.accumulation_steps.value
for epoch in range(Args.epochs.value):
    model.train()
    classifier.train()
    total_loss = 0
    for i, batch in enumerate(train_dataloader):
        data, mask = tensorized(batch[:, 0], vocab)
        label = torch.tensor(list(batch[:, 1])).to(DEVICE)
        data, mask = data.to(DEVICE), mask.to(DEVICE)

        output = model(data, mask)
        logit, loss = classifier(output, label)
        # scale the loss so the accumulated gradient matches one step over an
        # effective batch of batch_size * accumulation_steps samples
        loss = loss.mean() / accumulation_steps
        loss.backward()
        if (i + 1) % accumulation_steps == 0:
            optim.step()
            c_optim.step()
            optim.zero_grad()
            c_optim.zero_grad()
        total_loss += loss.item() * accumulation_steps
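# Note that tensorized() in the BERT loop above returns (data, mask) rather than
# the (tensor, lengths) pair used by the LSTM snippets: BERT wants an attention
# mask instead of packed lengths. A minimal sketch of this assumed variant; the
# '<pad>'/'<unk>' keys and 0 fallback id are assumptions.
import torch

def tensorized(sents, vocab):
    PAD = vocab.get('<pad>', 0)  # assumed padding id
    UNK = vocab.get('<unk>', 0)  # assumed unknown-token id
    max_len = max(len(sent) for sent in sents)
    data = torch.full((len(sents), max_len), PAD, dtype=torch.long)
    for i, sent in enumerate(sents):
        for j, token in enumerate(sent):
            data[i][j] = vocab.get(token, UNK)
    mask = (data != PAD).long()  # 1 for real tokens, 0 for padding
    return data, mask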