Example #1
    def validate(self, dev_word_lists, dev_tag_lists, word2id, tag2id):
        self.model.eval()
        with torch.no_grad():
            val_losses = 0.
            val_step = 0
            for ind in range(0, len(dev_word_lists), self.batch_size):
                val_step += 1
                # prepare the batch data
                batch_sents = dev_word_lists[ind:ind + self.batch_size]
                batch_tags = dev_tag_lists[ind:ind + self.batch_size]
                tensorized_sents, lengths = tensorized(batch_sents, word2id)
                tensorized_sents = tensorized_sents.to(self.device)
                targets, _ = tensorized(batch_tags, tag2id)  # tag lengths equal the word lengths
                targets = targets.to(self.device)

                # forward
                scores = self.model(tensorized_sents, lengths)

                # compute the loss
                loss = self.cal_loss_func(scores, targets,
                                          tag2id).to(self.device)
                val_losses += loss.item()
            val_loss = val_losses / val_step

            if val_loss < self._best_val_loss:
                print("保存模型...")
                self.best_model = deepcopy(self.model)
                self._best_val_loss = val_loss

            return val_loss
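
All of these snippets lean on a `tensorized` helper that is not shown. Below is a minimal sketch of what it plausibly does, assuming the vocabulary maps contain `<pad>` and `<unk>` entries (both names are assumptions, not confirmed by the source):

import torch

def tensorized(batch, maps):
    # Pad a batch of token lists into a LongTensor of ids (assumed behavior).
    PAD = maps.get('<pad>')  # assumed padding id
    UNK = maps.get('<unk>')  # assumed out-of-vocabulary id

    max_len = len(batch[0])  # batches are assumed sorted longest-first
    batch_tensor = torch.ones(len(batch), max_len).long() * PAD
    for i, sent in enumerate(batch):
        for j, token in enumerate(sent):
            batch_tensor[i][j] = maps.get(token, UNK)
    lengths = [len(sent) for sent in batch]
    return batch_tensor, lengths

The longest-first assumption is consistent with the `sort_by_lengths` calls in the test methods below.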
Example #2
    def validate(self, dev_word_lists, dev_tag_lists, word2id, tag2id):
        self.model.eval()
        # no gradient tracking during validation, which saves memory
        with torch.no_grad():
            val_losses = 0.
            val_step = 0
            for ind in range(0, len(dev_word_lists), self.batch_size):
                val_step += 1
                batch_sents = dev_word_lists[ind:ind + self.batch_size]
                batch_tag = dev_tag_lists[ind:ind + self.batch_size]
                tensorized_sent, lengths = tensorized(batch_sents, word2id)
                tensorized_tag, _ = tensorized(batch_tag, tag2id)  # same lengths as the sentences

                tensorized_sent = tensorized_sent.to(self.device)
                tensorized_tag = tensorized_tag.to(self.device)

                scores = self.model(tensorized_sent, lengths)
                loss = self.model.loss_cal(scores, lengths, tensorized_tag)
                val_losses += loss.item()
            val_loss = val_losses / val_step  # average loss over all batches

            if val_loss < self._best_val_loss:
                print("Upgrade Model and save Model")

                self.best_model = deepcopy(self.model)  # deepcopy builds an independent copy of the model object
                self._best_val_loss = val_loss

            return val_loss
Example #3
    def test(self, test_word_lists, test_tag_lists, word2id, tag2id):
        test_word_lists, test_tag_lists, indices = sort_by_lengths(
            test_word_lists, test_tag_lists)
        tensorized_sent, lengths = tensorized(test_word_lists, word2id)
        tag_lists = [
            test_tag_list[:lengths[i]]
            for i, test_tag_list in enumerate(test_tag_lists)
        ]
        self.best_model.eval()
        pred_tagid_lists = []
        with torch.no_grad():
            B = self.batch_size
            tensorized_sent = tensorized_sent.to(self.device)
            for ind in range(0, len(test_word_lists), B):
                # decode one batch at a time
                batch_tagids = self.best_model.test(
                    tensorized_sent[ind:ind + B], lengths[ind:ind + B],
                    tag2id)  # [B, L]
                pred_tagid_lists += batch_tagids

        id2tag = dict((id, tag) for tag, id in tag2id.items())
        pred_tag_lists = []  # [B, L]
        for i, ids in enumerate(pred_tagid_lists):
            tag_list = []
            for j in range(lengths[i]):
                tag_list.append(id2tag[ids[j].item()])
            pred_tag_lists.append(tag_list)

        # note: both lists are returned in length-sorted order; indices is not used here
        return pred_tag_lists, tag_lists
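
`sort_by_lengths` is likewise assumed rather than shown; the model presumably packs padded sequences, which requires descending lengths. A sketch consistent with how the examples use its triple return value:

def sort_by_lengths(word_lists, tag_lists):
    # sort sentences (and their tags) by length, longest first,
    # and return the permutation so the caller can restore the order
    pairs = list(zip(word_lists, tag_lists))
    indices = sorted(range(len(pairs)),
                     key=lambda k: len(pairs[k][0]),
                     reverse=True)
    pairs = [pairs[i] for i in indices]
    word_lists, tag_lists = list(zip(*pairs))
    return word_lists, tag_lists, indices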
Example #4
    def test_no_tag(self, word_lists, word2id, tag2id):
        # sort sentences by length, longest first, for packed-sequence processing
        indices = sorted(range(len(word_lists)),
                         key=lambda k: len(word_lists[k]),
                         reverse=True)
        word_lists = [word_lists[i] for i in indices]

        tensorized_sent, lengths = tensorized(word_lists, word2id)
        tensorized_sent = tensorized_sent.to(self.device)

        self.best_model.eval()
        with torch.no_grad():
            batch_tagids = self.best_model.test(tensorized_sent, lengths,
                                                tag2id)  # [B, L]
        id2tag = dict((id, tag) for tag, id in tag2id.items())

        # convert ids back to tags
        pred_tag_lists = []
        for i, ids in enumerate(batch_tagids):
            tag_list = []
            if self.crf:
                for j in range(lengths[i] - 1):  # the <end> tag is dropped during CRF decoding
                    tag_list.append(id2tag[ids[j].item()])
            else:
                for j in range(lengths[i]):
                    tag_list.append(id2tag[ids[j].item()])
            pred_tag_lists.append(tag_list)

        # restore the original sentence order
        ind_maps = sorted(list(enumerate(indices)), key=lambda e: e[1])
        indices, _ = list(zip(*ind_maps))
        pred_tag_lists = [pred_tag_lists[i] for i in indices]

        return pred_tag_lists
Example #5
    def test(self, word_lists, tag_lists, word2id, tag2id):
        """返回最佳模型在测试集上的预测结果"""
        # data preparation
        word_lists, tag_lists, indices = sort_by_lengths(word_lists, tag_lists)
        tensorized_sents, lengths = tensorized(word_lists, word2id)
        tensorized_sents = tensorized_sents.to(self.device)

        self.best_model.eval()
        with torch.no_grad():
            batch_tagids = self.best_model.test(tensorized_sents, lengths,
                                                tag2id)

        # convert ids back to tags
        pred_tag_lists = []
        id2tag = dict((id_, tag) for tag, id_ in tag2id.items())
        for i, ids in enumerate(batch_tagids):
            tag_list = []
            if self.crf:
                for j in range(lengths[i] - 1):  # the <end> tag is dropped during CRF decoding
                    tag_list.append(id2tag[ids[j].item()])
            else:
                for j in range(lengths[i]):
                    tag_list.append(id2tag[ids[j].item()])
            pred_tag_lists.append(tag_list)

        # indices records the index mapping produced by sorting on length.
        # For example, indices = [1, 2, 0] means the element originally at
        # index 1 now sits at index 0, the element originally at index 2 now
        # sits at index 1, and so on.
        # Use indices to restore pred_tag_lists and tag_lists to the original order.
        ind_maps = sorted(list(enumerate(indices)), key=lambda e: e[1])
        indices, _ = list(zip(*ind_maps))
        pred_tag_lists = [pred_tag_lists[i] for i in indices]
        tag_lists = [tag_lists[i] for i in indices]

        return pred_tag_lists, tag_lists
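
The order-restoration idiom at the end is easy to get wrong, so here is a self-contained demonstration of how sorting `enumerate(indices)` by the original index inverts the permutation:

indices = [1, 2, 0]             # sorted position 0 came from original position 1, etc.
sorted_items = ['b', 'c', 'a']  # the original order was ['a', 'b', 'c']

ind_maps = sorted(enumerate(indices), key=lambda e: e[1])  # [(2, 0), (0, 1), (1, 2)]
positions, _ = zip(*ind_maps)                              # (2, 0, 1)
restored = [sorted_items[i] for i in positions]
assert restored == ['a', 'b', 'c']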
Example #6
    def train_step(self, word_lists, tag_lists, word2id, tag2id):
        self.model.train()
        self.step += 1

        # word and tag sequences share the same lengths
        tensorized_sents, lengths = tensorized(word_lists, word2id)
        tensorized_tags, _ = tensorized(tag_lists, tag2id)

        tensorized_sents = tensorized_sents.to(self.device)
        tensorized_tags = tensorized_tags.to(self.device)
        scores = self.model(tensorized_sents, lengths)

        # compute the loss and update the parameters
        self.optimizer.zero_grad()
        loss = self.model.loss_cal(scores, lengths, tensorized_tags)
        loss.backward()
        self.optimizer.step()

        return loss.item()
Example #7
    def train_step(self, batch_sents, batch_tags, word2id, tag2id):
        self.model.train()
        self.step += 1

        # convert the batch to tensors
        tensorized_sents, lengths = tensorized(batch_sents, word2id)
        targets, _ = tensorized(batch_tags, tag2id)
        tensorized_sents, targets = tensorized_sents.to(
            self.device), targets.to(self.device)

        # forward: scores has shape [batch, seq_len, out_size]
        scores = self.model(tensorized_sents, lengths)

        # compute the loss and backpropagate
        self.model.zero_grad()
        loss = self.cal_loss_func(scores, targets, tag2id)
        loss.backward()
        self.optimizer.step()

        return loss.item()
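
Examples #1 and #7 route the loss through a `cal_loss_func(scores, targets, tag2id)` that is not included. For a softmax tagger over padded batches, this is typically a cross-entropy that masks out the padding positions; here is a sketch under that assumption (the `<pad>` key is assumed):

import torch.nn.functional as F

def cal_loss(logits, targets, tag2id):
    # Masked cross-entropy over a padded batch (assumed behavior).
    # logits:  [B, L, out_size] emission scores
    # targets: [B, L] gold tag ids, padded with tag2id['<pad>']
    PAD = tag2id.get('<pad>')  # assumed padding id
    mask = (targets != PAD)    # True at real tokens

    targets = targets[mask]    # keep only the unpadded positions
    out_size = logits.size(2)
    logits = logits.masked_select(
        mask.unsqueeze(2).expand(-1, -1, out_size)
    ).contiguous().view(-1, out_size)

    return F.cross_entropy(logits, targets)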
Example #8
    def test(self, test_word_lists, test_tag_lists, word2id, tag2id):

        # sort by length; the original order is restored at the end
        test_word_lists, test_tag_lists, indices = sort_by_lengths(
            test_word_lists, test_tag_lists)

        tensorized_sent, lengths = tensorized(test_word_lists, word2id)

        tensorized_sent = tensorized_sent.to(self.device)

        self.best_model.eval()
        with torch.no_grad():
            batch_tagids = self.best_model.test(tensorized_sent, lengths,
                                                tag2id)  #[B,L]
        id2tag = dict((id, tag) for tag, id in tag2id.items())
        pred_tag_lists = []  #[B,L]
        for i, ids in enumerate(batch_tagids):
            tag_list = []  #(L,)
            if self.crf:
                # the <end> tag is dropped during CRF decoding
                for j in range(lengths[i] - 1):
                    tag_list.append(
                        id2tag[ids[j].item()])  # item() pulls the Python value out of the tensor; easy to forget
            else:
                for j in range(lengths[i]):
                    tag_list.append(id2tag[ids[j].item()])

            pred_tag_lists.append(tag_list)

        # e.g. indices = [1, 2, 0] means the element originally at index 1 now sits at
        # index 0; enumerate gives [(0, 1), (1, 2), (2, 0)], which sorts to [(2, 0), (0, 1), (1, 2)]
        ind_maps = sorted(list(enumerate(indices)), key=lambda e: e[1])
        indices, _ = list(zip(*ind_maps))
        pred_tag_lists = [pred_tag_lists[i] for i in indices]
        tag_lists = [test_tag_lists[i] for i in indices]

        return pred_tag_lists, tag_lists
Example #9
# xe_loss = FocalLoss(Args.alpha.value, Args.gamma.value).to(DEVICE)
xe_loss = nn.CrossEntropyLoss().to(DEVICE)

model = nn.DataParallel(Bert().to(DEVICE))
classifier = nn.DataParallel(Classifier(xe_loss).to(DEVICE))
optim = BertAdam(model.parameters(), lr=Args.bert_lr.value)
c_optim = torch.optim.Adam(classifier.parameters(), lr=Args.c_lr.value)
accumulation_steps = Args.accumulation_steps.value

for epoch in range(Args.epochs.value):
    model.train()
    classifier.train()
    total_loss = 0
    for i, batch in enumerate(train_dataloader):

        data, mask = tensorized(batch[:, 0], vocab)
        label = torch.tensor(list(batch[:, 1])).to(DEVICE)
        data, mask = data.to(DEVICE), mask.to(DEVICE)
        output = model(data, mask)
        logit, loss = classifier(output, label)

        # DataParallel returns one loss per replica; average them, then scale
        # down so the accumulated gradients match a single large-batch step
        loss = loss.mean() / accumulation_steps
        loss.backward()
        if (i + 1) % accumulation_steps == 0:
            optim.step()
            c_optim.step()
            optim.zero_grad()
            c_optim.zero_grad()

        total_loss += loss.item() * accumulation_steps
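
The division by `accumulation_steps` implements gradient accumulation: gradients from several small batches are summed before one optimizer step, approximating a single large-batch update. A stripped-down illustration of the pattern (the model and data here are placeholders):

import torch
import torch.nn as nn

model = nn.Linear(10, 2)
optim = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()
accumulation_steps = 4

for i in range(100):
    x = torch.randn(8, 10)         # placeholder batch
    y = torch.randint(0, 2, (8,))  # placeholder labels
    loss = loss_fn(model(x), y) / accumulation_steps
    loss.backward()                # gradients accumulate across batches
    if (i + 1) % accumulation_steps == 0:
        optim.step()               # one update per accumulation window
        optim.zero_grad()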