Example #1
File: bpnn_crf.py  Project: zysite/post
class BPNN_CRF(nn.Module):

    def __init__(self, n_context, n_vocab, n_embed, n_hidden, n_out, drop=0.5):
        super(BPNN_CRF, self).__init__()

        self.embed = nn.Embedding(n_vocab, n_embed)
        # hidden layer
        self.hid = nn.Sequential(nn.Linear(n_embed * n_context, n_hidden),
                                 nn.ReLU())
        # output layer
        self.out = nn.Linear(n_hidden, n_out)
        # CRF layer
        self.crf = CRF(n_out)

        self.drop = nn.Dropout(drop)

    def load_pretrained(self, embed):
        # freeze=False: keep fine-tuning the pretrained embeddings
        self.embed = nn.Embedding.from_pretrained(embed, False)

    def forward(self, x):
        B, T, N = x.shape
        # look up and concatenate the context word embeddings
        x = self.embed(x).view(B, T, -1)

        x = self.hid(x)
        x = self.drop(x)

        return self.out(x)

    def fit(self, train_loader, dev_loader, test_loader,
            epochs, interval, eta, file):
        # track total training time
        total_time = timedelta()
        # track the best dev accuracy and the epoch it was reached
        max_e, max_acc = 0, 0.0
        # use Adam as the optimizer
        self.optimizer = optim.Adam(params=self.parameters(), lr=eta)

        for epoch in range(1, epochs + 1):
            start = datetime.now()
            # update the parameters on the training set
            self.update(train_loader)

            print(f"Epoch: {epoch} / {epochs}:")
            loss, train_acc = self.evaluate(train_loader)
            print(f"{'train:':<6} Loss: {loss:.4f} Accuracy: {train_acc:.2%}")
            loss, dev_acc = self.evaluate(dev_loader)
            print(f"{'dev:':<6} Loss: {loss:.4f} Accuracy: {dev_acc:.2%}")
            loss, test_acc = self.evaluate(test_loader)
            print(f"{'test:':<6} Loss: {loss:.4f} Accuracy: {test_acc:.2%}")
            t = datetime.now() - start
            print(f"{t}s elapsed\n")
            total_time += t

            # save the model that performs best on the dev set
            if dev_acc > max_acc:
                torch.save(self, file)
                max_e, max_acc = epoch, dev_acc
            elif epoch - max_e >= interval:
                break
        print(f"max accuracy of dev is {max_acc:.2%} at epoch {max_e}")
        print(f"mean time of each epoch is {total_time / epoch}s\n")

    def update(self, loader):
        # switch to training mode
        self.train()

        # iterate over training batches from the loader
        for x, y, lens in loader:
            # clear accumulated gradients
            self.optimizer.zero_grad()
            # mask out the padding positions beyond each sentence's length
            mask = torch.arange(y.size(1)) < lens.unsqueeze(-1)

            out = self(x)
            out = out.transpose(0, 1)  # [T, B, N]
            y, mask = y.t(), mask.t()  # [T, B]
            loss = self.crf(out, y, mask)
            # compute gradients
            loss.backward()
            # update the parameters
            self.optimizer.step()

    @torch.no_grad()
    def evaluate(self, loader):
        # switch to evaluation mode
        self.eval()

        loss, tp, total = 0, 0, 0
        # iterate over evaluation batches from the loader
        for x, y, lens in loader:
            mask = torch.arange(y.size(1)) < lens.unsqueeze(-1)
            target = y[mask]

            out = self(x)
            out = out.transpose(0, 1)  # [T, B, N]
            y, mask = y.t(), mask.t()  # [T, B]
            predict = self.crf.viterbi(out, mask)
            loss += self.crf(out, y, mask)
            tp += torch.sum(predict == target).item()
            total += lens.sum().item()
        loss /= len(loader)

        return loss, tp / total

    def collate_fn(self, data):
        # trim each batch to the length of its longest sentence
        x, y, lens = zip(*data)
        max_len = max(lens)
        x = torch.stack(x)[:, :max_len]
        y = torch.stack(y)[:, :max_len]
        lens = torch.tensor(lens)

        return x, y, lens
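
All of these examples depend on a CRF class that the snippets themselves do not define. For reference, the following is a minimal sketch of a linear-chain CRF with the same interface: calling self.crf(emit, target, mask) returns the batch loss and self.crf.viterbi(emit, mask) returns the flattened best tag sequences, with emit in [T, B, N] layout. This is an illustrative assumption, not the actual implementation from zysite/post.

import torch
import torch.nn as nn


class CRF(nn.Module):
    # Hypothetical minimal linear-chain CRF; the real class in zysite/post
    # may differ in initialization and normalization details.

    def __init__(self, n_tags):
        super().__init__()
        # transition score from tag i to tag j
        self.trans = nn.Parameter(torch.zeros(n_tags, n_tags))
        # scores for starting / ending a sentence with a given tag
        self.strans = nn.Parameter(torch.zeros(n_tags))
        self.etrans = nn.Parameter(torch.zeros(n_tags))

    def forward(self, emit, target, mask):
        # emit: [T, B, N], target/mask: [T, B]; returns mean NLL per sentence
        logZ = self.logZ(emit, mask)
        score = self.score(emit, target, mask)
        return (logZ - score) / emit.size(1)

    def score(self, emit, target, mask):
        # unnormalized score of the gold tag sequences
        T, B, N = emit.shape
        score = self.strans[target[0]] \
            + emit[0].gather(-1, target[0].unsqueeze(-1)).squeeze(-1)
        for t in range(1, T):
            step = self.trans[target[t - 1], target[t]] \
                + emit[t].gather(-1, target[t].unsqueeze(-1)).squeeze(-1)
            score += step * mask[t]
        # add the end transition from each sentence's last real tag
        last = target.gather(0, (mask.sum(0) - 1).unsqueeze(0)).squeeze(0)
        return (score + self.etrans[last]).sum()

    def logZ(self, emit, mask):
        # forward algorithm in log space
        T, B, N = emit.shape
        alpha = self.strans + emit[0]  # [B, N]
        for t in range(1, T):
            scores = alpha.unsqueeze(-1) + self.trans + emit[t].unsqueeze(1)
            alpha = torch.where(mask[t].unsqueeze(-1),
                                torch.logsumexp(scores, dim=1), alpha)
        return torch.logsumexp(alpha + self.etrans, dim=-1).sum()

    def viterbi(self, emit, mask):
        # best tag sequences, concatenated to align with y[mask]
        T, B, N = emit.shape
        lens = mask.sum(0)
        delta, paths = self.strans + emit[0], []
        for t in range(1, T):
            scores = delta.unsqueeze(-1) + self.trans + emit[t].unsqueeze(1)
            best, ptr = scores.max(dim=1)
            delta = torch.where(mask[t].unsqueeze(-1), best, delta)
            paths.append(ptr)
        predicts = []
        for b in range(B):
            n = int(lens[b])
            tag = int((delta[b] + self.etrans).argmax())
            seq = [tag]
            # follow the backpointers from the last real timestep
            for ptr in reversed(paths[:n - 1]):
                tag = int(ptr[b, tag])
                seq.append(tag)
            predicts.append(torch.tensor(seq[::-1]))
        return torch.cat(predicts)

Dividing by emit.size(1) makes the loss a per-sentence mean, which matches how the evaluate methods above average it over batches.
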
Example #2
File: lstm_crf.py  Project: zysite/post
class LSTM_CRF(nn.Module):

    def __init__(self, n_vocab, n_embed, n_hidden, n_out, drop=0.5):
        super(LSTM_CRF, self).__init__()

        self.embed = nn.Embedding(n_vocab, n_embed)
        # word-level bidirectional LSTM layer
        self.lstm = nn.LSTM(input_size=n_embed,
                            hidden_size=n_hidden,
                            batch_first=True,
                            bidirectional=True)

        # output layer
        self.out = nn.Linear(n_hidden * 2, n_out)
        # CRF layer
        self.crf = CRF(n_out)

        self.drop = nn.Dropout(drop)

    def load_pretrained(self, embed):
        self.embed = nn.Embedding.from_pretrained(embed, False)

    def forward(self, x, lens):
        B, T = x.shape
        # look up word embeddings
        x = self.embed(x)
        x = self.drop(x)

        # pack so the LSTM skips padded positions (batch_first=True)
        x = pack_padded_sequence(x, lens, True)
        x, _ = self.lstm(x)
        x, _ = pad_packed_sequence(x, True)
        x = self.drop(x)

        return self.out(x)

    def fit(self, train_loader, dev_loader, test_loader,
            epochs, interval, eta, file):
        # track total training time
        total_time = timedelta()
        # track the best dev accuracy and the epoch it was reached
        max_e, max_acc = 0, 0.0
        # use Adam as the optimizer
        self.optimizer = optim.Adam(params=self.parameters(), lr=eta)

        for epoch in range(1, epochs + 1):
            start = datetime.now()
            # update the parameters on the training set
            self.update(train_loader)

            print(f"Epoch: {epoch} / {epochs}:")
            loss, train_acc = self.evaluate(train_loader)
            print(f"{'train:':<6} Loss: {loss:.4f} Accuracy: {train_acc:.2%}")
            loss, dev_acc = self.evaluate(dev_loader)
            print(f"{'dev:':<6} Loss: {loss:.4f} Accuracy: {dev_acc:.2%}")
            loss, test_acc = self.evaluate(test_loader)
            print(f"{'test:':<6} Loss: {loss:.4f} Accuracy: {test_acc:.2%}")
            t = datetime.now() - start
            print(f"{t}s elapsed\n")
            total_time += t

            # save the model that performs best on the dev set
            if dev_acc > max_acc:
                torch.save(self, file)
                max_e, max_acc = epoch, dev_acc
            elif epoch - max_e >= interval:
                break
        print(f"max accuracy of dev is {max_acc:.2%} at epoch {max_e}")
        print(f"mean time of each epoch is {total_time / epoch}s\n")

    def update(self, loader):
        # switch to training mode
        self.train()

        # iterate over training batches from the loader
        for x, y, lens in loader:
            # clear accumulated gradients
            self.optimizer.zero_grad()
            # padding index 0 marks the positions to ignore
            mask = x.gt(0)

            out = self(x, lens)
            out = out.transpose(0, 1)  # [T, B, N]
            y, mask = y.t(), mask.t()  # [T, B]
            loss = self.crf(out, y, mask)
            # compute gradients
            loss.backward()
            # update the parameters
            self.optimizer.step()

    @torch.no_grad()
    def evaluate(self, loader):
        # switch to evaluation mode
        self.eval()

        loss, tp, total = 0, 0, 0
        # iterate over evaluation batches from the loader
        for x, y, lens in loader:
            mask = x.gt(0)
            target = y[mask]

            out = self(x, lens)
            out = out.transpose(0, 1)  # [T, B, N]
            y, mask = y.t(), mask.t()  # [T, B]
            predict = self.crf.viterbi(out, mask)
            loss += self.crf(out, y, mask)
            tp += torch.sum(predict == target).item()
            total += lens.sum().item()
        loss /= len(loader)

        return loss, tp / total

    def collate_fn(self, data):
        # sort the batch by length (descending), as pack_padded_sequence
        # requires, then trim to the longest sentence
        x, y, lens = zip(
            *sorted(data, key=lambda x: x[-1], reverse=True)
        )
        max_len = lens[0]
        x = torch.stack(x)[:, :max_len]
        y = torch.stack(y)[:, :max_len]
        lens = torch.tensor(lens)

        return x, y, lens
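
For context, here is a hypothetical way to wire LSTM_CRF up for training with a DataLoader. Every size, hyperparameter, and file name below is made up for illustration; the real project builds its loaders from a corpus rather than random tensors.

import torch
from torch.utils.data import DataLoader

# toy data: 8 sentences padded to length 20; word index 0 is padding
x = torch.randint(1, 10000, (8, 20))
y = torch.randint(0, 32, (8, 20))
lens = [int(n) for n in torch.randint(1, 21, (8,))]
for i, n in enumerate(lens):
    x[i, n:] = 0  # zero out the padding positions

data = [(x[i], y[i], lens[i]) for i in range(8)]
model = LSTM_CRF(n_vocab=10000, n_embed=100, n_hidden=150, n_out=32)
# collate_fn sorts each batch by length, as pack_padded_sequence requires
loader = DataLoader(data, batch_size=4, collate_fn=model.collate_fn)
# reuse one loader as train/dev/test just to exercise the API
model.fit(loader, loader, loader,
          epochs=2, interval=10, eta=0.001, file="lstm_crf.pt")
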
Example #3
class CHAR_LSTM_CRF(nn.Module):
    def __init__(self,
                 n_char,
                 n_char_embed,
                 n_char_out,
                 n_vocab,
                 n_embed,
                 n_hidden,
                 n_out,
                 drop=0.5):
        super(CHAR_LSTM_CRF, self).__init__()

        self.embed = nn.Embedding(n_vocab, n_embed)
        # character-level LSTM layer
        self.char_lstm = CharLSTM(n_char=n_char,
                                  n_embed=n_char_embed,
                                  n_out=n_char_out)

        # word-level bidirectional LSTM layer
        self.word_lstm = nn.LSTM(input_size=n_embed + n_char_out,
                                 hidden_size=n_hidden,
                                 batch_first=True,
                                 bidirectional=True)

        # output layer
        self.out = nn.Linear(n_hidden * 2, n_out)
        # CRF layer
        self.crf = CRF(n_out)

        self.drop = nn.Dropout(drop)

    def load_pretrained(self, embed):
        self.embed = nn.Embedding.from_pretrained(embed, False)

    def forward(self, x, char_x, lens):
        B, T = x.shape
        # padding index 0 marks the positions to ignore
        mask = x.gt(0)
        # look up word embeddings
        x = self.embed(x)

        # encode the characters of every real token, then re-pad by sentence
        char_x = self.char_lstm(char_x[mask])
        char_x = pad_sequence(torch.split(char_x, lens.tolist()), True)

        # concatenate the word-level and character-level representations
        x = torch.cat((x, char_x), dim=-1)
        x = self.drop(x)

        # pack so the LSTM skips padded positions (batch_first=True)
        x = pack_padded_sequence(x, lens, True)
        x, _ = self.word_lstm(x)
        x, _ = pad_packed_sequence(x, True)
        x = self.drop(x)

        return self.out(x)

    def fit(self, train_loader, dev_loader, test_loader, epochs, interval, eta,
            file):
        # track total training time
        total_time = timedelta()
        # track the best dev accuracy and the epoch it was reached
        max_e, max_acc = 0, 0.0
        # use Adam as the optimizer
        self.optimizer = optim.Adam(params=self.parameters(), lr=eta)

        for epoch in range(1, epochs + 1):
            start = datetime.now()
            # update the parameters on the training set
            self.update(train_loader)

            print(f"Epoch: {epoch} / {epochs}:")
            loss, train_acc = self.evaluate(train_loader)
            print(f"{'train:':<6} Loss: {loss:.4f} Accuracy: {train_acc:.2%}")
            loss, dev_acc = self.evaluate(dev_loader)
            print(f"{'dev:':<6} Loss: {loss:.4f} Accuracy: {dev_acc:.2%}")
            loss, test_acc = self.evaluate(test_loader)
            print(f"{'test:':<6} Loss: {loss:.4f} Accuracy: {test_acc:.2%}")
            t = datetime.now() - start
            print(f"{t}s elapsed\n")
            total_time += t

            # save the model that performs best on the dev set
            if dev_acc > max_acc:
                torch.save(self, file)
                max_e, max_acc = epoch, dev_acc
            elif epoch - max_e >= interval:
                break
        print(f"max accuracy of dev is {max_acc:.2%} at epoch {max_e}")
        print(f"mean time of each epoch is {total_time / epoch}s\n")

    def update(self, loader):
        # switch to training mode
        self.train()

        # iterate over training batches from the loader
        for x, y, char_x, lens in loader:
            # clear accumulated gradients
            self.optimizer.zero_grad()
            # padding index 0 marks the positions to ignore
            mask = x.gt(0)

            out = self(x, char_x, lens)
            out = out.transpose(0, 1)  # [T, B, N]
            y, mask = y.t(), mask.t()  # [T, B]
            loss = self.crf(out, y, mask)
            # compute gradients
            loss.backward()
            # update the parameters
            self.optimizer.step()

    @torch.no_grad()
    def evaluate(self, loader):
        # switch to evaluation mode
        self.eval()

        loss, tp, total = 0, 0, 0
        # iterate over evaluation batches from the loader
        for x, y, char_x, lens in loader:
            mask = x.gt(0)
            target = y[mask]

            out = self(x, char_x, lens)
            out = out.transpose(0, 1)  # [T, B, N]
            y, mask = y.t(), mask.t()  # [T, B]
            predict = self.crf.viterbi(out, mask)
            loss += self.crf(out, y, mask)
            tp += torch.sum(predict == target).item()
            total += lens.sum().item()
        loss /= len(loader)

        return loss, tp / total

    def collate_fn(self, data):
        # sort the batch by length (descending), as pack_padded_sequence
        # requires, then trim to the longest sentence
        x, y, char_x, lens = zip(
            *sorted(data, key=lambda x: x[-1], reverse=True))
        max_len = lens[0]
        x = torch.stack(x)[:, :max_len]
        y = torch.stack(y)[:, :max_len]
        char_x = torch.stack(char_x)[:, :max_len]
        lens = torch.tensor(lens)

        return x, y, char_x, lens
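
CHAR_LSTM_CRF also relies on a CharLSTM module that is not shown. Below is a minimal sketch with the interface used above, assumed for illustration (the actual module in zysite/post may differ): it encodes each word's character sequence with a bidirectional LSTM and returns the concatenated final hidden states as one fixed-size vector per word.

import torch
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence


class CharLSTM(nn.Module):
    # Hypothetical character encoder: takes [n_words, max_word_len] char
    # indices (0 = padding) and returns one n_out vector per word.
    # Assumes n_out is even so the two LSTM directions split it equally.

    def __init__(self, n_char, n_embed, n_out):
        super().__init__()
        self.embed = nn.Embedding(n_char, n_embed)
        self.lstm = nn.LSTM(input_size=n_embed,
                            hidden_size=n_out // 2,
                            batch_first=True,
                            bidirectional=True)

    def forward(self, x):
        mask = x.gt(0)
        lens = mask.sum(dim=1)
        # sort words by character count for packing; remember the inverse
        lens, order = lens.sort(descending=True)
        inverse = order.argsort()

        x = self.embed(x[order])
        x = pack_padded_sequence(x, lens.cpu(), batch_first=True)
        _, (h, _) = self.lstm(x)
        # concatenate the final forward and backward hidden states
        h = torch.cat((h[0], h[1]), dim=-1)
        return h[inverse]

Because forward receives char_x[mask], its input is already one row per real token across the whole batch, so the module never needs to know sentence boundaries.
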