Example #1
    def __init__(self,
                 n_char,
                 n_char_embed,
                 n_char_out,
                 n_vocab,
                 n_embed,
                 n_hidden,
                 n_out,
                 drop=0.5):
        super(CHAR_LSTM_CRF, self).__init__()

        self.embed = nn.Embedding(n_vocab, n_embed)
        # character-embedding LSTM layer
        self.char_lstm = CharLSTM(n_char=n_char,
                                  n_embed=n_char_embed,
                                  n_out=n_char_out)

        # word-level LSTM layer (over word + character features)
        self.word_lstm = nn.LSTM(input_size=n_embed + n_char_out,
                                 hidden_size=n_hidden,
                                 batch_first=True,
                                 bidirectional=True)

        # output layer
        self.out = nn.Linear(n_hidden * 2, n_out)
        # CRF layer
        self.crf = CRF(n_out)

        self.drop = nn.Dropout(drop)
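
Because the word LSTM above is bidirectional, its output concatenates the forward and backward hidden states, which is why the output layer takes n_hidden * 2 features. A minimal, self-contained shape check with made-up sizes:

import torch
import torch.nn as nn

# hypothetical sizes, chosen only to illustrate the shape contract
n_embed, n_char_out, n_hidden = 100, 50, 150

lstm = nn.LSTM(input_size=n_embed + n_char_out,
               hidden_size=n_hidden,
               batch_first=True,
               bidirectional=True)
x = torch.randn(4, 20, n_embed + n_char_out)  # [B=4, T=20, features]
out, _ = lstm(x)
print(out.shape)  # torch.Size([4, 20, 300]): forward and backward states concatenated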
Example #2
    def __init__(self,
                 n_vocab,
                 n_embed,
                 n_hidden,
                 n_out,
                 embed=None,
                 drop=0.5):
        super(LSTM_CRF, self).__init__()

        if embed is None:
            self.embed = nn.Embedding(n_vocab, n_embed)
        else:
            # freeze=False: keep the pretrained embeddings trainable
            self.embed = nn.Embedding.from_pretrained(embed, False)

        # word-embedding LSTM layer
        self.lstm = nn.LSTM(input_size=n_embed,
                            hidden_size=n_hidden,
                            batch_first=True,
                            bidirectional=True)

        # output layer
        self.out = nn.Linear(n_hidden * 2, n_out)
        # CRF layer
        self.crf = CRF(n_out)

        self.drop = nn.Dropout(drop)
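
For reference, nn.Embedding.from_pretrained(embed, False) passes freeze=False positionally, so the pretrained vectors are fine-tuned during training rather than frozen. A tiny sketch with a made-up weight matrix:

import torch
import torch.nn as nn

pretrained = torch.randn(3, 5)  # hypothetical 3-word vocabulary, dim 5
emb = nn.Embedding.from_pretrained(pretrained, freeze=False)
print(emb.weight.requires_grad)  # True: the weights will be updated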
Example #3
File: bpnn_crf.py Project: zysite/post
    def __init__(self, n_context, n_vocab, n_embed, n_hidden, n_out, drop=0.5):
        super(BPNN_CRF, self).__init__()

        self.embed = nn.Embedding(n_vocab, n_embed)
        # hidden layer
        self.hid = nn.Sequential(nn.Linear(n_embed * n_context, n_hidden),
                                 nn.ReLU())
        # output layer
        self.out = nn.Linear(n_hidden, n_out)
        # CRF layer
        self.crf = CRF(n_out)

        self.drop = nn.Dropout(drop)
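
The hidden layer expects n_embed * n_context features per token, so each position of the input must carry n_context word ids (a window around the token). One self-contained way to build such windows, assuming a hypothetical pad index of 0:

import torch
import torch.nn.functional as F

n_context = 5                       # hypothetical window: 2 words each side + center
ids = torch.tensor([[3, 7, 2, 9]])  # [B=1, T=4], made-up word ids
half = n_context // 2
padded = F.pad(ids, (half, half), value=0)  # pad both ends with index 0
windows = padded.unfold(1, n_context, 1)    # [1, 4, 5]: one window per token
print(windows.shape)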
Example #4
    def __init__(self,
                 args,
                 word_dict,
                 tag_dict,
                 pos_dict=None,
                 binary_dict=None):
        super(CnnLstmCrf, self).__init__()

        # Save dictionaries
        self.args = args
        self.word_dict = word_dict
        self.tag_dict = tag_dict
        self.pos_dict = pos_dict
        self.binary_dict = binary_dict

        # Pretrained Glove Wikipedia embeddings
        self.glove_emb = Embedding(len(word_dict), args.glove_dim)
        if args.use_glove:
            self.load_embeddings(self.glove_emb, args.glove_file, word_dict)

        # Pretrained Glove Twitter embeddings
        self.twitter_emb = Embedding(len(word_dict), args.twitter_dim)
        if args.use_twitter:
            self.load_embeddings(self.twitter_emb, args.twitter_file,
                                 word_dict)

        # POS embeddings
        if args.use_pos:
            self.pos_emb = Embedding(len(pos_dict), args.pos_dim)

        # ELMo pretrained BiLM embeddings
        if args.use_elmo:
            self.elmo = ELMO(args.elmo_dim, args.elmo_dropout)

        # Character byte encoder
        self.char_emb = Embedding(262, args.char_dim)
        self.char_cnn = CharCNN(args.char_dim, args.char_cnn_filters,
                                args.char_cnn_ngrams)

        # Main LSTM
        input_dim = (args.glove_dim + args.twitter_dim +
                     (args.pos_dim if args.use_pos else 0) +
                     (args.elmo_dim if args.use_elmo else 0) +
                     (len(binary_dict) if binary_dict else 0) +
                     self.char_cnn.cnn.get_output_dim())
        self.lstm = TokenLSTM(input_dim, args.lstm_dim, args.lstm_layers,
                              args.lstm_dropout)

        # CRF layer
        self.crf = CRF(args.lstm_dim * 2, tag_dict[1])
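
The LSTM input width is simply the sum of whichever feature blocks are enabled. A quick check of that arithmetic with hypothetical dimensions (argparse.Namespace stands in for the real args; char_cnn_out and n_binary stand in for self.char_cnn.cnn.get_output_dim() and len(binary_dict)):

from argparse import Namespace

args = Namespace(glove_dim=300, twitter_dim=200, pos_dim=25,
                 elmo_dim=1024, use_pos=True, use_elmo=False)
char_cnn_out, n_binary = 150, 4  # hypothetical values

input_dim = (args.glove_dim + args.twitter_dim +
             (args.pos_dim if args.use_pos else 0) +
             (args.elmo_dim if args.use_elmo else 0) +
             n_binary + char_cnn_out)
print(input_dim)  # 300 + 200 + 25 + 0 + 4 + 150 = 679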
Example #5
File: bpnn_crf.py Project: ThisIsSoMe/post
    def __init__(self,
                 n_context,
                 n_vocab,
                 n_embed,
                 n_hidden,
                 n_out,
                 embed=None,
                 drop=0.5):
        super(BPNN_CRF, self).__init__()

        if embed is None:
            self.embed = nn.Embedding(n_vocab, n_embed)
        else:
            # freeze=False: keep the pretrained embeddings trainable
            self.embed = nn.Embedding.from_pretrained(embed, False)

        # hidden layer
        self.hid = nn.Sequential(nn.Linear(n_embed * n_context, n_hidden),
                                 nn.ReLU())
        # output layer
        self.out = nn.Linear(n_hidden, n_out)
        # CRF layer
        self.crf = CRF(n_out)

        self.drop = nn.Dropout(drop)
Example #6
File: lstm_crf.py Project: zysite/post
    def __init__(self, n_vocab, n_embed, n_hidden, n_out, drop=0.5):
        super(LSTM_CRF, self).__init__()

        self.embed = nn.Embedding(n_vocab, n_embed)
        # word-embedding LSTM layer
        self.lstm = nn.LSTM(input_size=n_embed,
                            hidden_size=n_hidden,
                            batch_first=True,
                            bidirectional=True)

        # output layer
        self.out = nn.Linear(n_hidden * 2, n_out)
        # CRF layer
        self.crf = CRF(n_out)

        self.drop = nn.Dropout(drop)
Example #7
File: bpnn_crf.py Project: zysite/post
class BPNN_CRF(nn.Module):

    def __init__(self, n_context, n_vocab, n_embed, n_hidden, n_out, drop=0.5):
        super(BPNN_CRF, self).__init__()

        self.embed = nn.Embedding(n_vocab, n_embed)
        # hidden layer
        self.hid = nn.Sequential(nn.Linear(n_embed * n_context, n_hidden),
                                 nn.ReLU())
        # output layer
        self.out = nn.Linear(n_hidden, n_out)
        # CRF layer
        self.crf = CRF(n_out)

        self.drop = nn.Dropout(drop)

    def load_pretrained(self, embed):
        self.embed = nn.Embedding.from_pretrained(embed, False)

    def forward(self, x):
        B, T, N = x.shape
        # look up word embeddings and flatten the context window
        x = self.embed(x).view(B, T, -1)

        x = self.hid(x)
        x = self.drop(x)

        return self.out(x)

    def fit(self, train_loader, dev_loader, test_loader,
            epochs, interval, eta, file):
        # track total training time
        total_time = timedelta()
        # track the best dev accuracy and its epoch
        max_e, max_acc = 0, 0.0
        # use the Adam optimizer
        self.optimizer = optim.Adam(params=self.parameters(), lr=eta)

        for epoch in range(1, epochs + 1):
            start = datetime.now()
            # update parameters on the training set
            self.update(train_loader)

            print(f"Epoch: {epoch} / {epochs}:")
            loss, train_acc = self.evaluate(train_loader)
            print(f"{'train:':<6} Loss: {loss:.4f} Accuracy: {train_acc:.2%}")
            loss, dev_acc = self.evaluate(dev_loader)
            print(f"{'dev:':<6} Loss: {loss:.4f} Accuracy: {dev_acc:.2%}")
            loss, test_acc = self.evaluate(test_loader)
            print(f"{'test:':<6} Loss: {loss:.4f} Accuracy: {test_acc:.2%}")
            t = datetime.now() - start
            print(f"{t}s elapsed\n")
            total_time += t

            # save the model that performs best on dev
            if dev_acc > max_acc:
                torch.save(self, file)
                max_e, max_acc = epoch, dev_acc
            elif epoch - max_e >= interval:
                break
        print(f"max accuracy of dev is {max_acc:.2%} at epoch {max_e}")
        print(f"mean time of each epoch is {total_time / epoch}s\n")

    def update(self, loader):
        # switch to training mode
        self.train()

        # iterate over training batches
        for x, y, lens in loader:
            # clear accumulated gradients
            self.optimizer.zero_grad()
            # build the padding mask from the sentence lengths
            mask = torch.arange(y.size(1)) < lens.unsqueeze(-1)

            out = self(x)
            out = out.transpose(0, 1)  # [T, B, N]
            y, mask = y.t(), mask.t()  # [T, B]
            loss = self.crf(out, y, mask)
            # back-propagate
            loss.backward()
            # update the parameters
            self.optimizer.step()

    @torch.no_grad()
    def evaluate(self, loader):
        # switch to evaluation mode
        self.eval()

        loss, tp, total = 0, 0, 0
        # iterate over evaluation batches
        for x, y, lens in loader:
            mask = torch.arange(y.size(1)) < lens.unsqueeze(-1)
            target = y[mask]

            out = self(x)
            out = out.transpose(0, 1)  # [T, B, N]
            y, mask = y.t(), mask.t()  # [T, B]
            predict = self.crf.viterbi(out, mask)
            loss += self.crf(out, y, mask)
            tp += torch.sum(predict == target).item()
            total += lens.sum().item()
        loss /= len(loader)

        return loss, tp / total

    def collate_fn(self, data):
        x, y, lens = zip(*data)
        # truncate padding to the longest sentence in the batch
        max_len = max(lens)
        x = torch.stack(x)[:, :max_len]
        y = torch.stack(y)[:, :max_len]
        lens = torch.tensor(lens)

        return x, y, lens
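
The length-based mask used in update and evaluate is easiest to see on a concrete batch. A self-contained sketch with made-up lengths:

import torch

lens = torch.tensor([3, 1])              # hypothetical sentence lengths
y = torch.zeros(2, 4, dtype=torch.long)  # [B=2, T=4] padded tag matrix
mask = torch.arange(y.size(1)) < lens.unsqueeze(-1)
print(mask)
# tensor([[ True,  True,  True, False],
#         [ True, False, False, False]])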
Example #8
File: lstm_crf.py Project: zysite/post
class LSTM_CRF(nn.Module):

    def __init__(self, n_vocab, n_embed, n_hidden, n_out, drop=0.5):
        super(LSTM_CRF, self).__init__()

        self.embed = nn.Embedding(n_vocab, n_embed)
        # word-embedding LSTM layer
        self.lstm = nn.LSTM(input_size=n_embed,
                            hidden_size=n_hidden,
                            batch_first=True,
                            bidirectional=True)

        # output layer
        self.out = nn.Linear(n_hidden * 2, n_out)
        # CRF layer
        self.crf = CRF(n_out)

        self.drop = nn.Dropout(drop)

    def load_pretrained(self, embed):
        self.embed = nn.Embedding.from_pretrained(embed, False)

    def forward(self, x, lens):
        B, T = x.shape
        # look up word embeddings
        x = self.embed(x)
        x = self.drop(x)

        x = pack_padded_sequence(x, lens, True)
        x, _ = self.lstm(x)
        x, _ = pad_packed_sequence(x, True)
        x = self.drop(x)

        return self.out(x)

    def fit(self, train_loader, dev_loader, test_loader,
            epochs, interval, eta, file):
        # track total training time
        total_time = timedelta()
        # track the best dev accuracy and its epoch
        max_e, max_acc = 0, 0.0
        # use the Adam optimizer
        self.optimizer = optim.Adam(params=self.parameters(), lr=eta)

        for epoch in range(1, epochs + 1):
            start = datetime.now()
            # update parameters on the training set
            self.update(train_loader)

            print(f"Epoch: {epoch} / {epochs}:")
            loss, train_acc = self.evaluate(train_loader)
            print(f"{'train:':<6} Loss: {loss:.4f} Accuracy: {train_acc:.2%}")
            loss, dev_acc = self.evaluate(dev_loader)
            print(f"{'dev:':<6} Loss: {loss:.4f} Accuracy: {dev_acc:.2%}")
            loss, test_acc = self.evaluate(test_loader)
            print(f"{'test:':<6} Loss: {loss:.4f} Accuracy: {test_acc:.2%}")
            t = datetime.now() - start
            print(f"{t}s elapsed\n")
            total_time += t

            # save the model that performs best on dev
            if dev_acc > max_acc:
                torch.save(self, file)
                max_e, max_acc = epoch, dev_acc
            elif epoch - max_e >= interval:
                break
        print(f"max accuracy of dev is {max_acc:.2%} at epoch {max_e}")
        print(f"mean time of each epoch is {total_time / epoch}s\n")

    def update(self, loader):
        # switch to training mode
        self.train()

        # iterate over training batches
        for x, y, lens in loader:
            # clear accumulated gradients
            self.optimizer.zero_grad()
            # build the padding mask from the non-pad word ids
            mask = x.gt(0)

            out = self(x, lens)
            out = out.transpose(0, 1)  # [T, B, N]
            y, mask = y.t(), mask.t()  # [T, B]
            loss = self.crf(out, y, mask)
            # back-propagate
            loss.backward()
            # update the parameters
            self.optimizer.step()

    @torch.no_grad()
    def evaluate(self, loader):
        # switch to evaluation mode
        self.eval()

        loss, tp, total = 0, 0, 0
        # iterate over evaluation batches
        for x, y, lens in loader:
            mask = x.gt(0)
            target = y[mask]

            out = self(x, lens)
            out = out.transpose(0, 1)  # [T, B, N]
            y, mask = y.t(), mask.t()  # [T, B]
            predict = self.crf.viterbi(out, mask)
            loss += self.crf(out, y, mask)
            tp += torch.sum(predict == target).item()
            total += lens.sum().item()
        loss /= len(loader)

        return loss, tp / total

    def collate_fn(self, data):
        # sort by length (descending), as required by pack_padded_sequence
        x, y, lens = zip(
            *sorted(data, key=lambda x: x[-1], reverse=True)
        )
        max_len = lens[0]
        x = torch.stack(x)[:, :max_len]
        y = torch.stack(y)[:, :max_len]
        lens = torch.tensor(lens)

        return x, y, lens
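
The descending sort in collate_fn matters because pack_padded_sequence assumes sequences are sorted by length unless enforce_sorted=False is passed. A minimal demonstration with a made-up batch:

import torch
from torch.nn.utils.rnn import pack_padded_sequence

x = torch.randn(2, 4, 8)     # [B=2, T=4, E=8], made-up batch
lens = torch.tensor([4, 2])  # already sorted, longest first
packed = pack_padded_sequence(x, lens, batch_first=True)
print(packed.batch_sizes)    # tensor([2, 2, 1, 1])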
Example #9
class CHAR_LSTM_CRF(nn.Module):
    def __init__(self,
                 n_char,
                 n_char_embed,
                 n_char_out,
                 n_vocab,
                 n_embed,
                 n_hidden,
                 n_out,
                 drop=0.5):
        super(CHAR_LSTM_CRF, self).__init__()

        self.embed = nn.Embedding(n_vocab, n_embed)
        # character-embedding LSTM layer
        self.char_lstm = CharLSTM(n_char=n_char,
                                  n_embed=n_char_embed,
                                  n_out=n_char_out)

        # word-level LSTM layer (over word + character features)
        self.word_lstm = nn.LSTM(input_size=n_embed + n_char_out,
                                 hidden_size=n_hidden,
                                 batch_first=True,
                                 bidirectional=True)

        # output layer
        self.out = nn.Linear(n_hidden * 2, n_out)
        # CRF layer
        self.crf = CRF(n_out)

        self.drop = nn.Dropout(drop)

    def load_pretrained(self, embed):
        self.embed = nn.Embedding.from_pretrained(embed, False)

    def forward(self, x, char_x, lens):
        B, T = x.shape
        # build the padding mask from the non-pad word ids
        mask = x.gt(0)
        # look up word embeddings
        x = self.embed(x)

        # compute character-level representations for the real tokens
        char_x = self.char_lstm(char_x[mask])
        char_x = pad_sequence(torch.split(char_x, lens.tolist()), True)

        # concatenate the word and character representations
        x = torch.cat((x, char_x), dim=-1)
        x = self.drop(x)

        x = pack_padded_sequence(x, lens, True)
        x, _ = self.word_lstm(x)
        x, _ = pad_packed_sequence(x, True)
        x = self.drop(x)

        return self.out(x)

    def fit(self, train_loader, dev_loader, test_loader, epochs, interval, eta,
            file):
        # track total training time
        total_time = timedelta()
        # track the best dev accuracy and its epoch
        max_e, max_acc = 0, 0.0
        # use the Adam optimizer
        self.optimizer = optim.Adam(params=self.parameters(), lr=eta)

        for epoch in range(1, epochs + 1):
            start = datetime.now()
            # update parameters on the training set
            self.update(train_loader)

            print(f"Epoch: {epoch} / {epochs}:")
            loss, train_acc = self.evaluate(train_loader)
            print(f"{'train:':<6} Loss: {loss:.4f} Accuracy: {train_acc:.2%}")
            loss, dev_acc = self.evaluate(dev_loader)
            print(f"{'dev:':<6} Loss: {loss:.4f} Accuracy: {dev_acc:.2%}")
            loss, test_acc = self.evaluate(test_loader)
            print(f"{'test:':<6} Loss: {loss:.4f} Accuracy: {test_acc:.2%}")
            t = datetime.now() - start
            print(f"{t}s elapsed\n")
            total_time += t

            # save the model that performs best on dev
            if dev_acc > max_acc:
                torch.save(self, file)
                max_e, max_acc = epoch, dev_acc
            elif epoch - max_e >= interval:
                break
        print(f"max accuracy of dev is {max_acc:.2%} at epoch {max_e}")
        print(f"mean time of each epoch is {total_time / epoch}s\n")

    def update(self, loader):
        # switch to training mode
        self.train()

        # iterate over training batches
        for x, y, char_x, lens in loader:
            # clear accumulated gradients
            self.optimizer.zero_grad()
            # build the padding mask from the non-pad word ids
            mask = x.gt(0)

            out = self(x, char_x, lens)
            out = out.transpose(0, 1)  # [T, B, N]
            y, mask = y.t(), mask.t()  # [T, B]
            loss = self.crf(out, y, mask)
            # back-propagate
            loss.backward()
            # update the parameters
            self.optimizer.step()

    @torch.no_grad()
    def evaluate(self, loader):
        # switch to evaluation mode
        self.eval()

        loss, tp, total = 0, 0, 0
        # iterate over evaluation batches
        for x, y, char_x, lens in loader:
            mask = x.gt(0)
            target = y[mask]

            out = self(x, char_x, lens)
            out = out.transpose(0, 1)  # [T, B, N]
            y, mask = y.t(), mask.t()  # [T, B]
            predict = self.crf.viterbi(out, mask)
            loss += self.crf(out, y, mask)
            tp += torch.sum(predict == target).item()
            total += lens.sum().item()
        loss /= len(loader)

        return loss, tp / total

    def collate_fn(self, data):
        # sort by length (descending), as required by pack_padded_sequence
        x, y, char_x, lens = zip(
            *sorted(data, key=lambda x: x[-1], reverse=True))
        max_len = lens[0]
        x = torch.stack(x)[:, :max_len]
        y = torch.stack(y)[:, :max_len]
        char_x = torch.stack(char_x)[:, :max_len]
        lens = torch.tensor(lens)

        return x, y, char_x, lens
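
The character branch in forward flattens all real tokens with the mask, encodes them, then restores the [B, T, n_char_out] layout via torch.split plus pad_sequence. A self-contained sketch of that regrouping, with random vectors standing in for the CharLSTM outputs:

import torch
from torch.nn.utils.rnn import pad_sequence

lens = torch.tensor([3, 1])                  # hypothetical sentence lengths
flat = torch.randn(int(lens.sum()), 6)       # one 6-dim vector per real token
per_sent = torch.split(flat, lens.tolist())  # tuple of [len_i, 6] tensors
batched = pad_sequence(per_sent, batch_first=True)
print(batched.shape)                         # torch.Size([2, 3, 6])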