import torch
import torch.nn as nn
import torch.optim as optim
from datetime import datetime, timedelta
from torch.nn.utils.rnn import (pack_padded_sequence, pad_packed_sequence,
                                pad_sequence)

# CRF, CharLSTM, CharCNN, TokenLSTM, ELMO and Embedding are project-local
# modules assumed to be importable alongside this file.


# Variant of the LSTM_CRF constructor (full class below) that can be handed a
# pretrained embedding matrix directly instead of loading it afterwards.
def __init__(self, n_vocab, n_embed, n_hidden, n_out, embed=None, drop=0.5):
    super(LSTM_CRF, self).__init__()

    if embed is None:
        self.embed = nn.Embedding(n_vocab, n_embed)
    else:
        self.embed = nn.Embedding.from_pretrained(embed, False)
    # BiLSTM over the word embeddings
    self.lstm = nn.LSTM(input_size=n_embed,
                        hidden_size=n_hidden,
                        batch_first=True,
                        bidirectional=True)
    # output layer
    self.out = nn.Linear(n_hidden * 2, n_out)
    # CRF layer
    self.crf = CRF(n_out)

    self.drop = nn.Dropout(drop)
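# Minimal sketch (not from the repo) of the pretrained-embedding branch above:
# the second positional argument of nn.Embedding.from_pretrained is `freeze`,
# so passing False keeps the loaded vectors trainable during fine-tuning.
import torch
import torch.nn as nn

pretrained = torch.randn(5, 8)                   # toy 5-word, 8-dim matrix
embed = nn.Embedding.from_pretrained(pretrained, False)
assert embed.weight.requires_grad                # with freeze=True this is False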
# Constructor of CnnLstmCrf (only __init__ is shown in this section).
class CnnLstmCrf(nn.Module):

    def __init__(self, args, word_dict, tag_dict, pos_dict=None,
                 binary_dict=None):
        super(CnnLstmCrf, self).__init__()
        # Save dictionaries
        self.args = args
        self.word_dict = word_dict
        self.tag_dict = tag_dict
        self.pos_dict = pos_dict
        self.binary_dict = binary_dict

        # Pretrained Glove Wikipedia embeddings
        self.glove_emb = Embedding(len(word_dict), args.glove_dim)
        if args.use_glove:
            self.load_embeddings(self.glove_emb, args.glove_file, word_dict)

        # Pretrained Glove Twitter embeddings
        self.twitter_emb = Embedding(len(word_dict), args.twitter_dim)
        if args.use_twitter:
            self.load_embeddings(self.twitter_emb, args.twitter_file,
                                 word_dict)

        # POS embeddings
        if args.use_pos:
            self.pos_emb = Embedding(len(pos_dict), args.pos_dim)

        # ELMo pretrained BiLM embeddings
        if args.use_elmo:
            self.elmo = ELMO(args.elmo_dim, args.elmo_dropout)

        # Character byte encoder
        self.char_emb = Embedding(262, args.char_dim)
        self.char_cnn = CharCNN(args.char_dim, args.char_cnn_filters,
                                args.char_cnn_ngrams)

        # Main LSTM
        input_dim = (args.glove_dim + args.twitter_dim
                     + (args.pos_dim if args.use_pos else 0)
                     + (args.elmo_dim if args.use_elmo else 0)
                     + (len(binary_dict) if binary_dict else 0)
                     + self.char_cnn.cnn.get_output_dim())
        self.lstm = TokenLSTM(input_dim, args.lstm_dim, args.lstm_layers,
                              args.lstm_dropout)

        # CRF layer
        self.crf = CRF(args.lstm_dim * 2, tag_dict[1])
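# Hypothetical construction sketch (not from the repo): the attribute names on
# `args` mirror the ones read in __init__ above, but all dimensions are
# illustrative only, and instantiating CnnLstmCrf still requires the repo's
# Embedding, CharCNN, TokenLSTM and CRF modules.
from types import SimpleNamespace

args = SimpleNamespace(
    use_glove=False, glove_file=None, glove_dim=100,
    use_twitter=False, twitter_file=None, twitter_dim=25,
    use_pos=False, pos_dim=25,
    use_elmo=False, elmo_dim=1024, elmo_dropout=0.5,
    char_dim=16, char_cnn_filters=30, char_cnn_ngrams=(3,),
    lstm_dim=200, lstm_layers=1, lstm_dropout=0.5,
)
# model = CnnLstmCrf(args, word_dict, tag_dict)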
# Variant of the BPNN_CRF constructor (full class below) that can be handed a
# pretrained embedding matrix directly.
def __init__(self, n_context, n_vocab, n_embed, n_hidden, n_out,
             embed=None, drop=0.5):
    super(BPNN_CRF, self).__init__()

    if embed is None:
        self.embed = nn.Embedding(n_vocab, n_embed)
    else:
        self.embed = nn.Embedding.from_pretrained(embed, False)
    # hidden layer
    self.hid = nn.Sequential(nn.Linear(n_embed * n_context, n_hidden),
                             nn.ReLU())
    # output layer
    self.out = nn.Linear(n_hidden, n_out)
    # CRF layer
    self.crf = CRF(n_out)

    self.drop = nn.Dropout(drop)
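# Minimal sketch (not from the repo) of why the hidden layer above takes
# n_embed * n_context inputs: each token arrives as a window of n_context word
# ids, and the window's embeddings are flattened into one feature vector, just
# as BPNN_CRF.forward does below with view(B, T, -1).
import torch
import torch.nn as nn

n_vocab, n_embed, n_context = 100, 8, 5
embed = nn.Embedding(n_vocab, n_embed)
x = torch.randint(n_vocab, (2, 7, n_context))    # [B, T, n_context] word ids
feats = embed(x).view(2, 7, -1)                  # [B, T, n_embed * n_context]
print(feats.shape)                               # torch.Size([2, 7, 40])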
class BPNN_CRF(nn.Module):

    def __init__(self, n_context, n_vocab, n_embed, n_hidden, n_out, drop=0.5):
        super(BPNN_CRF, self).__init__()

        self.embed = nn.Embedding(n_vocab, n_embed)
        # hidden layer
        self.hid = nn.Sequential(nn.Linear(n_embed * n_context, n_hidden),
                                 nn.ReLU())
        # output layer
        self.out = nn.Linear(n_hidden, n_out)
        # CRF layer
        self.crf = CRF(n_out)

        self.drop = nn.Dropout(drop)

    def load_pretrained(self, embed):
        self.embed = nn.Embedding.from_pretrained(embed, False)

    def forward(self, x):
        B, T, N = x.shape
        # look up the embeddings of each context window and flatten them
        x = self.embed(x).view(B, T, -1)
        x = self.hid(x)
        x = self.drop(x)

        return self.out(x)

    def fit(self, train_loader, dev_loader, test_loader,
            epochs, interval, eta, file):
        # total training time
        total_time = timedelta()
        # best dev accuracy and the epoch it was reached at
        max_e, max_acc = 0, 0.0
        # use Adam as the optimizer
        self.optimizer = optim.Adam(params=self.parameters(), lr=eta)

        for epoch in range(1, epochs + 1):
            start = datetime.now()
            # update the parameters
            self.update(train_loader)

            print(f"Epoch: {epoch} / {epochs}:")
            loss, train_acc = self.evaluate(train_loader)
            print(f"{'train:':<6} Loss: {loss:.4f} Accuracy: {train_acc:.2%}")
            loss, dev_acc = self.evaluate(dev_loader)
            print(f"{'dev:':<6} Loss: {loss:.4f} Accuracy: {dev_acc:.2%}")
            loss, test_acc = self.evaluate(test_loader)
            print(f"{'test:':<6} Loss: {loss:.4f} Accuracy: {test_acc:.2%}")
            t = datetime.now() - start
            print(f"{t}s elapsed\n")
            total_time += t

            # save the model that performs best on the dev set, and stop
            # early if dev accuracy has not improved for `interval` epochs
            if dev_acc > max_acc:
                torch.save(self, file)
                max_e, max_acc = epoch, dev_acc
            elif epoch - max_e >= interval:
                break
        print(f"max accuracy of dev is {max_acc:.2%} at epoch {max_e}")
        print(f"mean time of each epoch is {total_time / epoch}s\n")

    def update(self, loader):
        # switch to training mode
        self.train()

        # train on batches from the loader
        for x, y, lens in loader:
            # zero the gradients
            self.optimizer.zero_grad()

            # build the padding mask from the sequence lengths
            mask = torch.arange(y.size(1)) < lens.unsqueeze(-1)

            out = self(x)
            out = out.transpose(0, 1)  # [T, B, N]
            y, mask = y.t(), mask.t()  # [T, B]
            loss = self.crf(out, y, mask)
            # compute the gradients
            loss.backward()
            # update the parameters
            self.optimizer.step()

    @torch.no_grad()
    def evaluate(self, loader):
        # switch to evaluation mode
        self.eval()

        loss, tp, total = 0, 0, 0
        # evaluate on batches from the loader
        for x, y, lens in loader:
            mask = torch.arange(y.size(1)) < lens.unsqueeze(-1)
            target = y[mask]

            out = self.forward(x)
            out = out.transpose(0, 1)  # [T, B, N]
            y, mask = y.t(), mask.t()  # [T, B]
            predict = self.crf.viterbi(out, mask)
            loss += self.crf(out, y, mask)
            tp += torch.sum(predict == target).item()
            total += lens.sum().item()
        loss /= len(loader)

        return loss, tp / total

    def collate_fn(self, data):
        # truncate every sequence in the batch to the longest real length
        x, y, lens = zip(*data)
        max_len = max(lens)
        x = torch.stack(x)[:, :max_len]
        y = torch.stack(y)[:, :max_len]
        lens = torch.tensor(lens)

        return x, y, lens
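# Standalone sketch of the length mask built in update()/evaluate() above:
# broadcasting a position index against the batch lengths marks real tokens.
import torch

lens = torch.tensor([3, 1, 2])                   # true lengths in a batch of 3
mask = torch.arange(3) < lens.unsqueeze(-1)      # [B, T] boolean mask
# tensor([[ True,  True,  True],
#         [ True, False, False],
#         [ True,  True, False]])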
class LSTM_CRF(nn.Module):

    def __init__(self, n_vocab, n_embed, n_hidden, n_out, drop=0.5):
        super(LSTM_CRF, self).__init__()

        self.embed = nn.Embedding(n_vocab, n_embed)
        # BiLSTM over the word embeddings
        self.lstm = nn.LSTM(input_size=n_embed,
                            hidden_size=n_hidden,
                            batch_first=True,
                            bidirectional=True)
        # output layer
        self.out = nn.Linear(n_hidden * 2, n_out)
        # CRF layer
        self.crf = CRF(n_out)

        self.drop = nn.Dropout(drop)

    def load_pretrained(self, embed):
        self.embed = nn.Embedding.from_pretrained(embed, False)

    def forward(self, x, lens):
        B, T = x.shape
        # look up the word embeddings
        x = self.embed(x)
        x = self.drop(x)

        x = pack_padded_sequence(x, lens, True)
        x, _ = self.lstm(x)
        x, _ = pad_packed_sequence(x, True)
        x = self.drop(x)

        return self.out(x)

    def fit(self, train_loader, dev_loader, test_loader,
            epochs, interval, eta, file):
        # total training time
        total_time = timedelta()
        # best dev accuracy and the epoch it was reached at
        max_e, max_acc = 0, 0.0
        # use Adam as the optimizer
        self.optimizer = optim.Adam(params=self.parameters(), lr=eta)

        for epoch in range(1, epochs + 1):
            start = datetime.now()
            # update the parameters
            self.update(train_loader)

            print(f"Epoch: {epoch} / {epochs}:")
            loss, train_acc = self.evaluate(train_loader)
            print(f"{'train:':<6} Loss: {loss:.4f} Accuracy: {train_acc:.2%}")
            loss, dev_acc = self.evaluate(dev_loader)
            print(f"{'dev:':<6} Loss: {loss:.4f} Accuracy: {dev_acc:.2%}")
            loss, test_acc = self.evaluate(test_loader)
            print(f"{'test:':<6} Loss: {loss:.4f} Accuracy: {test_acc:.2%}")
            t = datetime.now() - start
            print(f"{t}s elapsed\n")
            total_time += t

            # save the model that performs best on the dev set, and stop
            # early if dev accuracy has not improved for `interval` epochs
            if dev_acc > max_acc:
                torch.save(self, file)
                max_e, max_acc = epoch, dev_acc
            elif epoch - max_e >= interval:
                break
        print(f"max accuracy of dev is {max_acc:.2%} at epoch {max_e}")
        print(f"mean time of each epoch is {total_time / epoch}s\n")

    def update(self, loader):
        # switch to training mode
        self.train()

        # train on batches from the loader
        for x, y, lens in loader:
            # zero the gradients
            self.optimizer.zero_grad()

            # nonzero word ids mark real (non-padding) tokens
            mask = x.gt(0)

            out = self(x, lens)
            out = out.transpose(0, 1)  # [T, B, N]
            y, mask = y.t(), mask.t()  # [T, B]
            loss = self.crf(out, y, mask)
            # compute the gradients
            loss.backward()
            # update the parameters
            self.optimizer.step()

    @torch.no_grad()
    def evaluate(self, loader):
        # switch to evaluation mode
        self.eval()

        loss, tp, total = 0, 0, 0
        # evaluate on batches from the loader
        for x, y, lens in loader:
            mask = x.gt(0)
            target = y[mask]

            out = self.forward(x, lens)
            out = out.transpose(0, 1)  # [T, B, N]
            y, mask = y.t(), mask.t()  # [T, B]
            predict = self.crf.viterbi(out, mask)
            loss += self.crf(out, y, mask)
            tp += torch.sum(predict == target).item()
            total += lens.sum().item()
        loss /= len(loader)

        return loss, tp / total

    def collate_fn(self, data):
        # sort the batch by length (descending) for pack_padded_sequence and
        # truncate every sequence to the longest real length
        x, y, lens = zip(*sorted(data, key=lambda x: x[-1], reverse=True))
        max_len = lens[0]
        x = torch.stack(x)[:, :max_len]
        y = torch.stack(y)[:, :max_len]
        lens = torch.tensor(lens)

        return x, y, lens
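# Standalone sketch of the packing round trip in LSTM_CRF.forward above;
# collate_fn sorts the batch by length because pack_padded_sequence expects
# descending lengths unless enforce_sorted=False is passed.
import torch
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

x = torch.randn(3, 4, 8)                         # [B, T, E], lengths descending
lens = torch.tensor([4, 2, 1])
lstm = torch.nn.LSTM(8, 5, batch_first=True, bidirectional=True)
packed = pack_padded_sequence(x, lens, True)
out, _ = lstm(packed)
out, _ = pad_packed_sequence(out, True)
print(out.shape)                                 # torch.Size([3, 4, 10]): 2 * hidden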
class CHAR_LSTM_CRF(nn.Module):

    def __init__(self, n_char, n_char_embed, n_char_out,
                 n_vocab, n_embed, n_hidden, n_out, drop=0.5):
        super(CHAR_LSTM_CRF, self).__init__()

        self.embed = nn.Embedding(n_vocab, n_embed)
        # character-level LSTM encoder
        self.char_lstm = CharLSTM(n_char=n_char,
                                  n_embed=n_char_embed,
                                  n_out=n_char_out)
        # word-level BiLSTM over the concatenated representations
        self.word_lstm = nn.LSTM(input_size=n_embed + n_char_out,
                                 hidden_size=n_hidden,
                                 batch_first=True,
                                 bidirectional=True)
        # output layer
        self.out = nn.Linear(n_hidden * 2, n_out)
        # CRF layer
        self.crf = CRF(n_out)

        self.drop = nn.Dropout(drop)

    def load_pretrained(self, embed):
        self.embed = nn.Embedding.from_pretrained(embed, False)

    def forward(self, x, char_x, lens):
        B, T = x.shape
        # nonzero word ids mark real (non-padding) tokens
        mask = x.gt(0)

        # look up the word embeddings
        x = self.embed(x)
        # encode the characters of every real token, then regroup the
        # resulting vectors into a padded [B, T, n_char_out] tensor
        char_x = self.char_lstm(char_x[mask])
        char_x = pad_sequence(torch.split(char_x, lens.tolist()), True)

        # concatenate the word and character representations
        x = torch.cat((x, char_x), dim=-1)
        x = self.drop(x)

        x = pack_padded_sequence(x, lens, True)
        x, _ = self.word_lstm(x)
        x, _ = pad_packed_sequence(x, True)
        x = self.drop(x)

        return self.out(x)

    def fit(self, train_loader, dev_loader, test_loader,
            epochs, interval, eta, file):
        # total training time
        total_time = timedelta()
        # best dev accuracy and the epoch it was reached at
        max_e, max_acc = 0, 0.0
        # use Adam as the optimizer
        self.optimizer = optim.Adam(params=self.parameters(), lr=eta)

        for epoch in range(1, epochs + 1):
            start = datetime.now()
            # update the parameters
            self.update(train_loader)

            print(f"Epoch: {epoch} / {epochs}:")
            loss, train_acc = self.evaluate(train_loader)
            print(f"{'train:':<6} Loss: {loss:.4f} Accuracy: {train_acc:.2%}")
            loss, dev_acc = self.evaluate(dev_loader)
            print(f"{'dev:':<6} Loss: {loss:.4f} Accuracy: {dev_acc:.2%}")
            loss, test_acc = self.evaluate(test_loader)
            print(f"{'test:':<6} Loss: {loss:.4f} Accuracy: {test_acc:.2%}")
            t = datetime.now() - start
            print(f"{t}s elapsed\n")
            total_time += t

            # save the model that performs best on the dev set, and stop
            # early if dev accuracy has not improved for `interval` epochs
            if dev_acc > max_acc:
                torch.save(self, file)
                max_e, max_acc = epoch, dev_acc
            elif epoch - max_e >= interval:
                break
        print(f"max accuracy of dev is {max_acc:.2%} at epoch {max_e}")
        print(f"mean time of each epoch is {total_time / epoch}s\n")

    def update(self, loader):
        # switch to training mode
        self.train()

        # train on batches from the loader
        for x, y, char_x, lens in loader:
            # zero the gradients
            self.optimizer.zero_grad()

            # nonzero word ids mark real (non-padding) tokens
            mask = x.gt(0)

            out = self(x, char_x, lens)
            out = out.transpose(0, 1)  # [T, B, N]
            y, mask = y.t(), mask.t()  # [T, B]
            loss = self.crf(out, y, mask)
            # compute the gradients
            loss.backward()
            # update the parameters
            self.optimizer.step()

    @torch.no_grad()
    def evaluate(self, loader):
        # switch to evaluation mode
        self.eval()

        loss, tp, total = 0, 0, 0
        # evaluate on batches from the loader
        for x, y, char_x, lens in loader:
            mask = x.gt(0)
            target = y[mask]

            out = self.forward(x, char_x, lens)
            out = out.transpose(0, 1)  # [T, B, N]
            y, mask = y.t(), mask.t()  # [T, B]
            predict = self.crf.viterbi(out, mask)
            loss += self.crf(out, y, mask)
            tp += torch.sum(predict == target).item()
            total += lens.sum().item()
        loss /= len(loader)

        return loss, tp / total

    def collate_fn(self, data):
        # sort the batch by length (descending) for pack_padded_sequence and
        # truncate every sequence to the longest real length
        x, y, char_x, lens = zip(*sorted(data, key=lambda x: x[-1],
                                         reverse=True))
        max_len = lens[0]
        x = torch.stack(x)[:, :max_len]
        y = torch.stack(y)[:, :max_len]
        char_x = torch.stack(char_x)[:, :max_len]
        lens = torch.tensor(lens)

        return x, y, char_x, lens
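# Standalone sketch of the char-feature regrouping in CHAR_LSTM_CRF.forward
# above: the character LSTM yields one vector per real token of the flattened
# batch, and split + pad_sequence folds them back into a padded
# [B, T, n_char_out] tensor aligned with the word embeddings.
import torch
from torch.nn.utils.rnn import pad_sequence

lens = [3, 1]                                    # tokens per sentence
char_x = torch.randn(sum(lens), 6)               # [total_tokens, n_char_out]
padded = pad_sequence(torch.split(char_x, lens), True)
print(padded.shape)                              # torch.Size([2, 3, 6])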