class SeqModel(nn.Module):
    def __init__(self, data):
        super(SeqModel, self).__init__()
        self.data = data
        self.use_crf = data.use_crf
        print("build network...")
        print("word feature extractor: ", data.word_feature_extractor)
        self.gpu = data.HP_gpu
        self.average_batch = data.average_batch_loss
        # Opinion and evidence are extracted separately.
        label_size = data.label_alphabet_size
        self.word_hidden = WordSequence(data)
        if self.use_crf:
            self.word_crf = CRF(label_size, batch_first=True)
            if self.gpu:
                self.word_crf = self.word_crf.cuda()

    def neg_log_likelihood_loss(self, word_inputs, word_seq_lengths,
                                batch_label, mask, input_label_seq_tensor):
        # lstm_outs: (batch_size, sentence_length, tag_size)
        lstm_outs = self.word_hidden(word_inputs, word_seq_lengths,
                                     input_label_seq_tensor)
        batch_size = word_inputs.size(0)
        if self.use_crf:
            mask = mask.byte()
            loss = -self.word_crf(lstm_outs, batch_label, mask)
            tag_seq = self.word_crf.decode(lstm_outs, mask)
        else:
            loss_function = nn.NLLLoss()
            seq_len = lstm_outs.size(1)
            lstm_outs = lstm_outs.view(batch_size * seq_len, -1)
            score = F.log_softmax(lstm_outs, 1)
            loss = loss_function(
                score, batch_label.contiguous().view(batch_size * seq_len))
            _, tag_seq = torch.max(score, 1)
            tag_seq = tag_seq.view(batch_size, seq_len)
        return loss, tag_seq

    def evaluate(self, word_inputs, word_seq_lengths, mask,
                 input_label_seq_tensor):
        lstm_outs = self.word_hidden(word_inputs, word_seq_lengths,
                                     input_label_seq_tensor)
        if self.use_crf:
            mask = mask.byte()
            tag_seq = self.word_crf.decode(lstm_outs, mask)
        else:
            batch_size = word_inputs.size(0)
            seq_len = lstm_outs.size(1)
            lstm_outs = lstm_outs.view(batch_size * seq_len, -1)
            _, tag_seq = torch.max(lstm_outs, 1)
            # Zero out predictions at padded positions.
            tag_seq = mask.long() * tag_seq.view(batch_size, seq_len)
        return tag_seq

    def forward(self, word_inputs, word_seq_lengths, mask,
                input_label_seq_tensor):
        return self.evaluate(word_inputs, word_seq_lengths, mask,
                             input_label_seq_tensor)
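# A minimal, self-contained sketch of the CRF loss/decode idiom shared by the
# models in this file, assuming the pytorch-crf package (torchcrf), whose
# CRF(num_tags, batch_first=True) constructor matches the usage above (the
# BiLSTM_CRF class further down uses a different, custom CRF). Shapes and
# values below are dummies; the demo function name is ours.
import torch
from torchcrf import CRF as TorchCRF


def crf_idiom_demo():
    num_tags, batch_size, seq_len = 5, 2, 7
    crf = TorchCRF(num_tags, batch_first=True)
    emissions = torch.randn(batch_size, seq_len, num_tags)  # e.g. BiLSTM outputs
    tags = torch.randint(num_tags, (batch_size, seq_len))
    mask = torch.ones(batch_size, seq_len, dtype=torch.uint8)
    loss = -crf(emissions, tags, mask=mask)        # forward() is the log-likelihood
    best_paths = crf.decode(emissions, mask=mask)  # list of per-sentence tag-id lists
    return loss, best_paths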
class Sequence_Label(nn.Module):
    def __init__(self, config):
        super(Sequence_Label, self).__init__()
        self.num_labels = len(config.tag2idx)
        self._bert = Bert_CRF.from_pretrained(config.bert_model_dir,
                                              num_labels=self.num_labels)
        self.crf = CRF(self.num_labels, batch_first=True)

    def forward(self, input_ids, attention_mask, token_type_ids=None,
                labels=None):
        output = self._bert(input_ids=input_ids,
                            attention_mask=attention_mask,
                            token_type_ids=token_type_ids)
        attn_mask = attention_mask.type(torch.uint8)
        if labels is not None:
            # The CRF accepts unnormalized emissions; applying log_softmax
            # first is a modeling choice, not an API requirement.
            loss = -self.crf(F.log_softmax(output, dim=2), labels,
                             mask=attn_mask, reduction='mean')
            return loss
        prediction = self.crf.decode(output, mask=attn_mask)
        return prediction
class ElmoNer(nn.Module):
    def __init__(self, num_units, rnn_hidden, num_tags, num_layers=1,
                 use_cuda=False):
        super(ElmoNer, self).__init__()
        self.use_cuda = use_cuda
        self.embedding = Embedder(ELMO_PRETAIN_PATH)
        self.rnn = nn.GRU(num_units, rnn_hidden, num_layers=num_layers,
                          batch_first=True, bidirectional=True)
        self.linear = nn.Linear(2 * rnn_hidden, num_tags)
        # self.linear = nn.Linear(num_units, num_tags)
        self.crf = CRF(num_tags)

    def forward(self, x_data, y_data, masks):
        """Forward pass: compute the CRF negative log-likelihood.

        :param x_data: batch of tokenized sentences
        :param y_data: gold tag ids, (batch, seq_len)
        :param masks: padding mask, (batch, seq_len)
        :return: scalar training loss
        """
        encoded_layers = self.embedding.sents2elmo(x_data)
        out = self.rnn_layer(encoded_layers)
        # This CRF expects (seq_len, batch) inputs, hence the transposes.
        loss = -1 * self.crf(out, y_data.transpose(0, 1),
                             masks.transpose(0, 1))
        return loss

    def rnn_layer(self, encoded_layers):
        """Project ELMo embeddings to emission scores.

        :param encoded_layers: (batch, seq_len, hidden)
        :return: (seq_len, batch, num_tags)
        """
        encoded_layers = np.array(encoded_layers)
        encoded_layers = torch.from_numpy(encoded_layers)
        if self.use_cuda:
            encoded_layers = encoded_layers.cuda()
        out, _ = self.rnn(encoded_layers)
        out = self.linear(out)
        out = out.transpose(0, 1)
        return out

    def test(self, x_data, masks):
        encoded_layers = self.embedding.sents2elmo(x_data)
        out = self.rnn_layer(encoded_layers)
        best_paths = self.crf.decode(out, mask=masks.transpose(0, 1))
        return best_paths
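# sents2elmo (ELMoForManyLangs Embedder) returns one array per sentence, so the
# np.array call in rnn_layer only yields a rectangular tensor when every
# sentence in the batch has the same length. A minimal padding sketch for the
# general case, assuming 1024-dim ELMo vectors and zero padding (both are
# assumptions, as is the helper name):
import numpy as np
import torch


def pad_and_stack(encoded_layers, dim=1024):
    """Zero-pad a list of (seq_len_i, dim) arrays into one (batch, max_len, dim) tensor."""
    max_len = max(arr.shape[0] for arr in encoded_layers)
    batch = np.zeros((len(encoded_layers), max_len, dim), dtype=np.float32)
    for i, arr in enumerate(encoded_layers):
        batch[i, :arr.shape[0]] = arr
    return torch.from_numpy(batch)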
class RNNCRF(nn.Module):
    def __init__(self, vocab_size, embed_size, num_units, num_layers,
                 num_tag, pre_train, use_cuda):
        super(RNNCRF, self).__init__()
        self.num_tag = num_tag
        self.use_cuda = use_cuda
        self.crf = CRF(num_tag)
        self.embedding = nn.Embedding(vocab_size, embed_size,
                                      _weight=pre_train)
        self.rnn = nn.LSTM(embed_size, num_units, num_layers=num_layers,
                           batch_first=True, bidirectional=True)
        self.linear = nn.Linear(2 * num_units, num_tag)

    def forward(self, x, y, seq_lens):
        """Training step: return the CRF negative log-likelihood.

        :param x: token ids, (batch, seq_len)
        :param y: gold tag ids, (batch, seq_len)
        :param seq_lens: true sentence lengths
        :return: scalar loss
        """
        emissions, mask = self.rnn_layer(x, seq_lens)
        loss = -1 * self.crf(emissions, y.transpose(0, 1), mask)
        return loss

    def test(self, x, y, seq_lens):
        emissions, mask = self.rnn_layer(x, seq_lens)
        loss = -1 * self.crf(emissions, y.transpose(0, 1), mask)
        best_paths = self.crf.decode(emissions, mask=mask)
        return loss, best_paths

    def rnn_layer(self, x, seq_lens):
        """Compute emission scores.

        :param x: token ids, (batch, seq_len)
        :param seq_lens: true sentence lengths
        :return: emissions of shape (seq_len, batch, num_tag) and the
            correspondingly transposed mask
        """
        batch_size, max_len = x.size()
        mask = create_mask(seq_lens, batch_size, max_len, self.use_cuda)
        embed = self.embedding(x)
        out, _ = self.rnn(embed)
        out = self.linear(out)
        out = out.transpose(0, 1)
        mask = mask.transpose(0, 1)
        return out, mask
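# create_mask is referenced by RNNCRF.rnn_layer but not defined in this file.
# A minimal sketch of what it plausibly does, under the assumption that it
# builds a length-based padding mask with 1s at valid positions:
import torch


def create_mask(seq_lens, batch_size, max_len, use_cuda=False):
    """Build a (batch_size, max_len) uint8 mask from true sequence lengths."""
    positions = torch.arange(max_len).unsqueeze(0).expand(batch_size, max_len)
    lengths = torch.as_tensor(seq_lens).unsqueeze(1)
    mask = (positions < lengths).type(torch.uint8)
    return mask.cuda() if use_cuda else mask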
class BiLSTM_CRF(nn.Module):
    def __init__(self, hyperparams):
        super(BiLSTM_CRF, self).__init__()
        self.lstm = BaselineModel(hyperparams)
        # Note: this CRF takes BOS/EOS/PAD tag ids, so it is a custom
        # implementation rather than torchcrf; its decode() returns
        # (scores, paths) and its forward() returns the NLL directly.
        self.crf = CRF(hyperparams.num_classes, bos_tag_id=2, eos_tag_id=3,
                       pad_tag_id=0, batch_first=True)
        self.hidden = None

    def forward(self, x, mask=None):
        emissions = self.lstm(x)
        score, path = self.crf.decode(emissions, mask=mask)
        return score, path

    def loss(self, x, y, mask=None):
        emissions = self.lstm(x)
        nll = self.crf(emissions, y, mask=mask)
        return nll

    def train_(self, optimizer, epochs, train_dataset):
        train_loss = 0.0
        for _ in tqdm(range(epochs), desc='Training'):
            epoch_loss = 0.
            self.train()
            for _, samples in tqdm(enumerate(train_dataset),
                                   desc='Batches of data'):
                inputs, labels = samples['inputs'], samples['outputs']
                optimizer.zero_grad()
                loss = self.loss(inputs, labels)
                loss.backward()
                optimizer.step()
                epoch_loss += loss.tolist()
            train_loss += epoch_loss / len(train_dataset)
            print(f"Train loss: {epoch_loss / len(train_dataset)}")
        return train_loss

    def predict(self, data_x, idx2label):
        data_x = torch.LongTensor(data_x[:100])
        with torch.no_grad():
            scores, seqs = self(data_x)
        for score, seq in zip(scores, seqs):
            str_seq = "".join([idx2label.get(x) for x in seq if x != 0])
            # print(f'{score.item():.2f}: {str_seq}')
            print(f'{str_seq}')
class BertNer(nn.Module):
    def __init__(self, num_units, rnn_hidden, num_tags, num_layers=1):
        super(BertNer, self).__init__()
        self.bert_model = BertModel.from_pretrained(BERT_PRETAIN_PATH)
        self.rnn = nn.GRU(num_units, rnn_hidden, num_layers=num_layers,
                          batch_first=True, bidirectional=True)
        self.linear = nn.Linear(2 * rnn_hidden, num_tags)
        # self.linear = nn.Linear(num_units, num_tags)
        self.crf = CRF(num_tags)

    def forward(self, x_data, y_data, masks, segment_ids):
        """Forward pass: compute the CRF negative log-likelihood.

        :param x_data: token ids, (batch, seq_len)
        :param y_data: gold tag ids, (batch, seq_len)
        :param masks: attention/padding mask, (batch, seq_len)
        :param segment_ids: BERT segment (token type) ids
        :return: scalar training loss
        """
        # Positional call matching the older pytorch-pretrained-bert API;
        # the trailing False disables output_all_encoded_layers.
        encoded_layers, _ = self.bert_model(x_data, segment_ids, masks, False)
        out = self.rnn_layer(encoded_layers)
        loss = -1 * self.crf(out, y_data.transpose(0, 1),
                             masks.transpose(0, 1))
        return loss

    def rnn_layer(self, encoded_layers):
        out, _ = self.rnn(encoded_layers)
        out = self.linear(out)
        out = out.transpose(0, 1)
        return out

    def test(self, x_data, masks, segment_ids):
        encoded_layers, _ = self.bert_model(x_data, segment_ids, masks, False)
        out = self.rnn_layer(encoded_layers)
        best_paths = self.crf.decode(out, mask=masks.transpose(0, 1))
        return best_paths
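# If BertNer were ported to the current huggingface transformers package, the
# encoding step would look roughly like this (a sketch, not a drop-in change;
# the checkpoint name and helper name are placeholders):
import torch
from transformers import BertModel as HFBertModel


def encode_with_transformers(input_ids, attention_mask, token_type_ids,
                             checkpoint="bert-base-chinese"):
    bert = HFBertModel.from_pretrained(checkpoint)
    outputs = bert(input_ids=input_ids, attention_mask=attention_mask,
                   token_type_ids=token_type_ids)
    return outputs.last_hidden_state  # (batch, seq_len, hidden)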
class JointModel(nn.Module):
    def __init__(self, data):
        super(JointModel, self).__init__()
        self.data = data
        self.use_crf = data.use_crf
        logger.info("build network...")
        logger.info("word feature extractor: %s" % data.word_feature_extractor)
        logger.info("use_cuda: %s" % data.HP_gpu)
        self.gpu = data.HP_gpu
        logger.info("use_crf: %s" % data.use_crf)
        self.average_batch = data.average_batch_loss
        label_size = data.label_alphabet_size
        sentence_size = data.sentence_alphabet_size
        self.word_hidden = JointSequence(data)
        if self.use_crf:
            self.word_crf = CRF(label_size, batch_first=True)
            self.sent_crf = CRF(sentence_size, batch_first=True)
            if self.gpu:
                self.word_crf = self.word_crf.cuda()
                self.sent_crf = self.sent_crf.cuda()

    def neg_log_likelihood_loss(self, word_inputs, word_tensor,
                                word_seq_lengths, batch_label,
                                batch_sent_type, mask, sent_mask,
                                input_label_seq_tensor,
                                input_sent_type_tensor, batch_word_recover,
                                word_perm_idx, need_cat=True,
                                need_embedding=True):
        words_outs, sent_out = self.word_hidden(
            word_inputs, word_tensor, word_seq_lengths,
            input_label_seq_tensor, input_sent_type_tensor,
            batch_word_recover, word_perm_idx, batch_sent_type, need_cat,
            need_embedding)
        batch_size = words_outs.size(0)
        seq_len = words_outs.size(1)
        if self.use_crf:
            # words_outs: (batch_size, sentence_length, tag_size)
            words_loss = (-self.word_crf(words_outs, batch_label, mask)) / (
                len(word_seq_lengths) * seq_len)
            words_tag_seq = self.word_crf.decode(words_outs, mask)
            sent_total_loss = -self.sent_crf(
                sent_out,
                batch_sent_type[batch_word_recover].view(batch_size, 1),
                sent_mask.view(batch_size, 1).byte()) / len(sent_mask)
            sent_tag_seq = self.sent_crf.decode(
                sent_out, sent_mask.view(batch_size, 1).byte())
        else:
            loss_function = nn.NLLLoss()
            words_outs = words_outs.view(batch_size * seq_len, -1)
            words_score = F.log_softmax(words_outs, 1)
            words_loss = loss_function(
                words_score,
                batch_label.contiguous().view(batch_size * seq_len))
            _, words_tag_seq = torch.max(words_score, 1)
            words_tag_seq = words_tag_seq.view(batch_size, seq_len)
            sent_out = sent_out.view(batch_size, -1)
            sent_score = F.log_softmax(sent_out, 1)
            sent_total_loss = loss_function(
                sent_score,
                batch_sent_type[batch_word_recover].view(batch_size))
            _, sent_tag_seq = torch.max(sent_score, 1)
        return words_loss, words_tag_seq, sent_total_loss, sent_tag_seq

    def evaluate(self, word_inputs, word_tensor, word_seq_lengths,
                 batch_sent_type, mask, sent_mask, input_label_seq_tensor,
                 input_sent_type_tensor, batch_word_recover, word_perm_idx,
                 need_cat=True, need_embedding=True):
        words_out, sent_out = self.word_hidden(
            word_inputs, word_tensor, word_seq_lengths,
            input_label_seq_tensor, input_sent_type_tensor,
            batch_word_recover, word_perm_idx, batch_sent_type, need_cat,
            need_embedding)
        batch_size = words_out.size(0)
        seq_len = words_out.size(1)
        if self.use_crf:
            sent_tag_seq = self.sent_crf.decode(
                sent_out, sent_mask.view(batch_size, 1).byte())
            # Sentence order was already restored for sentence-type
            # prediction, but the word-level batch is still length-sorted,
            # so re-permute the sentence predictions to match.
            sent_tag_seq = torch.tensor(sent_tag_seq)[word_perm_idx]
            if self.gpu:
                sent_tag_seq = sent_tag_seq.cpu().data.numpy().tolist()
            else:
                sent_tag_seq = sent_tag_seq.data.numpy().tolist()
            words_tag_seq = self.word_crf.decode(words_out, mask)
        else:
            sent_out = sent_out.view(batch_size, -1)
            _, sent_tag_seq = torch.max(sent_out, 1)
            # Re-permute the sentence predictions to match the still-sorted
            # word order (see above).
            sent_tag_seq = sent_tag_seq[word_perm_idx]
            words_out = words_out.view(batch_size * seq_len, -1)
            _, words_tag_seq = torch.max(words_out, 1)
            words_tag_seq = mask.long() * words_tag_seq.view(
                batch_size, seq_len)
        return words_tag_seq, sent_tag_seq

    def forward(self, word_inputs, word_tensor, word_seq_lengths, mask,
                sent_mask, input_label_seq_tensor, input_sent_type_tensor,
                batch_word_recover, word_perm_idx, need_cat=True,
                need_embedding=True):
        batch_size = word_tensor.size(0)
        seq_len = word_tensor.size(1)
        lstm_out, hidden, sent_out, label_embs = \
            self.word_hidden.evaluate_sentence(
                word_inputs, word_tensor, word_seq_lengths,
                input_label_seq_tensor, input_sent_type_tensor,
                batch_word_recover, need_cat, need_embedding)
        lstm_out = torch.cat([
            lstm_out,
            sent_out[word_perm_idx].expand(
                [lstm_out.size(0), lstm_out.size(1), sent_out.size(-1)])
        ], -1)
        words_outs = self.word_hidden.evaluate_word(lstm_out, hidden,
                                                    word_seq_lengths,
                                                    label_embs)
        if self.use_crf:
            sent_tag_seq = self.sent_crf.decode(
                sent_out, sent_mask.view(batch_size, 1).byte())
            # Re-permute the sentence predictions to match the still-sorted
            # word order (see evaluate()).
            sent_tag_seq = torch.tensor(sent_tag_seq)[word_perm_idx]
            words_tag_seq = self.word_crf.decode(words_outs, mask)
        else:
            sent_out = sent_out.view(batch_size, -1)
            _, sent_tag_seq = torch.max(sent_out, 1)
            sent_tag_seq = sent_tag_seq[word_perm_idx]
            words_outs = words_outs.view(batch_size * seq_len, -1)
            _, words_tag_seq = torch.max(words_outs, 1)
            words_tag_seq = mask.long() * words_tag_seq.view(
                batch_size, seq_len)
        return words_tag_seq, sent_tag_seq
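# JointModel threads word_perm_idx / batch_word_recover through every call:
# word_perm_idx sorts the batch by length (as pack_padded_sequence-style
# preprocessing typically requires) and batch_word_recover is its inverse.
# A minimal sketch of that idiom with dummy lengths (the demo name is ours):
import torch


def permutation_demo():
    lengths = torch.tensor([3, 5, 2])
    word_perm_idx = lengths.argsort(descending=True)  # length-sorted order
    batch_word_recover = word_perm_idx.argsort()      # inverse permutation
    batch = torch.arange(len(lengths))
    sorted_batch = batch[word_perm_idx]
    # Applying the inverse permutation restores the original order.
    assert torch.equal(sorted_batch[batch_word_recover], batch)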
class ConceptTagger(nn.Module):
    def __init__(self, config, embedding, word2Idx, label2Idx, description):
        super(ConceptTagger, self).__init__()
        self.embed_size = config.embed_size
        self.emb = embedding
        self.word2Idx = word2Idx
        self.label2Idx = label2Idx
        self.description = description
        self.use_crf = config.crf
        self.device = config.device
        self.config = config
        self.hidden_size1 = config.hidden_size1
        self.hidden_size2 = config.hidden_size2
        self.embedding = nn.Embedding.from_pretrained(
            torch.from_numpy(embedding.astype(np.float32)),
            padding_idx=word2Idx['<PAD>'])
        self.lstm1 = nn.LSTM(self.embed_size, self.hidden_size1,
                             batch_first=True, bias=True, bidirectional=True)
        self.lstm2 = nn.LSTM(self.hidden_size1 * 2 + self.embed_size,
                             self.hidden_size2, batch_first=True, bias=True,
                             bidirectional=True)
        self.fc = nn.Linear(self.hidden_size2 * 2, 3, bias=True)
        self.dropout = nn.Dropout(config.dropout)
        if self.use_crf:
            self.crf = CRF(num_tags=3, batch_first=True)

    def Eval(self, x, slot):
        _x = copy.deepcopy(x)
        _slot = copy.deepcopy(slot)
        x = setMapping(x, self.word2Idx)
        lengths = [len(i) for i in x]
        slot = [self.description[i] for i in slot]
        slot = setMapping(slot, self.word2Idx)
        slot_len = [len(i) for i in slot]
        x = padData(x, cal_maxlen(x), self.word2Idx['<PAD>'])
        slot = padData(slot, cal_maxlen(slot), self.word2Idx['<PAD>'])
        x = torch.tensor(x, device=self.device)
        mask = (x != self.word2Idx['<PAD>']).byte()
        mask = mask.to(self.device)
        slot = torch.tensor(slot, device=self.device)
        slot_len = torch.tensor(slot_len, device=self.device)
        x = self.embedding(x)
        slot = self.embedding(slot)
        packed = rnn_utils.pack_padded_sequence(input=x, lengths=lengths,
                                                batch_first=True,
                                                enforce_sorted=False)
        enc_hiddens, (last_hidden, last_cell) = self.lstm1(packed)
        x = rnn_utils.pad_packed_sequence(enc_hiddens, batch_first=True)[0]
        # Mean-pool the slot-description embeddings into one vector per slot.
        slot = torch.sum(slot, 1)
        slot_embedding = slot / slot_len.unsqueeze(1).type_as(slot)
        slot_embedding = slot_embedding.unsqueeze(1)
        slot_embedding = slot_embedding.expand(-1, x.size(1), -1)
        x = torch.cat((x, slot_embedding), -1)
        packed = rnn_utils.pack_padded_sequence(input=x, lengths=lengths,
                                                batch_first=True,
                                                enforce_sorted=False)
        enc_hiddens, (last_hidden, last_cell) = self.lstm2(packed)
        x = rnn_utils.pad_packed_sequence(enc_hiddens, batch_first=True)[0]
        x = self.dropout(x)
        x = self.fc(x)
        if not self.use_crf:
            y_pad = x.argmax(-1).detach().tolist()
        else:
            y_pad = self.crf.decode(x, mask)
        pred = []
        id2label = {v: k for k, v in self.label2Idx.items()}
        for i in range(len(y_pad)):
            for j in range(len(y_pad[i])):
                y_pad[i][j] = id2label[y_pad[i][j]]
            pred.append(y_pad[i][:lengths[i]])
        for i in range(len(pred)):
            for j in range(len(pred[i])):
                if pred[i][j] != 'O':
                    pred[i][j] = pred[i][j] + '-' + _slot[i]
        return pred

    def forward(self, x, y, slot, Type):
        _x = copy.deepcopy(x)
        _y = copy.deepcopy(y)
        _slot = copy.deepcopy(slot)
        x = setMapping(x, self.word2Idx)
        lengths = [len(i) for i in x]
        y = setMapping(y, self.label2Idx)
        slot = [self.description[i[0]] for i in slot]
        slot = setMapping(slot, self.word2Idx)
        slot_len = [len(i) for i in slot]
        x = padData(x, cal_maxlen(x), self.word2Idx['<PAD>'])
        y = padData(y, cal_maxlen(y), self.label2Idx['O'])
        slot = padData(slot, cal_maxlen(slot), self.word2Idx['<PAD>'])
        x = torch.tensor(x, device=self.device)
        y = torch.tensor(y, device=self.device)
        mask = (x != self.word2Idx['<PAD>']).byte()
        mask = mask.to(self.device)
        slot = torch.tensor(slot, device=self.device)
        slot_len = torch.tensor(slot_len, device=self.device)
        x = self.embedding(x)
        slot = self.embedding(slot)
        packed = rnn_utils.pack_padded_sequence(input=x, lengths=lengths,
                                                batch_first=True,
                                                enforce_sorted=False)
        enc_hiddens, (last_hidden, last_cell) = self.lstm1(packed)
        x = rnn_utils.pad_packed_sequence(enc_hiddens, batch_first=True)[0]
        slot = torch.sum(slot, 1)
        slot_embedding = slot / slot_len.unsqueeze(1).type_as(slot)
        slot_embedding = slot_embedding.unsqueeze(1)
        slot_embedding = slot_embedding.expand(-1, x.size(1), -1)
        x = torch.cat((x, slot_embedding), -1)
        packed = rnn_utils.pack_padded_sequence(input=x, lengths=lengths,
                                                batch_first=True,
                                                enforce_sorted=False)
        enc_hiddens, (last_hidden, last_cell) = self.lstm2(packed)
        x = rnn_utils.pad_packed_sequence(enc_hiddens, batch_first=True)[0]
        x = self.dropout(x)
        x = self.fc(x)
        if Type == 'train':
            if not self.use_crf:
                features = x.view(x.size(0) * x.size(1), -1)
                y = y.view(-1)
                loss_func = nn.CrossEntropyLoss(reduction='mean')
                loss = loss_func(features, y)
                return loss
            loss = -self.crf(x, y, mask, reduction='mean')
            return loss
        if not self.use_crf:
            y_pad = x.argmax(-1).detach().tolist()
        else:
            y_pad = self.crf.decode(x, mask)
        pred = []
        id2label = {v: k for k, v in self.label2Idx.items()}
        for i in range(len(y_pad)):
            for j in range(len(y_pad[i])):
                y_pad[i][j] = id2label[y_pad[i][j]]
            pred.append(y_pad[i][:lengths[i]])
        assert len(_slot) == len(_y) and len(_x) == len(_y)
        for i in range(len(_x)):
            for j in range(len(_y[i])):
                if _y[i][j] != 'O':
                    _y[i][j] = _y[i][j] + '-' + _slot[i][0]
        for i in range(len(pred)):
            for j in range(len(pred[i])):
                if pred[i][j] != 'O':
                    pred[i][j] = pred[i][j] + '-' + _slot[i][0]
        return _x, _y, pred

    @staticmethod
    def load(model_path, device='cpu'):
        model_params = torch.load(model_path,
                                  map_location=lambda storage, loc: storage)
        params_path = os.path.join(os.path.dirname(model_path), 'params')
        if not os.path.exists(params_path):
            raise Exception('params data error')
        params = torch.load(params_path,
                            map_location=lambda storage, loc: storage)
        config = params['config']
        word2Idx = params['word2Idx']
        embedding = params['embedding']
        description = params['description']
        label2Idx = params['label2Idx']
        config.device = device
        model = ConceptTagger(config=config, word2Idx=word2Idx,
                              embedding=embedding, description=description,
                              label2Idx=label2Idx)
        model.load_state_dict(model_params['state_dict'])
        return model

    def save(self, path):
        print('save model parameters to [%s]' % path)
        if not os.path.exists(os.path.join(os.path.dirname(path), 'params')):
            params_path = os.path.join(os.path.dirname(path), 'params')
            params = {
                'config': self.config,
                'embedding': self.emb,
                'description': self.description,
                'word2Idx': self.word2Idx,
                'label2Idx': self.label2Idx
            }
            torch.save(params, params_path)
        model_params = {'state_dict': self.state_dict()}
        torch.save(model_params, path)
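# setMapping, padData, and cal_maxlen are used by ConceptTagger but not
# defined in this file. Minimal sketches consistent with how they are called
# above (the '<UNK>' fallback is an assumption):
def setMapping(seqs, vocab, unk='<UNK>'):
    """Map each token (or label) in a batch of sequences to its id."""
    return [[vocab.get(tok, vocab.get(unk, 0)) for tok in seq] for seq in seqs]


def cal_maxlen(seqs):
    """Length of the longest sequence in the batch."""
    return max(len(seq) for seq in seqs)


def padData(seqs, max_len, pad_id):
    """Right-pad every sequence to max_len with pad_id."""
    return [seq + [pad_id] * (max_len - len(seq)) for seq in seqs]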