class BiLSTM_CRF(nn.Module):
    def __init__(self, data):
        super(BiLSTM_CRF, self).__init__()
        print("build batched lstmcrf...")
        self.gpu = data.HP_gpu
        # add two more labels for the downlayer lstm, use the original label size for the CRF
        label_size = data.label_alphabet_size
        data.label_alphabet_size += 2
        self.lstm = BiLSTM(data)
        self.crf = CRF(label_size, self.gpu)

    def neg_log_likelihood_loss(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths,
                                char_inputs, char_seq_lengths, char_seq_recover, batch_label, mask):
        outs = self.lstm.get_output_score(gaz_list, word_inputs, biword_inputs, word_seq_lengths,
                                          char_inputs, char_seq_lengths, char_seq_recover)
        total_loss = self.crf.neg_log_likelihood_loss(outs, mask, batch_label)
        scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        return total_loss, tag_seq

    def forward(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths,
                char_inputs, char_seq_lengths, char_seq_recover, mask):
        outs = self.lstm.get_output_score(gaz_list, word_inputs, biword_inputs, word_seq_lengths,
                                          char_inputs, char_seq_lengths, char_seq_recover)
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        return tag_seq

    def get_lstm_features(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths,
                          char_inputs, char_seq_lengths, char_seq_recover):
        return self.lstm.get_lstm_features(gaz_list, word_inputs, biword_inputs, word_seq_lengths,
                                           char_inputs, char_seq_lengths, char_seq_recover)
class SeqLabel(nn.Module):
    def __init__(self, data):
        super(SeqLabel, self).__init__()
        label_size = data.label_alphabet_size
        data.label_alphabet_size += 2
        self.word_hidden = WordSequence(data)
        self.crf = CRF(label_size, data.gpu)

    def calculate_loss(self, word_inputs, feature_inputs, word_seq_lengths, char_inputs,
                       char_seq_lengths, char_seq_recover, batch_label, mask):
        outs = self.word_hidden(word_inputs, feature_inputs, word_seq_lengths,
                                char_inputs, char_seq_lengths, char_seq_recover)
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        total_loss = self.crf.neg_log_likelihood_loss(outs, mask, batch_label)
        scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        return total_loss, tag_seq

    def forward(self, word_inputs, feature_inputs, word_seq_lengths, char_inputs,
                char_seq_lengths, char_seq_recover, mask):
        outs = self.word_hidden(word_inputs, feature_inputs, word_seq_lengths,
                                char_inputs, char_seq_lengths, char_seq_recover)
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        return tag_seq
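# A minimal, self-contained sketch (an assumption for illustration, not part of the original
# source): how the (batch_size, seq_len) mask passed to calculate_loss/forward is usually
# built from the per-sentence lengths, with 1 marking real tokens and 0 marking padding.
import torch

def build_mask(word_seq_lengths, max_len=None):
    # word_seq_lengths: LongTensor of shape (batch_size,)
    max_len = max_len or int(word_seq_lengths.max())
    positions = torch.arange(max_len).unsqueeze(0)               # (1, seq_len)
    return (positions < word_seq_lengths.unsqueeze(1)).byte()    # (batch_size, seq_len)

# Example: lengths [5, 3] -> rows [1,1,1,1,1] and [1,1,1,0,0]
# mask = build_mask(torch.tensor([5, 3]))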
def __init__(self, bert_path, bert_dim, n_class, drop_p, num_pre):
    super(NER_Model, self).__init__()
    self.bert_model = BertModel.from_pretrained(bert_path)
    self.fc = nn.Linear(bert_dim * 2, n_class)
    self.dropout = nn.Dropout(drop_p)
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # pre embedding
    self.pre_dim = bert_dim
    self.pre_embedding = nn.Embedding(num_pre, self.pre_dim)
    self.pre_embedding.weight.data.copy_(
        torch.from_numpy(self.random_embedding_label(num_pre, self.pre_dim, 0.025)))
    # transformer
    self.enc_positional_encoding = positional_encoding(768, zeros_pad=True, scale=True)
    for i in range(hp.num_blocks):
        self.__setattr__(
            'enc_self_attention_%d' % i,
            multihead_attention(num_units=hp.hidden_units,
                                num_heads=hp.num_heads,
                                dropout_rate=hp.dropout_rate,
                                causality=False))
        self.__setattr__(
            'enc_feed_forward_%d' % i,
            feedforward(hp.hidden_units, [4 * hp.hidden_units, hp.hidden_units]))
    # crf
    self.crf = CRF(n_class, use_cuda=True if torch.cuda.is_available() else False)
class BiLstmCrf(nn.Module):
    def __init__(self, data, configs):
        super(BiLstmCrf, self).__init__()
        if configs['random_embedding']:
            self.word_embeddings = nn.Embedding(data.word_alphabet_size, configs['word_emb_dim'])
            self.word_embeddings.weight.data.copy_(
                torch.from_numpy(self.random_embedding(data.word_alphabet_size,
                                                       configs['word_emb_dim'])))
            self.word_drop = nn.Dropout(configs['dropout'])
        else:
            pass
        self.lstm = nn.LSTM(configs['word_emb_dim'],
                            configs['hidden_dim'] // 2,
                            num_layers=configs['num_layers'],
                            batch_first=configs['batch_first'],
                            bidirectional=configs['bidirectional'])
        self.drop_lstm = nn.Dropout(configs['dropout'])
        # data.label_alphabet_size is one larger than the number of labels; this is expected and
        # follows from how label_alphabet is initialised.
        # No label value in data.train_ids is 0, so softmax_logits[0] is always a very small
        # value and is never selected.
        self.hidden2tag = nn.Linear(configs['hidden_dim'], data.label_alphabet_size + 2)
        self.crf = CRF(data.label_alphabet_size, configs['gpu'])

    def forward(self, batch_input, batch_len, batch_recover, mask, batch_label=None):
        word_embeds = self.word_drop(self.word_embeddings(batch_input))
        packed_words = pack_padded_sequence(word_embeds, batch_len.cpu().numpy(), batch_first=True)
        hidden = None
        lstm_out, hidden = self.lstm(packed_words, hidden)
        lstm_out, _ = pad_packed_sequence(lstm_out)
        lstm_out = self.drop_lstm(lstm_out.transpose(1, 0))
        outputs = self.hidden2tag(lstm_out)
        if batch_label is not None:
            total_loss = self.crf.neg_log_likelihood_loss(outputs, mask, batch_label)
            scores, tag_seq = self.crf._viterbi_decode(outputs, mask)
            return total_loss, tag_seq
        else:
            scores, tag_seq = self.crf._viterbi_decode(outputs, mask)
            return tag_seq

    @staticmethod
    def random_embedding(vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale, [1, embedding_dim])
        return pretrain_emb
def __init__(self, data):
    super(CnnLstmCrf, self).__init__()
    self.char_embeddings = nn.Embedding(data.char_alphabet_size, config.char_emb_dim)
    self.char_embeddings.weight.data.copy_(
        torch.from_numpy(self.random_embedding(data.char_alphabet_size, config.char_emb_dim)))
    self.char_drop = nn.Dropout(config.dropout)
    self.char_cnn = nn.Conv1d(in_channels=config.char_emb_dim,
                              out_channels=config.char_hidden_dim,
                              kernel_size=3,
                              padding=1)
    self.word_embeddings = nn.Embedding(data.word_alphabet_size, config.word_emb_dim)
    self.word_embeddings.weight.data.copy_(
        torch.from_numpy(self.random_embedding(data.word_alphabet_size, config.word_emb_dim)))
    self.word_drop = nn.Dropout(config.dropout)
    self.feature_embeddings = nn.Embedding(data.feat_alphabet_size, config.feature_emb_dim)
    # load the pretrained feature embeddings:
    if len(data.pretrain_feature_embeddings) > 1:
        self.feature_embeddings.weight.data.copy_(torch.from_numpy(data.pretrain_feature_embeddings))
    self.lstm = nn.LSTM(config.char_hidden_dim + config.word_emb_dim + config.feature_emb_dim,
                        config.hidden_dim // 2,
                        num_layers=1,
                        batch_first=True,
                        bidirectional=True)
    self.droplstm = nn.Dropout(config.dropout)
    self.hidden2tag = nn.Linear(config.hidden_dim,
                                data.label_alphabet_size + 2)  # label_size + 2 (CRF START and END)
    self.crf = CRF(data.label_alphabet_size, config.gpu)
def __init__(self, config, embedding, word2Idx, label2Idx, description):
    super(ConceptTagger, self).__init__()
    self.embed_size = config.embed_size
    self.emb = embedding
    self.word2Idx = word2Idx
    self.label2Idx = label2Idx
    self.description = description
    self.use_crf = config.crf
    self.device = config.device
    self.config = config
    self.hidden_size1 = config.hidden_size1
    self.hidden_size2 = config.hidden_size2
    self.embedding = nn.Embedding.from_pretrained(
        torch.from_numpy(embedding.astype(np.float32)), padding_idx=word2Idx['<PAD>'])
    self.lstm1 = nn.LSTM(self.embed_size, self.hidden_size1,
                         batch_first=True, bias=True, bidirectional=True)
    self.lstm2 = nn.LSTM(self.hidden_size1 * 2 + self.embed_size, self.hidden_size2,
                         batch_first=True, bias=True, bidirectional=True)
    self.fc = nn.Linear(self.hidden_size2 * 2, 3, bias=True)
    self.dropout = nn.Dropout(config.dropout)
    if self.use_crf:
        self.crf = CRF(num_tags=3, batch_first=True)
def __init__(
    self,
    config_dic: dict,
    word_vocab_dim: int,
    char_vocab_dim: int,
    sw_vocab_dim_list: List[int],
    label_vocab_dim: int,
    pretrain_word_embedding: np.ndarray,
):
    super().__init__()
    self.gpu = config_dic.get("gpu")
    self.label_vocab_dim = label_vocab_dim
    self.word_lstm = WordLSTM(config_dic, word_vocab_dim, char_vocab_dim, sw_vocab_dim_list,
                              pretrain_word_embedding,
                              config_dic.get("use_modality_attention"),
                              config_dic.get("ner_dropout"))
    self.hidden2tag = nn.Linear(config_dic.get("word_hidden_dim"),
                                self.label_vocab_dim + 2)  # for START and END tag
    self.crf = CRF(self.label_vocab_dim, self.gpu)
    if self.gpu:
        self.word_lstm.cuda()
        self.hidden2tag.cuda()
def __init__(self, config, model_configs):
    super(BertBiLSTMCRF, self).__init__(config)
    self.num_labels = config.num_labels
    self.max_seq_length = model_configs['max_seq_length']
    self.bert = BertModel(config)
    self.use_cuda = model_configs['use_cuda'] and torch.cuda.is_available()
    self.crf = CRF(target_size=self.num_labels, use_cuda=self.use_cuda, average_batch=False)
    bert_embedding = config.hidden_size
    # hidden_dim is the output dimension;
    # the LSTM hidden_dim matches the hidden_dim used in init_hidden,
    # i.e. half the dimension of the output layer
    self.hidden_dim = config.hidden_size
    self.rnn_layers = model_configs['rnn_layers']
    self.lstm = nn.LSTM(input_size=bert_embedding,  # bert embedding
                        hidden_size=self.hidden_dim,
                        num_layers=self.rnn_layers,
                        batch_first=True,
                        # dropout=model_configs['train']['dropout_rate'],
                        bidirectional=True)
    self.dropout = nn.Dropout(model_configs['dropout_rate'])
    self.hidden2label = nn.Linear(self.hidden_dim * 2, self.num_labels + 2)
    self.apply(self.init_weights)
class BiLSTM_CRF(nn.Module):
    def __init__(self, data):
        super(BiLSTM_CRF, self).__init__()
        print("build batched lstmcrf...")
        self.gpu = data.HP_gpu
        # For the CRF we need two extra labels (START and END) for the downlayer lstm,
        # but use the original label size for the CRF itself
        label_size = data.label_alphabet_size
        data.label_alphabet_size += 2
        self.lstm = BiLSTM(data)
        self.crf = CRF(label_size, self.gpu)

    def neg_log_likelihood_loss(self, gaz_list, char_inputs, bichar_inputs, char_seq_lengths,
                                batch_label, mask):
        outs = self.lstm.get_output_score(gaz_list, char_inputs, bichar_inputs, char_seq_lengths)
        total_loss = self.crf.neg_log_likelihood_loss(outs, mask, batch_label)
        scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        return total_loss, tag_seq

    def forward(self, gaz_list, char_inputs, bichar_inputs, char_seq_lengths, mask):
        outs = self.lstm.get_output_score(gaz_list, char_inputs, bichar_inputs, char_seq_lengths)
        scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        return tag_seq

    def get_lstm_features(self, gaz_list, char_inputs, bichar_inputs, char_seq_lengths):
        return self.lstm.get_lstm_features(gaz_list, char_inputs, bichar_inputs, char_seq_lengths)
def __init__(self, data):
    super(Elmo_SeqLabel, self).__init__()
    self.use_crf = data.use_crf
    print("build elmo sequence labeling network...")
    print("use crf: ", self.use_crf)
    self.gpu = data.HP_gpu
    self.average_batch = data.average_batch_loss
    # add two more labels for the downlayer lstm, use the original label size for the CRF
    label_size = data.label_alphabet_size
    data.label_alphabet_size += 2
    self.word_hidden = Elmo(data.elmo_options_file, data.elmo_weight_file, 1,
                            requires_grad=data.elmo_tune, dropout=data.elmo_dropout)
    with open(data.elmo_options_file, 'r') as fin:
        self._options = json.load(fin)
    self.hidden2tag = nn.Linear(self._options['lstm']['projection_dim'] * 2,
                                data.label_alphabet_size)
    if self.use_crf:
        self.crf = CRF(label_size, self.gpu)
    if self.gpu >= 0 and torch.cuda.is_available():
        self.word_hidden = self.word_hidden.cuda(self.gpu)
        self.hidden2tag = self.hidden2tag.cuda(self.gpu)
class SeqModel(nn.Module):
    def __init__(self, data):
        super(SeqModel, self).__init__()
        self.data = data
        self.use_crf = data.use_crf
        print("build network...")
        print("word feature extractor: ", data.word_feature_extractor)
        self.gpu = data.HP_gpu
        self.average_batch = data.average_batch_loss
        # opinion and evidence are extracted separately
        label_size = data.label_alphabet_size
        self.word_hidden = WordSequence(data)
        if self.use_crf:
            self.word_crf = CRF(label_size, batch_first=True)
            if self.gpu:
                self.word_crf = self.word_crf.cuda()

    def neg_log_likelihood_loss(self, word_inputs, word_seq_lengths, batch_label, mask,
                                input_label_seq_tensor):
        # lstm_outs: (batch_size, sentence_length, tag_size)
        lstm_outs = self.word_hidden(word_inputs, word_seq_lengths, input_label_seq_tensor)
        batch_size = word_inputs.size(0)
        if self.use_crf:
            mask = mask.byte()
            loss = -self.word_crf(lstm_outs, batch_label, mask)
            tag_seq = self.word_crf.decode(lstm_outs, mask)
        else:
            loss_function = nn.NLLLoss()
            seq_len = lstm_outs.size(1)
            lstm_outs = lstm_outs.view(batch_size * seq_len, -1)
            score = F.log_softmax(lstm_outs, 1)
            loss = loss_function(score, batch_label.contiguous().view(batch_size * seq_len))
            _, tag_seq = torch.max(score, 1)
            tag_seq = tag_seq.view(batch_size, seq_len)
        return loss, tag_seq

    def evaluate(self, word_inputs, word_seq_lengths, mask, input_label_seq_tensor):
        lstm_outs = self.word_hidden(word_inputs, word_seq_lengths, input_label_seq_tensor)
        if self.use_crf:
            mask = mask.byte()
            tag_seq = self.word_crf.decode(lstm_outs, mask)
        else:
            batch_size = word_inputs.size(0)
            seq_len = lstm_outs.size(1)
            lstm_outs = lstm_outs.view(batch_size * seq_len, -1)
            _, tag_seq = torch.max(lstm_outs, 1)
            tag_seq = mask.long() * tag_seq.view(batch_size, seq_len)
        return tag_seq

    def forward(self, word_inputs, word_seq_lengths, mask, input_label_seq_tensor):
        return self.evaluate(word_inputs, word_seq_lengths, mask, input_label_seq_tensor)
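# Side note, as a minimal runnable sketch (not from the original source): the non-CRF branch
# above is plain token-level cross-entropy, since NLLLoss applied to log_softmax outputs is
# equivalent to CrossEntropyLoss applied to the raw logits.
import torch
import torch.nn as nn
import torch.nn.functional as F

logits = torch.randn(4, 9)                      # (batch * seq_len, tag_size)
gold = torch.randint(9, (4,))                   # flattened gold tag ids
nll = nn.NLLLoss()(F.log_softmax(logits, 1), gold)
ce = nn.CrossEntropyLoss()(logits, gold)
assert torch.allclose(nll, ce)                  # identical losses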
def __init__(self, data):
    super(BiLSTM_CRF, self).__init__()
    print("build batched lstmcrf...")
    self.gpu = data.HP_gpu
    # add two more labels for the downlayer lstm, use the original label size for the CRF
    label_size = data.label_alphabet_size
    data.label_alphabet_size += 2
    self.lstm = BiLSTM(data)
    self.crf = CRF(label_size, self.gpu)
def __init__(self, data):
    super(BiLSTM_CRF, self).__init__()
    print("build batched lstmcrf...")
    self.gpu = data.HP_gpu
    # For the CRF we need two extra labels (START and END) for the downlayer lstm,
    # but use the original label size for the CRF itself
    label_size = data.label_alphabet_size
    data.label_alphabet_size += 2
    self.lstm = BiLSTM(data)
    self.crf = CRF(label_size, self.gpu)
def __init__(self, data):
    super(CWS, self).__init__()
    print("build batched vanilla lstmcrf...")
    self.gpu = data.HP_gpu
    # add two more labels for the downlayer lstm, use the original label size for the CRF
    label_size = data.label_alphabet_size
    data.label_alphabet_size += 2
    self.lstm = Seq(data)
    self.crf = CRF(label_size, self.gpu)
    print("finished building model: ", self)
def __init__(self, config):
    super(Sequence_Label, self).__init__()
    self.num_labels = len(config.tag2idx)
    self._bert = Bert_CRF.from_pretrained(config.bert_model_dir, num_labels=self.num_labels)
    self.crf = CRF(self.num_labels, batch_first=True)
def __init__(self, num_units, rnn_hidden, num_tags, num_layers=1):
    super(BertNer, self).__init__()
    self.bert_model = BertModel.from_pretrained(BERT_PRETAIN_PATH)
    self.rnn = nn.GRU(num_units, rnn_hidden, num_layers=num_layers,
                      batch_first=True, bidirectional=True)
    self.linear = nn.Linear(2 * rnn_hidden, num_tags)
    # self.linear = nn.Linear(num_units, num_tags)
    self.crf = CRF(num_tags)
def train_crf():
    word2id, id2word = load_data(TOKEN_DATA)
    tag2id, id2tag = load_data(TAG_DATA)
    _, _, train_, x_train, y_train = generate_data(TRAIN_DATA, word2id, tag2id, max_len=hp.max_len)
    _, _, dev_seq_lens, x_dev, y_dev = generate_data(DEV_DATA, word2id, tag2id, max_len=hp.max_len)
    model_file = "logdir/model_crf"
    model = CRF()
    model.fit(x_train, y_train, template_file='model/module/templates.txt',
              model_file=model_file, max_iter=20)
    pre_seq = model.predict(x_dev, model_file=model_file)
    acc, p, r, f = get_ner_fmeasure(y_dev, pre_seq)
    print('acc:\t{}\tp:\t{}\tr:\t{}\tf:\t{}\n'.format(acc, p, r, f))
class BERT_LSTM_CRF(nn.Module):
    def __init__(self, bert_config, tagset_size, embedding_dim, hidden_dim, rnn_layers,
                 dropout_ratio, dropout1, use_cuda):
        super(BERT_LSTM_CRF, self).__init__()
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.word_embeds = BertModel.from_pretrained(bert_config)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=rnn_layers,
                            bidirectional=True, dropout=dropout_ratio, batch_first=True)
        self.rnn_layers = rnn_layers
        self.dropout1 = nn.Dropout(p=dropout1)
        self.crf = CRF(target_size=tagset_size, average_batch=True, use_cuda=use_cuda)
        self.liner = nn.Linear(hidden_dim * 2, tagset_size + 2)
        self.tagset_size = tagset_size
        self.use_cuda = use_cuda

    def rand_init_hidden(self, batch_size):
        # randomly initialised (h_0, c_0) for the bidirectional LSTM
        if self.use_cuda:
            return (Variable(torch.randn(2 * self.rnn_layers, batch_size, self.hidden_dim)).cuda(),
                    Variable(torch.randn(2 * self.rnn_layers, batch_size, self.hidden_dim)).cuda())
        else:
            return (Variable(torch.randn(2 * self.rnn_layers, batch_size, self.hidden_dim)),
                    Variable(torch.randn(2 * self.rnn_layers, batch_size, self.hidden_dim)))

    def get_output_score(self, sentence, attention_mask=None):
        batch_size = sentence.size(0)
        seq_length = sentence.size(1)
        embeds, _ = self.word_embeds(sentence, attention_mask=attention_mask,
                                     output_all_encoded_layers=False)
        hidden = self.rand_init_hidden(batch_size)
        # if embeds.is_cuda:
        #     hidden = (i.cuda() for i in hidden)
        lstm_out, hidden = self.lstm(embeds, hidden)
        lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim * 2)
        d_lstm_out = self.dropout1(lstm_out)
        l_out = self.liner(d_lstm_out)
        lstm_feats = l_out.contiguous().view(batch_size, seq_length, -1)
        return lstm_feats

    def forward(self, sentence, masks):
        lstm_feats = self.get_output_score(sentence)
        scores, tag_seq = self.crf._viterbi_decode(lstm_feats, masks.byte())
        return tag_seq

    def neg_log_likelihood_loss(self, sentence, mask, tags):
        lstm_feats = self.get_output_score(sentence)
        loss_value = self.crf.neg_log_likelihood_loss(lstm_feats, mask, tags)
        batch_size = lstm_feats.size(0)
        loss_value /= float(batch_size)
        return loss_value
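# Hedged usage sketch (the constructor arguments, tensor names, and shapes below are
# assumptions for illustration, not taken from the original source):
#
#   model = BERT_LSTM_CRF(bert_config='bert-base-chinese', tagset_size=num_tags,
#                         embedding_dim=768, hidden_dim=256, rnn_layers=1,
#                         dropout_ratio=0.5, dropout1=0.5, use_cuda=False)
#   # sentence: (batch_size, seq_len) LongTensor of WordPiece ids
#   # mask:     (batch_size, seq_len) ByteTensor, 1 = real token, 0 = padding
#   loss = model.neg_log_likelihood_loss(sentence, mask, tags)   # training loss (per batch)
#   pred = model(sentence, mask)                                 # Viterbi-decoded tag ids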
def __init__(self, bert_config, tagset_size, embedding_dim, hidden_dim, rnn_layers,
             dropout_ratio, dropout1, use_cuda):
    super(BERT_LSTM_CRF, self).__init__()
    self.embedding_dim = embedding_dim
    self.hidden_dim = hidden_dim
    self.word_embeds = BertModel.from_pretrained(bert_config)
    self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=rnn_layers,
                        bidirectional=True, dropout=dropout_ratio, batch_first=True)
    self.rnn_layers = rnn_layers
    self.dropout1 = nn.Dropout(p=dropout1)
    self.crf = CRF(target_size=tagset_size, average_batch=True, use_cuda=use_cuda)
    self.liner = nn.Linear(hidden_dim * 2, tagset_size + 2)
    self.tagset_size = tagset_size
    self.use_cuda = use_cuda
def __init__(self, data):
    super(BiLSTM_CRF, self).__init__()
    print("build batched BiLSTM CRF...")
    data.show_data_summary()
    self.embedding_dim = data.word_emb_dim
    self.hidden_dim = data.HP_hidden_dim
    self.drop = nn.Dropout(data.HP_dropout)
    self.droplstm = nn.Dropout(data.HP_dropout)
    # declare the embedding layer
    self.word_embeddings = nn.Embedding(data.word_alphabet.size(), self.embedding_dim)
    # load pretrained word vectors into self.word_embeddings if available
    if data.pretrain_word_embedding is not None:
        self.word_embeddings.weight.data.copy_(
            torch.from_numpy(data.pretrain_word_embedding))
    else:
        self.word_embeddings.weight.data.copy_(
            torch.from_numpy(self.random_embedding(data.word_alphabet.size(),
                                                   self.embedding_dim)))
    # declare the LSTM
    self.bilstm_flag = data.HP_bilstm
    self.lstm_layer = data.HP_lstm_layer
    if self.bilstm_flag:
        lstm_hidden = data.HP_hidden_dim // 2
    else:
        lstm_hidden = data.HP_hidden_dim
    self.lstm = nn.LSTM(self.embedding_dim, lstm_hidden, num_layers=self.lstm_layer,
                        batch_first=True, bidirectional=self.bilstm_flag)
    # declare the CRF
    self.index2label = {}
    for ele in data.label_alphabet.instance2index:
        self.index2label[data.label_alphabet.instance2index[ele]] = ele
    self.hidden2tag = nn.Linear(data.HP_hidden_dim, len(self.index2label) + 2)
    self.crf = CRF(len(self.index2label), data.HP_gpu)
    # move the model to the GPU if requested
    self.gpu = data.HP_gpu
    if self.gpu:
        self.drop = self.drop.cuda()
        self.droplstm = self.droplstm.cuda()
        self.word_embeddings = self.word_embeddings.cuda()
        self.hidden2tag = self.hidden2tag.cuda()
        self.lstm = self.lstm.cuda()
def __init__(self, configs, pretrained_word_embed=None):
    super(BiLSTMCRF, self).__init__()
    self.configs = configs
    self.num_labels = configs['num_labels']
    self.max_seq_length = configs['max_seq_length']
    self.use_cuda = configs['use_cuda'] and torch.cuda.is_available()
    self.bilstm = BiLSTM(configs, pretrained_word_embed)
    self.crf = CRF(target_size=self.num_labels, use_cuda=self.use_cuda, average_batch=False)
    self.hidden2label = nn.Linear(self.bilstm.hidden_dim * 2, self.num_labels + 2)
def __init__(self, vocab_size, embed_size, num_units, num_layers, num_tag, pre_train, use_cuda):
    super(RNNCRF, self).__init__()
    self.num_tag = num_tag
    self.use_cuda = use_cuda
    self.crf = CRF(num_tag)
    self.embedding = nn.Embedding(vocab_size, embed_size, _weight=pre_train)
    self.rnn = nn.LSTM(embed_size, num_units, num_layers=num_layers,
                       batch_first=True, bidirectional=True)
    self.linear = nn.Linear(2 * num_units, num_tag)
def __init__(self, data):
    super(SeqModel, self).__init__()
    self.data = data
    self.use_crf = data.use_crf
    print("build network...")
    print("word feature extractor: ", data.word_feature_extractor)
    self.gpu = data.HP_gpu
    self.average_batch = data.average_batch_loss
    # opinion and evidence are extracted separately
    label_size = data.label_alphabet_size
    self.word_hidden = WordSequence(data)
    if self.use_crf:
        self.word_crf = CRF(label_size, batch_first=True)
        if self.gpu:
            self.word_crf = self.word_crf.cuda()
class Sequence_Label(nn.Module):
    def __init__(self, config):
        super(Sequence_Label, self).__init__()
        self.num_labels = len(config.tag2idx)
        self._bert = Bert_CRF.from_pretrained(config.bert_model_dir, num_labels=self.num_labels)
        self.crf = CRF(self.num_labels, batch_first=True)

    def forward(self, input_ids, attention_mask, token_type_ids=None, labels=None):
        output = self._bert(input_ids=input_ids, attention_mask=attention_mask,
                            token_type_ids=token_type_ids)
        attn_mask = attention_mask.type(torch.uint8)
        if labels is not None:
            loss = -self.crf(log_soft(output, 2), labels, mask=attn_mask, reduction='mean')
            return loss
        else:
            prediction = self.crf.decode(output, mask=attn_mask)
            return prediction
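# Self-contained sketch (an illustration under the assumption that the `CRF` used here is the
# pytorch-crf package, i.e. `from torchcrf import CRF`): the training loss is the negative
# log-likelihood of the gold tag sequence, and decode() returns the Viterbi best path.
import torch
from torchcrf import CRF

num_tags, batch_size, seq_len = 5, 2, 7
crf = CRF(num_tags, batch_first=True)
emissions = torch.randn(batch_size, seq_len, num_tags)       # e.g. (log-softmaxed) BERT logits
tags = torch.randint(num_tags, (batch_size, seq_len))        # gold tag ids
mask = torch.ones(batch_size, seq_len, dtype=torch.uint8)    # 1 = real token, 0 = padding

loss = -crf(emissions, tags, mask=mask, reduction='mean')    # scalar training loss
best_paths = crf.decode(emissions, mask=mask)                 # list of tag-id lists per sentence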
def __init__(self, config, model_configs):
    super(BertCRF, self).__init__(config)
    self.num_labels = config.num_labels
    self.max_seq_length = model_configs['max_seq_length']
    self.bert = BertModel(config)
    self.use_cuda = model_configs['use_cuda'] and torch.cuda.is_available()
    self.crf = CRF(target_size=self.num_labels, use_cuda=self.use_cuda, average_batch=False)
    bert_embedding = config.hidden_size
    # hidden_dim is the output dimension
    self.hidden_dim = config.hidden_size
    self.dropout = nn.Dropout(model_configs['dropout_rate'])
    self.hidden2label = nn.Linear(self.hidden_dim, self.num_labels + 2)
    self.apply(self.init_weights)
def __init__(self, data, model_config):
    super(BilstmCrf, self).__init__()
    if model_config['random_embedding'] == 'True':
        self.char_embeddings = nn.Embedding(data.char_alphabet_size, model_config['char_emb_dim'])
        self.char_embeddings.weight.data.copy_(
            torch.from_numpy(self.random_embedding(data.char_alphabet_size,
                                                   model_config['char_emb_dim'])))
        self.char_drop = nn.Dropout(model_config['dropout'])
    else:
        char_emb_path = model_config['char_emb_file']
        self.pretrain_char_embedding, self.char_emb_dim = build_pretrain_embedding(
            char_emb_path, data.char_alphabet)
        self.char_embeddings = nn.Embedding(data.char_alphabet_size, model_config['char_emb_dim'])
        self.char_embeddings.weight.data.copy_(torch.from_numpy(self.pretrain_char_embedding))
        # set 'inf' to 0:
        self.char_embeddings.weight.data[0] = torch.zeros(200)
        self.char_drop = nn.Dropout(model_config['dropout'])
    self.intent_embeddings = nn.Embedding(data.intent_alphabet_size,
                                          model_config['intent_emb_dim'])
    self.intent_embeddings.weight.data.copy_(
        torch.from_numpy(self.random_embedding(data.intent_alphabet_size,
                                               model_config['intent_emb_dim'])))
    self.input_drop = nn.Dropout(model_config['dropout'])
    self.lstm = nn.LSTM(model_config['char_emb_dim'] + model_config['intent_emb_dim'],
                        model_config['lstm_hidden_dim'] // 2,
                        num_layers=model_config['num_layers'],
                        batch_first=model_config['batch_first'],
                        bidirectional=model_config['bidirectional'])
    self.drop_lstm = nn.Dropout(model_config['dropout'])
    self.hidden2tag = nn.Linear(model_config['lstm_hidden_dim'], data.label_alphabet_size + 2)
    self.crf = CRF(data.label_alphabet_size, model_config['gpu'])
    self.num_layers = model_config['num_layers']
    self.hidden_size = model_config['lstm_hidden_dim'] // 2
    self.device = model_config['device']
def __init__(self, num_units, rnn_hidden, num_tags, num_layers=1, use_cuda=False):
    super(ElmoNer, self).__init__()
    self.use_cuda = use_cuda
    self.embedding = Embedder(ELMO_PRETAIN_PATH)
    self.rnn = nn.GRU(num_units, rnn_hidden, num_layers=num_layers,
                      batch_first=True, bidirectional=True)
    self.linear = nn.Linear(2 * rnn_hidden, num_tags)
    # self.linear = nn.Linear(num_units, num_tags)
    self.crf = CRF(num_tags)
def __init__(self, kwargs):
    super(CrfTagger2, self).__init__()
    self.gpu = kwargs.pop("use_gpu", False)
    self.average_batch = kwargs.pop("average_batch", True)
    self.crf = NCRFpp_CRF(kwargs["tagset_size"], self.gpu)
    if kwargs.pop("use_lstm", False):
        kwargs["tagset_size"] += 2
        self.lstm = LstmTagger(**kwargs)
def __init__(self, reader):
    super(RoleFiller, self).__init__()
    # reader = Reader('')
    self.embedding = Glove_Bert_Embedding(reader.word_dict.word_size,
                                          reader.config.parser['word_embed_dim'],
                                          reader.config.parser['HP_dropout'],
                                          reader.build_pre_embedding(use_saved_embed=True),
                                          reader.word_dict.idx2word,
                                          reader.config.parser['bert_dir'])
    self.drop_lstm_sent = nn.Dropout(reader.config.parser['HP_dropout'] - 0.1)
    self.drop_lstm_para = nn.Dropout(reader.config.parser['HP_dropout'])
    self.batch_average = reader.config.parser['batch_average']
    # 768 is the fixed BERT hidden dimension
    self.embedding_dim = reader.config.parser['word_embed_dim'] + 768
    # LSTM
    self.hidden_dim = reader.config.parser['HP_hidden_dim']
    if reader.config.parser['HP_bilstm']:
        self.hidden_dim //= 2
    # LSTM for paragraph level
    self.lstm_para = nn.LSTM(self.embedding_dim, self.hidden_dim,
                             reader.config.parser['HP_lstm_layers_num'],
                             batch_first=True,
                             bidirectional=reader.config.parser['HP_bilstm'])
    # LSTM for sentence level
    self.lstm_sent = nn.LSTM(self.embedding_dim, self.hidden_dim,
                             reader.config.parser['HP_lstm_layers_num'],
                             batch_first=True,
                             bidirectional=reader.config.parser['HP_bilstm'])
    # gate-sigmoid sum
    self.gate = nn.Linear(2 * reader.config.parser['HP_hidden_dim'],
                          reader.config.parser['HP_hidden_dim'])
    self.sigmoid = nn.Sigmoid()
    self.hidden2tag = nn.Linear(reader.config.parser['HP_hidden_dim'],
                                reader.tag_dict.word_size + 2)
    self.softmax = nn.Softmax(dim=-1)
    self.crf = CRF(reader.tag_dict.word_size)
def __init__(self, data):
    super(BiLSTM_CRF, self).__init__()
    print("build batched lstmcrf...")
    self.gpu = data.HP_gpu
    # add two more labels for the downlayer lstm, use the original label size for the CRF
    label_size = data.label_alphabet_size
    data.label_alphabet_size += 2
    self.lstm = BiLSTM(data)
    self.crf = CRF(label_size, self.gpu)
class CrfTagger2(nn.Module):
    # based on SeqLabel in NCRFpp
    def __init__(self, kwargs):
        super(CrfTagger2, self).__init__()
        self.gpu = kwargs.pop("use_gpu", False)
        self.average_batch = kwargs.pop("average_batch", True)
        self.crf = NCRFpp_CRF(kwargs["tagset_size"], self.gpu)
        if kwargs.pop("use_lstm", False):
            kwargs["tagset_size"] += 2
            self.lstm = LstmTagger(**kwargs)

    @staticmethod
    def _get_mask(X_lens, batch_size, seq_len):
        mask = Variable(torch.zeros((batch_size, seq_len))).byte()
        for idx, X_len in enumerate(X_lens):
            mask[idx, :X_len] = torch.ones(X_len)
        return mask

    def forward(self, input, input_lens):
        logits = self.lstm.forward(input, input_lens, apply_softmax=False)
        batch_size, seq_len, _ = logits.size()
        mask = self._get_mask(input_lens, batch_size, seq_len)
        return logits, mask

    def loss(self, logits, mask, target):
        total_loss = self.crf.neg_log_likelihood_loss(logits, mask, target)
        batch_size, seq_len, _ = logits.size()
        if self.average_batch:
            total_loss = total_loss / batch_size
        return total_loss

    def decode(self, logits, mask, return_scores=False):
        scores, tag_seq = self.crf.viterbi_decode(logits, mask)
        if return_scores:
            return scores, tag_seq
        return tag_seq

    def decode_nbest(self, logits, mask, nbest, return_scores=False):
        scores, tag_seq = self.crf.viterbi_decode_nbest(logits, mask, nbest)
        if return_scores:
            return scores, tag_seq
        return tag_seq
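# Hedged usage sketch (tensor names and shapes below are assumptions, not from the original
# source): CrfTagger2 keeps the three stages separate, so the caller decides when to compute
# the loss and when to decode.
#
#   logits, mask = tagger(padded_word_ids, sentence_lengths)   # (batch, seq_len, tagset_size + 2)
#   loss = tagger.loss(logits, mask, gold_tags)                # CRF negative log-likelihood
#   pred = tagger.decode(logits, mask)                         # Viterbi best path
#   top_k = tagger.decode_nbest(logits, mask, nbest=5)         # n-best paths, if needed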
class ElmoNer(nn.Module):
    def __init__(self, num_units, rnn_hidden, num_tags, num_layers=1, use_cuda=False):
        super(ElmoNer, self).__init__()
        self.use_cuda = use_cuda
        self.embedding = Embedder(ELMO_PRETAIN_PATH)
        self.rnn = nn.GRU(num_units, rnn_hidden, num_layers=num_layers,
                          batch_first=True, bidirectional=True)
        self.linear = nn.Linear(2 * rnn_hidden, num_tags)
        # self.linear = nn.Linear(num_units, num_tags)
        self.crf = CRF(num_tags)

    def forward(self, x_data, y_data, masks):
        """Forward pass: returns the negative CRF log-likelihood as the training loss.

        :param x_data: batch of tokenised sentences fed to ELMo
        :param y_data: (batch, seq_len) gold tag ids
        :param masks: (batch, seq_len) padding mask
        :return: scalar loss
        """
        encoded_layers = self.embedding.sents2elmo(x_data)
        out = self.rnn_layer(encoded_layers)
        loss = -1 * self.crf(out, y_data.transpose(0, 1), masks.transpose(0, 1))
        return loss

    def rnn_layer(self, encoded_layers):
        """Runs the BiGRU and the tag projection over the ELMo embeddings.

        :param encoded_layers: (batch, seq_len, hidden) ELMo outputs
        :return: (seq_len, batch, num_tags) emission scores
        """
        encoded_layers = np.array(encoded_layers)
        encoded_layers = torch.from_numpy(encoded_layers)
        if self.use_cuda:
            encoded_layers = encoded_layers.cuda()
        out, _ = self.rnn(encoded_layers)
        out = self.linear(out)
        out = out.transpose(0, 1)
        return out

    def test(self, x_data, masks):
        encoded_layers = self.embedding.sents2elmo(x_data)
        out = self.rnn_layer(encoded_layers)
        best_paths = self.crf.decode(out, mask=masks.transpose(0, 1))
        return best_paths
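# Hedged usage sketch (an assumption about the calling code, not from the original source):
# this CRF appears to expect (seq_len, batch) ordering, which is why forward() and test()
# transpose y_data and masks, and why rnn_layer() transposes its output.
#
#   # x_data: list of tokenised sentences (list of lists of strings) for ELMo
#   # y_data: (batch, seq_len) LongTensor of tag ids; masks: (batch, seq_len) ByteTensor
#   loss = model(x_data, y_data, masks)        # training: negative CRF log-likelihood
#   paths = model.test(x_data, masks)          # inference: Viterbi-decoded tag ids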