class BiLSTM_CRF(nn.Module):
    def __init__(self, data):
        super(BiLSTM_CRF, self).__init__()
        print ("build batched lstmcrf...")
        self.gpu = data.HP_gpu
        ## add two more label for downlayer lstm, use original label size for CRF
        label_size = data.label_alphabet_size
        data.label_alphabet_size += 2
        self.lstm = BiLSTM(data)
        self.crf = CRF(label_size, self.gpu)


    def neg_log_likelihood_loss(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover, batch_label, mask):
        outs = self.lstm.get_output_score(gaz_list, word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)
        total_loss = self.crf.neg_log_likelihood_loss(outs, mask, batch_label)
        scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        return total_loss, tag_seq


    def forward(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover, mask):
        outs = self.lstm.get_output_score(gaz_list, word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        return tag_seq


    def get_lstm_features(self, gaz_list, word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover):
        return self.lstm.get_lstm_features(gaz_list, word_inputs, biword_inputs, word_seq_lengths, char_inputs, char_seq_lengths, char_seq_recover)
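The split above is the usual NCRF++ pattern: neg_log_likelihood_loss computes the CRF loss for training, while forward runs Viterbi decoding for prediction. A minimal sketch of how the two entry points are driven, assuming a prepared `data` object and batch tensors as used throughout this listing (the optimizer choice and all names below are illustrative, not from the original project):

import torch

model = BiLSTM_CRF(data)
optimizer = torch.optim.SGD(model.parameters(), lr=0.015)

# training step: CRF negative log-likelihood
model.train()
optimizer.zero_grad()
loss, _ = model.neg_log_likelihood_loss(gaz_list, word_inputs, biword_inputs,
                                        word_seq_lengths, char_inputs,
                                        char_seq_lengths, char_seq_recover,
                                        batch_label, mask)
loss.backward()
optimizer.step()

# prediction: Viterbi-decoded tag indices
model.eval()
with torch.no_grad():
    tag_seq = model(gaz_list, word_inputs, biword_inputs, word_seq_lengths,
                    char_inputs, char_seq_lengths, char_seq_recover, mask)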
        
class SeqLabel(nn.Module):
    def __init__(self, data):
        super(SeqLabel, self).__init__()
        label_size = data.label_alphabet_size
        data.label_alphabet_size += 2
        self.word_hidden = WordSequence(data)
        self.crf = CRF(label_size, data.gpu)

    def calculate_loss(self, word_inputs, feature_inputs, word_seq_lengths,
                       char_inputs, char_seq_lengths, char_seq_recover,
                       batch_label, mask):
        outs = self.word_hidden(word_inputs, feature_inputs, word_seq_lengths,
                                char_inputs, char_seq_lengths,
                                char_seq_recover)
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        total_loss = self.crf.neg_log_likelihood_loss(outs, mask, batch_label)
        scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        return total_loss, tag_seq

    def forward(self, word_inputs, feature_inputs, word_seq_lengths,
                char_inputs, char_seq_lengths, char_seq_recover, mask):
        outs = self.word_hidden(word_inputs, feature_inputs, word_seq_lengths,
                                char_inputs, char_seq_lengths,
                                char_seq_recover)
        batch_size = word_inputs.size(0)
        seq_len = word_inputs.size(1)
        scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        return tag_seq
class NER_Model(nn.Module):
    def __init__(self, bert_path, bert_dim, n_class, drop_p, num_pre):
        super(NER_Model, self).__init__()

        self.bert_model = BertModel.from_pretrained(bert_path)
        self.fc = nn.Linear(bert_dim * 2, n_class)
        self.dropout = nn.Dropout(drop_p)
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')

        # pre embedding
        self.pre_dim = bert_dim
        self.pre_embedding = nn.Embedding(num_pre, self.pre_dim)
        self.pre_embedding.weight.data.copy_(
            torch.from_numpy(
                self.random_embedding_label(num_pre, self.pre_dim, 0.025)))

        # transformer
        self.enc_positional_encoding = positional_encoding(768,
                                                           zeros_pad=True,
                                                           scale=True)
        for i in range(hp.num_blocks):
            self.__setattr__(
                'enc_self_attention_%d' % i,
                multihead_attention(num_units=hp.hidden_units,
                                    num_heads=hp.num_heads,
                                    dropout_rate=hp.dropout_rate,
                                    causality=False))
            self.__setattr__(
                'enc_feed_forward_%d' % i,
                feedforward(hp.hidden_units,
                            [4 * hp.hidden_units, hp.hidden_units]))

        # crf
        self.crf = CRF(n_class,
                       use_cuda=True if torch.cuda.is_available() else False)
Example #4
class BiLstmCrf(nn.Module):
    def __init__(self, data, configs):
        super(BiLstmCrf, self).__init__()
        if configs['random_embedding']:
            self.word_embeddings = nn.Embedding(data.word_alphabet_size,
                                                configs['word_emb_dim'])
            self.word_embeddings.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.word_alphabet_size,
                                          configs['word_emb_dim'])))
            self.word_drop = nn.Dropout(configs['dropout'])
        else:
            pass
        self.lstm = nn.LSTM(configs['word_emb_dim'],
                            configs['hidden_dim'] // 2,
                            num_layers=configs['num_layers'],
                            batch_first=configs['batch_first'],
                            bidirectional=configs['bidirectional'])
        self.drop_lstm = nn.Dropout(configs['dropout'])
        # data.label_alphabet_size is one larger than the number of labels; this is expected and follows from how label_alphabet is initialized
        # no label in data.train_ids is ever 0, so softmax_logits[0] always stays very small and is never selected
        self.hidden2tag = nn.Linear(configs['hidden_dim'],
                                    data.label_alphabet_size + 2)
        self.crf = CRF(data.label_alphabet_size, configs['gpu'])

    def forward(self,
                batch_input,
                batch_len,
                batch_recover,
                mask,
                batch_label=None):
        word_embeds = self.word_drop(self.word_embeddings(batch_input))

        packed_words = pack_padded_sequence(word_embeds,
                                            batch_len.cpu().numpy(),
                                            batch_first=True)
        hidden = None
        lstm_out, hidden = self.lstm(packed_words, hidden)
        lstm_out, _ = pad_packed_sequence(lstm_out)
        lstm_out = self.drop_lstm(lstm_out.transpose(1, 0))

        outputs = self.hidden2tag(lstm_out)

        if batch_label is not None:
            total_loss = self.crf.neg_log_likelihood_loss(
                outputs, mask, batch_label)
            scores, tag_seq = self.crf._viterbi_decode(outputs, mask)
            return total_loss, tag_seq
        else:
            scores, tag_seq = self.crf._viterbi_decode(outputs, mask)
            return tag_seq

    @staticmethod
    def random_embedding(vocab_size, embedding_dim):
        pretrain_emb = np.empty([vocab_size, embedding_dim])
        scale = np.sqrt(3.0 / embedding_dim)
        for index in range(vocab_size):
            pretrain_emb[index, :] = np.random.uniform(-scale, scale,
                                                       [1, embedding_dim])
        return pretrain_emb
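pack_padded_sequence as called above (PyTorch's default enforce_sorted=True) requires the batch to be ordered by length, descending; that is why the caller passes a batch_recover index. A small self-contained sketch of the sort/pack/unpack/recover pattern, independent of the classes in this listing:

import torch
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

lengths = torch.tensor([3, 5, 2])          # true length of each padded sequence
x = torch.randn(3, 5, 8)                   # (batch, max_len, emb_dim)

# sort by length (descending) and remember how to undo the sort
sorted_len, perm_idx = lengths.sort(0, descending=True)
_, recover_idx = perm_idx.sort(0, descending=False)

rnn = torch.nn.LSTM(8, 4, batch_first=True, bidirectional=True)
packed = pack_padded_sequence(x[perm_idx], sorted_len.cpu().numpy(), batch_first=True)
out, _ = rnn(packed)
out, _ = pad_packed_sequence(out, batch_first=True)
out = out[recover_idx]                     # restore the original batch order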
Example #5
    def __init__(self, data):
        super(CnnLstmCrf, self).__init__()

        self.char_embeddings = nn.Embedding(data.char_alphabet_size, config.char_emb_dim)
        self.char_embeddings.weight.data.copy_(
            torch.from_numpy(self.random_embedding(data.char_alphabet_size, config.char_emb_dim)))

        self.char_drop = nn.Dropout(config.dropout)
        self.char_cnn = nn.Conv1d(
            in_channels=config.char_emb_dim, out_channels=config.char_hidden_dim, kernel_size=3, padding=1)

        self.word_embeddings = nn.Embedding(data.word_alphabet_size, config.word_emb_dim)
        self.word_embeddings.weight.data.copy_(
            torch.from_numpy(self.random_embedding(data.word_alphabet_size, config.word_emb_dim)))

        self.word_drop = nn.Dropout(config.dropout)

        self.feature_embeddings = nn.Embedding(data.feat_alphabet_size, config.feature_emb_dim)
        # load the pretrained feature embeddings:
        if len(data.pretrain_feature_embeddings) > 1:
            self.feature_embeddings.weight.data.copy_(torch.from_numpy(data.pretrain_feature_embeddings))

        self.lstm = nn.LSTM(
            config.char_hidden_dim + config.word_emb_dim + config.feature_emb_dim, config.hidden_dim // 2,
            num_layers=1, batch_first=True, bidirectional=True)
        self.droplstm = nn.Dropout(config.dropout)

        self.hidden2tag = nn.Linear(config.hidden_dim, data.label_alphabet_size + 2)  # label_size + 2 (START and END for the CRF)

        self.crf = CRF(data.label_alphabet_size, config.gpu)
Example #6
    def __init__(self, config, embedding, word2Idx, label2Idx, description):
        super(ConceptTagger, self).__init__()
        self.embed_size = config.embed_size
        self.emb = embedding
        self.word2Idx = word2Idx
        self.label2Idx = label2Idx
        self.description = description
        self.use_crf = config.crf
        self.device = config.device
        self.config = config
        self.hidden_size1 = config.hidden_size1
        self.hidden_size2 = config.hidden_size2

        self.embedding = nn.Embedding.from_pretrained(
            torch.from_numpy(embedding.astype(np.float32)),
            padding_idx=word2Idx['<PAD>'])
        self.lstm1 = nn.LSTM(self.embed_size,
                             self.hidden_size1,
                             batch_first=True,
                             bias=True,
                             bidirectional=True)
        self.lstm2 = nn.LSTM(self.hidden_size1 * 2 + self.embed_size,
                             self.hidden_size2,
                             batch_first=True,
                             bias=True,
                             bidirectional=True)
        self.fc = nn.Linear(self.hidden_size2 * 2, 3, bias=True)
        self.dropout = nn.Dropout(config.dropout)
        if self.use_crf:
            self.crf = CRF(num_tags=3, batch_first=True)
    def __init__(
        self,
        config_dic: dict,
        word_vocab_dim: int,
        char_vocab_dim: int,
        sw_vocab_dim_list: List[int],
        label_vocab_dim: int,
        pretrain_word_embedding: np.ndarray,
    ):
        super().__init__()
        self.gpu = config_dic.get("gpu")
        self.label_vocab_dim = label_vocab_dim

        self.word_lstm = WordLSTM(config_dic, word_vocab_dim, char_vocab_dim,
                                  sw_vocab_dim_list, pretrain_word_embedding,
                                  config_dic.get("use_modality_attention"),
                                  config_dic.get("ner_dropout"))
        self.hidden2tag = nn.Linear(config_dic.get("word_hidden_dim"),
                                    self.label_vocab_dim +
                                    2)  # for START and END tag
        self.crf = CRF(self.label_vocab_dim, self.gpu)

        if self.gpu:
            self.word_lstm.cuda()
            self.hidden2tag.cuda()
Example #8
 def __init__(self, config, model_configs):
     super(BertBiLSTMCRF, self).__init__(config)
     self.num_labels = config.num_labels
     self.max_seq_length = model_configs['max_seq_length']
     self.bert = BertModel(config)
     self.use_cuda = model_configs['use_cuda'] and torch.cuda.is_available()
     self.crf = CRF(target_size=self.num_labels,
                    use_cuda=self.use_cuda,
                    average_batch=False)
     bert_embedding = config.hidden_size
     # hidden_dim is the output dimension
     # the LSTM's hidden_dim is the same as the hidden_dim used in init_hidden
     # and is 1/2 of the output layer's hidden_dim
     self.hidden_dim = config.hidden_size
     self.rnn_layers = model_configs['rnn_layers']
     self.lstm = nn.LSTM(
         input_size=bert_embedding,  # bert embedding
         hidden_size=self.hidden_dim,
         num_layers=self.rnn_layers,
         batch_first=True,
         # dropout = model_configs['train']['dropout_rate'],
         bidirectional=True)
     self.dropout = nn.Dropout(model_configs['dropout_rate'])
     self.hidden2label = nn.Linear(self.hidden_dim * 2, self.num_labels + 2)
     self.apply(self.init_weights)
Example #9
class BiLSTM_CRF(nn.Module):
    def __init__(self, data):
        super(BiLSTM_CRF, self).__init__()
        print("build batched lstmcrf...")
        self.gpu = data.HP_gpu
        # For the CRF we need two extra labels, START and END, in the lower LSTM layer; the CRF itself keeps the original label size
        label_size = data.label_alphabet_size
        data.label_alphabet_size += 2
        self.lstm = BiLSTM(data)
        self.crf = CRF(label_size, self.gpu)

    def neg_log_likelihood_loss(self, gaz_list, char_inputs, bichar_inputs,
                                char_seq_lengths, batch_label, mask):
        outs = self.lstm.get_output_score(gaz_list, char_inputs, bichar_inputs,
                                          char_seq_lengths)
        total_loss = self.crf.neg_log_likelihood_loss(outs, mask, batch_label)
        scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        return total_loss, tag_seq

    def forward(self, gaz_list, char_inputs, bichar_inputs, char_seq_lengths,
                mask):
        outs = self.lstm.get_output_score(gaz_list, char_inputs, bichar_inputs,
                                          char_seq_lengths)
        scores, tag_seq = self.crf._viterbi_decode(outs, mask)
        return tag_seq

    def get_lstm_features(self, gaz_list, char_inputs, bichar_inputs,
                          char_seq_lengths):
        return self.lstm.get_lstm_features(gaz_list, char_inputs,
                                           bichar_inputs, char_seq_lengths)
Example #10
    def __init__(self, data):
        super(Elmo_SeqLabel, self).__init__()
        self.use_crf = data.use_crf
        print("build elmo sequence labeling network...")
        print("use crf: ", self.use_crf)

        self.gpu = data.HP_gpu
        self.average_batch = data.average_batch_loss
        ## add two more labels for the lower LSTM layer; the CRF keeps the original label size
        label_size = data.label_alphabet_size
        data.label_alphabet_size += 2

        self.word_hidden = Elmo(data.elmo_options_file,
                                data.elmo_weight_file,
                                1,
                                requires_grad=data.elmo_tune,
                                dropout=data.elmo_dropout)

        with open(data.elmo_options_file, 'r') as fin:
            self._options = json.load(fin)
        self.hidden2tag = nn.Linear(
            self._options['lstm']['projection_dim'] * 2,
            data.label_alphabet_size)

        if self.use_crf:
            self.crf = CRF(label_size, self.gpu)

        if self.gpu >= 0 and torch.cuda.is_available():
            self.word_hidden = self.word_hidden.cuda(self.gpu)
            self.hidden2tag = self.hidden2tag.cuda(self.gpu)
class SeqModel(nn.Module):
    def __init__(self, data):
        super(SeqModel, self).__init__()
        self.data = data
        self.use_crf = data.use_crf
        print("build network...")
        print("word feature extractor: ", data.word_feature_extractor)

        self.gpu = data.HP_gpu
        self.average_batch = data.average_batch_loss
        # opinion and evidence are extracted separately
        label_size = data.label_alphabet_size
        self.word_hidden = WordSequence(data)
        if self.use_crf:
            self.word_crf = CRF(label_size, batch_first=True)
            if self.gpu:
                self.word_crf = self.word_crf.cuda()

    def neg_log_likelihood_loss(self, word_inputs, word_seq_lengths,
                                batch_label, mask, input_label_seq_tensor):
        lstm_outs = self.word_hidden(word_inputs, word_seq_lengths,
                                     input_label_seq_tensor)
        # lstm_outs: (batch_size, sentence_length, tag_size)
        batch_size = word_inputs.size(0)
        if self.use_crf:
            mask = mask.byte()
            loss = (-self.word_crf(lstm_outs, batch_label, mask))
            tag_seq = self.word_crf.decode(lstm_outs, mask)
        else:
            loss_function = nn.NLLLoss()
            seq_len = lstm_outs.size(1)
            lstm_outs = lstm_outs.view(batch_size * seq_len, -1)
            score = F.log_softmax(lstm_outs, 1)
            loss = loss_function(
                score,
                batch_label.contiguous().view(batch_size * seq_len))
            _, tag_seq = torch.max(score, 1)
            tag_seq = tag_seq.view(batch_size, seq_len)
        return loss, tag_seq

    def evaluate(self, word_inputs, word_seq_lengths, mask,
                 input_label_seq_tensor):
        lstm_outs = self.word_hidden(word_inputs, word_seq_lengths,
                                     input_label_seq_tensor)
        if self.use_crf:
            mask = mask.byte()
            tag_seq = self.word_crf.decode(lstm_outs, mask)
        else:
            batch_size = word_inputs.size(0)
            seq_len = lstm_outs.size(1)
            lstm_outs = lstm_outs.view(batch_size * seq_len, -1)
            _, tag_seq = torch.max(lstm_outs, 1)
            tag_seq = mask.long() * tag_seq.view(batch_size, seq_len)
        return tag_seq

    def forward(self, word_inputs, word_seq_lengths, mask,
                input_label_seq_tensor):
        return self.evaluate(word_inputs, word_seq_lengths, mask,
                             input_label_seq_tensor)
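Unlike the NCRF++-style CRF used in most examples in this listing, the CRF here is constructed with batch_first=True, called directly for the log-likelihood, and queried with .decode(), which matches the pytorch-crf package. A small self-contained sketch of that API, under the assumption that pytorch-crf is indeed the implementation in use:

import torch
from torchcrf import CRF   # pytorch-crf (assumed implementation)

num_tags, batch_size, seq_len = 5, 2, 4
emissions = torch.randn(batch_size, seq_len, num_tags)
tags = torch.randint(0, num_tags, (batch_size, seq_len))
mask = torch.tensor([[1, 1, 1, 0],
                     [1, 1, 1, 1]], dtype=torch.uint8)   # first timestep must be unmasked

crf = CRF(num_tags, batch_first=True)
loss = -crf(emissions, tags, mask=mask)        # negative log-likelihood (scalar)
best_paths = crf.decode(emissions, mask=mask)  # list of tag-id lists, one per sentence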
Example #12
 def __init__(self, data):
     super(BiLSTM_CRF, self).__init__()
     print("build batched lstmcrf...")
     self.gpu = data.HP_gpu
     # add two more labels for the lower LSTM layer; the CRF keeps the original label size
     label_size = data.label_alphabet_size
     data.label_alphabet_size += 2
     self.lstm = BiLSTM(data)
     self.crf = CRF(label_size, self.gpu)
Example #13
 def __init__(self, data):
     super(BiLSTM_CRF, self).__init__()
     print("build batched lstmcrf...")
     self.gpu = data.HP_gpu
     # For the CRF we need two extra labels, START and END, in the lower LSTM layer; the CRF itself keeps the original label size
     label_size = data.label_alphabet_size
     data.label_alphabet_size += 2
     self.lstm = BiLSTM(data)
     self.crf = CRF(label_size, self.gpu)
Example #14
 def __init__(self, data):
     super(CWS, self).__init__()
     print("build batched vallina lstmcrf...")
     self.gpu = data.HP_gpu
     #  add two more label for downlayer lstm, use original label size for CRF
     label_size = data.label_alphabet_size
     data.label_alphabet_size += 2
     self.lstm = Seq(data)
     self.crf = CRF(label_size, self.gpu)
     print("finished built model: ", self)
Example #15
    def __init__(self, config):

        super(Sequence_Label, self).__init__()

        self.num_labels = len(config.tag2idx)

        self._bert = Bert_CRF.from_pretrained(config.bert_model_dir,
                                              num_labels=self.num_labels)

        self.crf = CRF(self.num_labels, batch_first=True)
Example #16
 def __init__(self, num_units, rnn_hidden, num_tags, num_layers=1):
     super(BertNer, self).__init__()
     self.bert_model = BertModel.from_pretrained(BERT_PRETAIN_PATH)
     self.rnn = nn.GRU(num_units,
                       rnn_hidden,
                       num_layers=num_layers,
                       batch_first=True,
                       bidirectional=True)
     self.linear = nn.Linear(2 * rnn_hidden, num_tags)
     # self.linear = nn.Linear(num_units, num_tags)
     self.crf = CRF(num_tags)
Example #17
def train_crf():
	word2id, id2word = load_data(TOKEN_DATA)
	tag2id, id2tag = load_data(TAG_DATA)
	_, _, train_, x_train, y_train = generate_data(TRAIN_DATA, word2id, tag2id, max_len=hp.max_len)
	_, _, dev_seq_lens, x_dev, y_dev = generate_data(DEV_DATA, word2id, tag2id, max_len=hp.max_len)
	model_file = "logdir/model_crf"
	model = CRF()
	model.fit(x_train, y_train, template_file='model/module/templates.txt', model_file=model_file, max_iter=20)
	pre_seq = model.predict(x_dev, model_file=model_file)
	acc, p, r, f = get_ner_fmeasure(y_dev, pre_seq)
	print('acc:\t{}\tp:\t{}\tr:\t{}\tf:\t{}\n'.format(acc, p, r, f))
Example #18
class BERT_LSTM_CRF(nn.Module):
    def __init__(self, bert_config, tagset_size, embedding_dim, hidden_dim, rnn_layers, dropout_ratio, dropout1, use_cuda):
        super(BERT_LSTM_CRF, self).__init__()
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.word_embeds = BertModel.from_pretrained(bert_config)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim,
                            num_layers=rnn_layers, bidirectional=True, dropout=dropout_ratio, batch_first=True)
        self.rnn_layers = rnn_layers
        self.dropout1 = nn.Dropout(p=dropout1)
        self.crf = CRF(target_size=tagset_size, average_batch=True, use_cuda=use_cuda)
        self.liner = nn.Linear(hidden_dim*2, tagset_size+2)
        self.tagset_size = tagset_size
     self.use_cuda = use_cuda

    def rand_init_hidden(self, batch_size):
        if self.use_cuda:

            return Variable(
                torch.randn(2 * self.rnn_layers, batch_size, self.hidden_dim)).cuda(), Variable(
                torch.randn(2 * self.rnn_layers, batch_size, self.hidden_dim)).cuda()
        else:
            return Variable(
                torch.randn(2 * self.rnn_layers, batch_size, self.hidden_dim)), Variable(
                torch.randn(2 * self.rnn_layers, batch_size, self.hidden_dim))

    def get_output_score(self, sentence, attention_mask=None):
        batch_size = sentence.size(0)
        seq_length = sentence.size(1)
        embeds, _ = self.word_embeds(sentence, attention_mask=attention_mask, output_all_encoded_layers=False)

        hidden = self.rand_init_hidden(batch_size)
        # if embeds.is_cuda:
        #     hidden = (i.cuda() for i in hidden)
        lstm_out, hidden = self.lstm(embeds, hidden)
        lstm_out = lstm_out.contiguous().view(-1, self.hidden_dim * 2)
        d_lstm_out = self.dropout1(lstm_out)
        l_out = self.liner(d_lstm_out)
        lstm_feats = l_out.contiguous().view(batch_size, seq_length, -1)
        return lstm_feats

    def forward(self, sentence, masks):
        lstm_feats = self.get_output_score(sentence)
        scores, tag_seq = self.crf._viterbi_decode(lstm_feats, masks.byte())
        return tag_seq

    def neg_log_likelihood_loss(self, sentence, mask, tags):
        lstm_feats = self.get_output_score(sentence)
        loss_value = self.crf.neg_log_likelihood_loss(lstm_feats, mask, tags)
        batch_size = lstm_feats.size(0)
        loss_value /= float(batch_size)
        return loss_value
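A minimal sketch of driving this class, assuming WordPiece input ids, an attention/padding mask, and gold tag ids are already batched as LongTensors; the model name and hyperparameters below are illustrative, not taken from the original configuration:

import torch

model = BERT_LSTM_CRF('bert-base-chinese', tagset_size=10, embedding_dim=768,
                      hidden_dim=256, rnn_layers=1, dropout_ratio=0.5,
                      dropout1=0.5, use_cuda=False)

loss = model.neg_log_likelihood_loss(input_ids, attention_mask, tag_ids)
loss.backward()

with torch.no_grad():
    pred_tags = model(input_ids, attention_mask)   # Viterbi-decoded tag indices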
Example #19
 def __init__(self, bert_config, tagset_size, embedding_dim, hidden_dim, rnn_layers, dropout_ratio, dropout1, use_cuda):
     super(BERT_LSTM_CRF, self).__init__()
     self.embedding_dim = embedding_dim
     self.hidden_dim = hidden_dim
     self.word_embeds = BertModel.from_pretrained(bert_config)
     self.lstm = nn.LSTM(embedding_dim, hidden_dim,
                         num_layers=rnn_layers, bidirectional=True, dropout=dropout_ratio, batch_first=True)
     self.rnn_layers = rnn_layers
     self.dropout1 = nn.Dropout(p=dropout1)
     self.crf = CRF(target_size=tagset_size, average_batch=True, use_cuda=use_cuda)
     self.liner = nn.Linear(hidden_dim*2, tagset_size+2)
     self.tagset_size = tagset_size
     self.use_cuda = use_cuda
Example #20
    def __init__(self, data):
        super(BiLSTM_CRF, self).__init__()
        print("build batched BiLSTM CRF...")
        data.show_data_summary()
        self.embedding_dim = data.word_emb_dim
        self.hidden_dim = data.HP_hidden_dim
        self.drop = nn.Dropout(data.HP_dropout)
        self.droplstm = nn.Dropout(data.HP_dropout)

        # define the embedding layer
        self.word_embeddings = nn.Embedding(data.word_alphabet.size(),
                                            self.embedding_dim)
        # load the pretrained word vectors into self.word_embeddings
        if data.pretrain_word_embedding is not None:
            self.word_embeddings.weight.data.copy_(
                torch.from_numpy(data.pretrain_word_embedding))
        else:
            self.word_embeddings.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.word_alphabet.size(),
                                          self.embedding_dim)))

        # define the LSTM
        self.bilstm_flag = data.HP_bilstm
        self.lstm_layer = data.HP_lstm_layer
        if self.bilstm_flag:
            lstm_hidden = data.HP_hidden_dim // 2
        else:
            lstm_hidden = data.HP_hidden_dim
        self.lstm = nn.LSTM(self.embedding_dim,
                            lstm_hidden,
                            num_layers=self.lstm_layer,
                            batch_first=True,
                            bidirectional=self.bilstm_flag)

        # define the CRF
        self.index2label = {}
        for ele in data.label_alphabet.instance2index:
            self.index2label[data.label_alphabet.instance2index[ele]] = ele
        self.hidden2tag = nn.Linear(data.HP_hidden_dim,
                                    len(self.index2label) + 2)
        self.crf = CRF(len(self.index2label), data.HP_gpu)

        # move the model to the GPU
        self.gpu = data.HP_gpu
        if self.gpu:
            self.drop = self.drop.cuda()
            self.droplstm = self.droplstm.cuda()
            self.word_embeddings = self.word_embeddings.cuda()
            self.hidden2tag = self.hidden2tag.cuda()
            self.lstm = self.lstm.cuda()
Example #21
    def __init__(self, configs, pretrained_word_embed=None):
        super(BiLSTMCRF, self).__init__()

        self.configs = configs
        self.num_labels = configs['num_labels']
        self.max_seq_length = configs['max_seq_length']
        self.use_cuda = configs['use_cuda'] and torch.cuda.is_available()

        self.bilstm = BiLSTM(configs, pretrained_word_embed)
        self.crf = CRF(target_size=self.num_labels,
                       use_cuda=self.use_cuda,
                       average_batch=False)
        self.hidden2label = nn.Linear(self.bilstm.hidden_dim * 2,
                                      self.num_labels + 2)
Example #22
 def __init__(self, vocab_size, embed_size, num_units, num_layers, num_tag,
              pre_train, use_cuda):
     super(RNNCRF, self).__init__()
     self.num_tag = num_tag
     self.use_cuda = use_cuda
     self.crf = CRF(num_tag)
     self.embedding = nn.Embedding(vocab_size,
                                   embed_size,
                                   _weight=pre_train)
     self.rnn = nn.LSTM(embed_size,
                        num_units,
                        num_layers=num_layers,
                        batch_first=True,
                        bidirectional=True)
     self.linear = nn.Linear(2 * num_units, num_tag)
class SeqModel(nn.Module):
    def __init__(self, data):
        super(SeqModel, self).__init__()
        self.data = data
        self.use_crf = data.use_crf
        print("build network...")
        print("word feature extractor: ", data.word_feature_extractor)

        self.gpu = data.HP_gpu
        self.average_batch = data.average_batch_loss
        # opinion and evidence are extracted separately
        label_size = data.label_alphabet_size
        self.word_hidden = WordSequence(data)
        if self.use_crf:
            self.word_crf = CRF(label_size, batch_first=True)
            if self.gpu:
                self.word_crf = self.word_crf.cuda()
Example #24
class Sequence_Label(nn.Module):
    def __init__(self, config):

        super(Sequence_Label, self).__init__()

        self.num_labels = len(config.tag2idx)

        self._bert = Bert_CRF.from_pretrained(config.bert_model_dir,
                                              num_labels=self.num_labels)

        self.crf = CRF(self.num_labels, batch_first=True)

    def forward(self,
                input_ids,
                attention_mask,
                token_type_ids=None,
                labels=None):
        output = self._bert(input_ids=input_ids,
                            attention_mask=attention_mask,
                            token_type_ids=token_type_ids)

        attn_mask = attention_mask.type(torch.uint8)

        if labels is not None:
            loss = -self.crf(
                log_soft(output, 2), labels, mask=attn_mask, reduction='mean')
            return loss
        else:
            prediction = self.crf.decode(output, mask=attn_mask)
            return prediction
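log_soft is not defined in this snippet; given how it is called, it is presumably torch.nn.functional.log_softmax applied over the label dimension before the emissions reach the CRF. A hedged sketch of that missing helper and of a typical call (config and the batch tensors are assumed to exist elsewhere):

import torch.nn.functional as F

# presumed definition of the helper used in forward() above (an assumption)
log_soft = F.log_softmax

model = Sequence_Label(config)
loss = model(input_ids, attention_mask, token_type_ids, labels=label_ids)  # training: scalar loss
preds = model(input_ids, attention_mask, token_type_ids)                   # inference: list of tag-id lists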
Example #25
 def __init__(self, config, model_configs):
     super(BertCRF, self).__init__(config)
     self.num_labels = config.num_labels
     self.max_seq_length = model_configs['max_seq_length']
     self.bert = BertModel(config)
     self.use_cuda = model_configs['use_cuda'] and torch.cuda.is_available()
     self.crf = CRF(target_size=self.num_labels,
                    use_cuda=self.use_cuda,
                    average_batch=False)
     bert_embedding = config.hidden_size
     # hidden_dim is the output dimension
     # the LSTM's hidden_dim is the same as the hidden_dim used in init_hidden
     # and is 1/2 of the output layer's hidden_dim
     self.hidden_dim = config.hidden_size
     self.dropout = nn.Dropout(model_configs['dropout_rate'])
     self.hidden2label = nn.Linear(self.hidden_dim, self.num_labels + 2)
     self.apply(self.init_weights)
class BilstmCrf(nn.Module):
    def __init__(self, data, model_config):
        super(BilstmCrf, self).__init__()
        if model_config['random_embedding'] == 'True':
            self.char_embeddings = nn.Embedding(data.char_alphabet_size,
                                                model_config['char_emb_dim'])
            self.char_embeddings.weight.data.copy_(
                torch.from_numpy(
                    self.random_embedding(data.char_alphabet_size,
                                          model_config['char_emb_dim'])))
            self.char_drop = nn.Dropout(model_config['dropout'])
        else:
            char_emb_path = model_config['char_emb_file']
            self.pretrain_char_embedding, self.char_emb_dim = build_pretrain_embedding(
                char_emb_path, data.char_alphabet)
            self.char_embeddings = nn.Embedding(data.char_alphabet_size,
                                                model_config['char_emb_dim'])
            self.char_embeddings.weight.data.copy_(
                torch.from_numpy(self.pretrain_char_embedding))
            # set 'inf' to 0:
            self.char_embeddings.weight.data[0] = torch.zeros(200)
            self.char_drop = nn.Dropout(model_config['dropout'])

        self.intent_embeddings = nn.Embedding(data.intent_alphabet_size,
                                              model_config['intent_emb_dim'])
        self.intent_embeddings.weight.data.copy_(
            torch.from_numpy(
                self.random_embedding(data.intent_alphabet_size,
                                      model_config['intent_emb_dim'])))

        self.input_drop = nn.Dropout(model_config['dropout'])

        self.lstm = nn.LSTM(model_config['char_emb_dim'] +
                            model_config['intent_emb_dim'],
                            model_config['lstm_hidden_dim'] // 2,
                            num_layers=model_config['num_layers'],
                            batch_first=model_config['batch_first'],
                            bidirectional=model_config['bidirectional'])
        self.drop_lstm = nn.Dropout(model_config['dropout'])

        self.hidden2tag = nn.Linear(model_config['lstm_hidden_dim'],
                                    data.label_alphabet_size + 2)
        self.crf = CRF(data.label_alphabet_size, model_config['gpu'])

        self.num_layers = model_config['num_layers']
        self.hidden_size = model_config['lstm_hidden_dim'] // 2
        self.device = model_config['device']
Example #27
 def __init__(self,
              num_units,
              rnn_hidden,
              num_tags,
              num_layers=1,
              use_cuda=False):
     super(ElmoNer, self).__init__()
     self.use_cuda = use_cuda
     self.embedding = Embedder(ELMO_PRETAIN_PATH)
     self.rnn = nn.GRU(num_units,
                       rnn_hidden,
                       num_layers=num_layers,
                       batch_first=True,
                       bidirectional=True)
     self.linear = nn.Linear(2 * rnn_hidden, num_tags)
     # self.linear = nn.Linear(num_units, num_tags)
     self.crf = CRF(num_tags)
Example #28
 def __init__(self, kwargs):
     super(CrfTagger2, self).__init__()
     self.gpu = kwargs.pop("use_gpu", False)
     self.average_batch = kwargs.pop("average_batch", True)
     self.crf = NCRFpp_CRF(kwargs["tagset_size"], self.gpu)
     if kwargs.pop("use_lstm", False):
         kwargs["tagset_size"] += 2
         self.lstm = LstmTagger(**kwargs)
class RoleFiller(nn.Module):
    def __init__(self, reader):
        super(RoleFiller, self).__init__()
        # reader = Reader('')
        self.embedding = Glove_Bert_Embedding(
            reader.word_dict.word_size, reader.config.parser['word_embed_dim'],
            reader.config.parser['HP_dropout'],
            reader.build_pre_embedding(use_saved_embed=True),
            reader.word_dict.idx2word, reader.config.parser['bert_dir'])
        self.drop_lstm_sent = nn.Dropout(reader.config.parser['HP_dropout'] -
                                         0.1)
        self.drop_lstm_para = nn.Dropout(reader.config.parser['HP_dropout'])
        self.batch_average = reader.config.parser['batch_average']

        self.embedding_dim = reader.config.parser['word_embed_dim'] + 768
        # 768 is the fixed (static) BERT hidden dimension

        # LSTM
        self.hidden_dim = reader.config.parser['HP_hidden_dim']
        if reader.config.parser['HP_bilstm']:
            self.hidden_dim //= 2
        # LSTM for paragraph level
        self.lstm_para = nn.LSTM(
            self.embedding_dim,
            self.hidden_dim,
            reader.config.parser['HP_lstm_layers_num'],
            batch_first=True,
            bidirectional=reader.config.parser['HP_bilstm'])
        # LSTM for sentence level
        self.lstm_sent = nn.LSTM(
            self.embedding_dim,
            self.hidden_dim,
            reader.config.parser['HP_lstm_layers_num'],
            batch_first=True,
            bidirectional=reader.config.parser['HP_bilstm'])

        # gate-sigmoid sum
        self.gate = nn.Linear(2 * reader.config.parser['HP_hidden_dim'],
                              reader.config.parser['HP_hidden_dim'])
        self.sigmoid = nn.Sigmoid()

        self.hidden2tag = nn.Linear(reader.config.parser['HP_hidden_dim'],
                                    reader.tag_dict.word_size + 2)
        self.softmax = nn.Softmax(dim=-1)

        self.crf = CRF(reader.tag_dict.word_size)
 def __init__(self, data):
     super(BiLSTM_CRF, self).__init__()
     print ("build batched lstmcrf...")
     self.gpu = data.HP_gpu
     ## add two more label for downlayer lstm, use original label size for CRF
     label_size = data.label_alphabet_size
     data.label_alphabet_size += 2
     self.lstm = BiLSTM(data)
     self.crf = CRF(label_size, self.gpu)
Example #31
class CrfTagger2(nn.Module):
    # based on SeqLabel in NCRFpp
    def __init__(self, kwargs):
        super(CrfTagger2, self).__init__()
        self.gpu = kwargs.pop("use_gpu", False)
        self.average_batch = kwargs.pop("average_batch", True)
        self.crf = NCRFpp_CRF(kwargs["tagset_size"], self.gpu)
        if kwargs.pop("use_lstm", False):
            kwargs["tagset_size"] += 2
            self.lstm = LstmTagger(**kwargs)


    @staticmethod
    def _get_mask(X_lens, batch_size, seq_len):
        mask = Variable(torch.zeros((batch_size, seq_len))).byte()
        for idx, X_len in enumerate(X_lens):
            mask[idx, :X_len] = torch.ones(X_len)
        return mask

    def forward(self, input, input_lens):
        logits = self.lstm.forward(input, input_lens, apply_softmax=False)
        batch_size, seq_len, _ = logits.size()
        mask = __class__._get_mask(input_lens, batch_size, seq_len)
        return logits, mask

    def loss(self, logits, mask, target):
        total_loss = self.crf.neg_log_likelihood_loss(logits, mask, target)
        batch_size, seq_len, _ = logits.size()
        if self.average_batch:
            total_loss = total_loss / batch_size
        return total_loss

    def decode(self, logits, mask, return_scores=False):
        scores, tag_seq = self.crf.viterbi_decode(logits, mask)
        if return_scores:
            return scores, tag_seq
        return tag_seq

    def decode_nbest(self, logits, mask, nbest, return_scores=False):
        scores, tag_seq = self.crf.viterbi_decode_nbest(logits, mask, nbest)
        if return_scores:
            return scores, tag_seq
        return tag_seq
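A short usage sketch for this tagger, assuming the remaining LstmTagger keyword arguments and padded input batches come from the surrounding project (everything below is illustrative):

kwargs = {"tagset_size": 10, "use_gpu": False, "use_lstm": True}
kwargs.update(lstm_tagger_kwargs)                    # remaining LstmTagger settings (assumed)
tagger = CrfTagger2(kwargs)

logits, mask = tagger(batch_inputs, batch_lengths)   # emissions and padding mask
loss = tagger.loss(logits, mask, batch_targets)      # batch-averaged CRF loss
loss.backward()

pred_tags = tagger.decode(logits, mask)              # best Viterbi paths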
Example #32
class ElmoNer(nn.Module):
    def __init__(self,
                 num_units,
                 rnn_hidden,
                 num_tags,
                 num_layers=1,
                 use_cuda=False):
        super(ElmoNer, self).__init__()
        self.use_cuda = use_cuda
        self.embedding = Embedder(ELMO_PRETAIN_PATH)
        self.rnn = nn.GRU(num_units,
                          rnn_hidden,
                          num_layers=num_layers,
                          batch_first=True,
                          bidirectional=True)
        self.linear = nn.Linear(2 * rnn_hidden, num_tags)
        # self.linear = nn.Linear(num_units, num_tags)
        self.crf = CRF(num_tags)

    def forward(self, x_data, y_data, masks):
        """
		前向算法
		:param x_data:
		:param y_data:
		:param masks:
		:return:
		"""
        encoded_layers = self.embedding.sents2elmo(x_data)
        out = self.rnn_layer(encoded_layers)
        loss = -1 * self.crf(out, y_data.transpose(0, 1), masks.transpose(
            0, 1))
        return loss

    def rnn_layer(self, encoded_layers):
        """
		batch seq_len hidden
		:param encoded_layers:
		:return: batch seq_len class
		"""
        encoded_layers = np.array(encoded_layers)
        encoded_layers = torch.from_numpy(encoded_layers)
        if self.use_cuda:
            encoded_layers = encoded_layers.cuda()
        out, _ = self.rnn(encoded_layers)
        out = self.linear(out)
        out = out.transpose(0, 1)
        return out

    def test(self, x_data, masks):
        encoded_layers = self.embedding.sents2elmo(x_data)

        out = self.rnn_layer(encoded_layers)
        best_paths = self.crf.decode(out, mask=masks.transpose(0, 1))
        return best_paths
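Finally, a sketch of the training/prediction flow for ElmoNer, assuming ELMo weights at ELMO_PRETAIN_PATH, x_data as a list of token lists, and y_data/masks as (batch, seq_len) tensors; the dimensions below are illustrative (1024 is the usual ELMo output size):

import torch

model = ElmoNer(num_units=1024, rnn_hidden=256, num_tags=10)

loss = model(x_data, y_data, masks)          # CRF negative log-likelihood
loss.backward()

with torch.no_grad():
    best_paths = model.test(x_data, masks)   # decoded tag-id sequences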