Example #1
import torch.nn as nn

# BiLSTM, BiLSTM_CNN, and CRF are project-local modules that this snippet
# assumes are importable.


class Sequence_Label(nn.Module):
    def __init__(self, config):
        super(Sequence_Label, self).__init__()
        self.config = config
        # embed
        self.embed_num = config.embed_num
        self.embed_dim = config.embed_dim
        self.label_num = config.class_num
        self.paddingId = config.paddingId
        # dropout
        self.dropout_emb = config.dropout_emb
        self.dropout = config.dropout
        # lstm
        self.lstm_hiddens = config.lstm_hiddens
        self.lstm_layers = config.lstm_layers
        # pretrain
        self.pretrained_embed = config.pretrained_embed
        self.pretrained_weight = config.pretrained_weight
        # char
        self.use_char = config.use_char
        self.char_embed_num = config.char_embed_num
        self.char_paddingId = config.char_paddingId
        self.char_dim = config.char_dim
        self.conv_filter_sizes = self._conv_filter(config.conv_filter_sizes)
        self.conv_filter_nums = self._conv_filter(config.conv_filter_nums)
        assert len(self.conv_filter_sizes) == len(self.conv_filter_nums)
        # use crf
        self.use_crf = config.use_crf

        # cuda or cpu
        self.device = config.device

        # CRF layers conventionally reserve two extra states for the
        # START/STOP transitions, hence label_num + 2 when the CRF is used.
        self.target_size = self.label_num + 2 if self.use_crf else self.label_num

        if self.use_char:
            self.encoder_model = BiLSTM_CNN(embed_num=self.embed_num, embed_dim=self.embed_dim, label_num=self.target_size,
                                            paddingId=self.paddingId, dropout_emb=self.dropout_emb, dropout=self.dropout,
                                            lstm_hiddens=self.lstm_hiddens, lstm_layers=self.lstm_layers,
                                            pretrained_embed=self.pretrained_embed, pretrained_weight=self.pretrained_weight,
                                            char_embed_num=self.char_embed_num, char_dim=self.char_dim,
                                            char_paddingId=self.char_paddingId, conv_filter_sizes=self.conv_filter_sizes,
                                            conv_filter_nums=self.conv_filter_nums, device=self.device)
        else:
            self.encoder_model = BiLSTM(embed_num=self.embed_num, embed_dim=self.embed_dim, label_num=self.target_size,
                                        paddingId=self.paddingId, dropout_emb=self.dropout_emb, dropout=self.dropout,
                                        lstm_hiddens=self.lstm_hiddens, lstm_layers=self.lstm_layers,
                                        pretrained_embed=self.pretrained_embed, pretrained_weight=self.pretrained_weight,
                                        device=self.device)
        if self.use_crf:
            self.crf_layer = CRF(target_size=self.label_num, device=self.device)
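
A construction sketch for the class above; every config value here is illustrative, and BiLSTM/BiLSTM_CNN/CRF must come from the original project (the unshown _conv_filter helper appears to parse the filter settings into lists):

from types import SimpleNamespace

config = SimpleNamespace(
    embed_num=10000, embed_dim=100, class_num=9, paddingId=0,
    dropout_emb=0.5, dropout=0.5, lstm_hiddens=200, lstm_layers=1,
    pretrained_embed=False, pretrained_weight=None,
    use_char=False, char_embed_num=0, char_paddingId=0, char_dim=0,
    conv_filter_sizes='3', conv_filter_nums='50',
    use_crf=True, device='cpu')
model = Sequence_Label(config)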
Example #2
import torch.nn as nn
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

# CRF is a project-local module that this snippet assumes is importable.


class BiLSTM_CRF(nn.Module):
    def __init__(self, args):
        super(BiLSTM_CRF, self).__init__()
        self.embedding_dim = args.embedding_dim
        self.hidden_dim = args.hidden_dim
        self.vocab_size = args.vocab_size
        # self.tag_to_ix = args.tag_to_ix
        # don't count the padding tag for the classifier output
        self.tagset_size = args.tagset_size

        # whenever the embedding sees the padding index it'll make the whole vector zeros
        padding_idx = 0
        self.word_embeds = nn.Embedding(self.vocab_size,
                                        self.embedding_dim,
                                        padding_idx=padding_idx)
        # Note: PyTorch ignores `dropout` here (and emits a warning) because
        # it only applies between stacked layers and num_layers == 1.
        self.lstm = nn.LSTM(self.embedding_dim,
                            self.hidden_dim,
                            dropout=0.5,
                            num_layers=1,
                            bidirectional=True)
        # Alternative recurrent encoder; defined but not used in forward().
        self.gru = nn.GRU(self.embedding_dim,
                          self.hidden_dim,
                          dropout=0.5,
                          num_layers=1,
                          bidirectional=True)

        # Maps the output of the LSTM into tag space.
        self.hidden2tag = nn.Linear(self.hidden_dim * 2, self.tagset_size)

        # initial crf layer
        self.crf = CRF(self.tagset_size)


    def _get_lstm_features(self, sentence,
                           lengths):  # (batch_size, seq_length)
        embeds = self.word_embeds(sentence).transpose(
            1, 0)  # (seq_length, batch_size, embedding_size)
        # pack_padded_sequence expects lengths sorted in decreasing order
        # (the default enforce_sorted=True).
        embeds_packed = pack_padded_sequence(embeds, lengths)
        lstm_out, hidden = self.lstm(
            embeds_packed)  # (seq_length, batch_size, hidden_size)
        lstm_out_padded, _ = pad_packed_sequence(lstm_out)
        lstm_feats = self.hidden2tag(
            lstm_out_padded)  # (seq_length, batch_size, tag_size)
        # print(lstm_feats.shape)
        return lstm_feats

    def neg_log_likelihood(self, sentence, targets, lengths):
        feats = self._get_lstm_features(sentence, lengths)
        # feats: (seq_length, batch_size, tag_size)
        # tags: (batch_size, seq_length)
        # Padding id is 0, so `sentence > 0` marks the real tokens.
        mask = (sentence > 0).transpose(1, 0)
        return -self.crf(feats, targets.transpose(0, 1), mask)

    def forward(self, sentence, lengths, concated=False):  # used for prediction
        # Get the emission scores from the BiLSTM
        lstm_feats = self._get_lstm_features(sentence, lengths)

        # Find the best path, given the features.
        mask = (sentence > 0).transpose(1, 0)
        tag_seq = self.crf.decode(lstm_feats, mask, concated)
        return tag_seq
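
Since the CRF module above is project-local, here is a self-contained sketch of the same BiLSTM-CRF pattern built on the pip-installable pytorch-crf package; all sizes are illustrative:

import torch
import torch.nn as nn
from torchcrf import CRF

vocab_size, embedding_dim, hidden_dim, tagset_size = 100, 32, 16, 5
embed = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
lstm = nn.LSTM(embedding_dim, hidden_dim, bidirectional=True)
hidden2tag = nn.Linear(hidden_dim * 2, tagset_size)
crf = CRF(tagset_size)  # time-major: (seq_length, batch_size, tagset_size)

sentence = torch.randint(1, vocab_size, (4, 2))  # (seq_length, batch_size)
tags = torch.randint(0, tagset_size, (4, 2))
mask = sentence > 0  # padding id is 0

emissions = hidden2tag(lstm(embed(sentence))[0])
loss = -crf(emissions, tags, mask=mask)        # negative log-likelihood
best_paths = crf.decode(emissions, mask=mask)  # list of tag-id sequences

Like the class above, pytorch-crf defaults to time-major emissions, which is why BiLSTM_CRF transposes its batch-major inputs before the LSTM.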
Example #3
import torch.nn as nn
from transformers import AlbertModel, BertModel

# CRF is a project-local module that this snippet assumes is importable.


class BERT_CRF(nn.Module):
    def __init__(self, args):
        super(BERT_CRF, self).__init__()
        self.embedding_dim = args.embedding_dim
        self.hidden_dim = args.hidden_dim
        # self.vocab_size = args.vocab_size
        # self.tag_to_ix = args.tag_to_ix
        # don't count the padding tag for the classifier output
        self.tagset_size = args.tagset_size
        self.bert_model_name = args.bert_model_name
        # whenever the embedding sees the padding index it'll make the whole vector zeros
        # padding_idx = 0
        # self.word_embeds = nn.Embedding(self.vocab_size, self.embedding_dim, padding_idx=padding_idx)
        if self.bert_model_name.startswith('bert-'):
            self.word_embeds = BertModel.from_pretrained(self.bert_model_name)
            print('loaded pre-trained model {}'.format(self.bert_model_name))
        elif self.bert_model_name.startswith('albert-'):
            self.word_embeds = AlbertModel.from_pretrained(
                self.bert_model_name)
            print('loaded pre-trained model {}'.format(self.bert_model_name))
        else:
            # Fail fast: continuing without word_embeds would only crash
            # later with a confusing AttributeError.
            raise ValueError(
                'unsupported bert model: {}'.format(self.bert_model_name))

        # Note: PyTorch ignores `dropout` here (and emits a warning) because
        # it only applies between stacked layers and num_layers == 1.
        self.lstm = nn.LSTM(self.embedding_dim,
                            self.hidden_dim,
                            num_layers=1,
                            bidirectional=True,
                            dropout=0.5)
        # self.gru = nn.GRU(self.embedding_dim, self.hidden_dim,
        #                   dropout=0.5, num_layers=1, bidirectional=True)

        # Maps the output of the LSTM into tag space.
        self.hidden2tag = nn.Linear(self.hidden_dim * 2, self.tagset_size)
        # self.linear = nn.Linear(self.embedding_dim, self.tagset_size)

        # initial crf layer
        self.crf = CRF(self.tagset_size)


    def _get_lstm_features(self, sentence,
                           lengths):  # (batch_size, seq_length)
        # embeds = self.word_embeds(sentence).transpose(1, 0)  # (seq_length, batch_size, embedding_size)
        # Padding id is 0 for BERT's [PAD] token, so `sentence > 0` marks
        # the real tokens.
        attention_mask = (sentence > 0)
        outputs = self.word_embeds(sentence, attention_mask=attention_mask)
        # outputs[0] is the last hidden state: (batch_size, seq_length, hidden)
        embeds = outputs[0].transpose(0, 1)

        # embeds_packed = pack_padded_sequence(embeds, lengths)
        lstm_out, hidden = self.lstm(
            embeds)  # (seq_length, batch_size, hidden_size)
        # lstm_out_padded, _ = pad_packed_sequence(lstm_out)
        lstm_feats = self.hidden2tag(
            lstm_out)  # (seq_length, batch_size, tag_size)
        # lstm_feats = self.linear(embeds)
        # print(lstm_feats.shape)
        return lstm_feats

    def neg_log_likelihood(self, sentence, targets, lengths):
        feats = self._get_lstm_features(sentence, lengths)
        # feats: (seq_length, batch_size, tag_size)
        # tags: (batch_size, seq_length)
        mask = (sentence > 0).transpose(1, 0)
        return -self.crf(feats, targets.transpose(0, 1), mask)

    def forward(self, sentence, lengths, concated=False):  # used for prediction
        # Get the emission scores from the BiLSTM
        lstm_feats = self._get_lstm_features(sentence, lengths)

        # Find the best path, given the features.
        mask = (sentence > 0).transpose(1, 0)
        tag_seq = self.crf.decode(lstm_feats, mask, concated)
        return tag_seq
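
A minimal usage sketch for BERT_CRF, assuming the project-local CRF module is importable; the tokenizer pads with id 0, which matches the `sentence > 0` masking above, and embedding_dim must equal BERT's hidden size (768 for bert-base):

from types import SimpleNamespace
from transformers import BertTokenizer

args = SimpleNamespace(embedding_dim=768, hidden_dim=256,
                       tagset_size=9, bert_model_name='bert-base-cased')
model = BERT_CRF(args)
tokenizer = BertTokenizer.from_pretrained(args.bert_model_name)
batch = tokenizer(['John lives in Berlin'], return_tensors='pt', padding=True)
lengths = batch['attention_mask'].sum(dim=1)  # unused by the BERT path
tag_seq = model(batch['input_ids'], lengths)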
Example #4
    def build_model(self, trainable=True):
        # CRF is assumed to be a Keras-compatible CRF layer exposing `loss`
        # and `viterbi_accuracy`; it is not imported in the original snippet.
        if self.embedding_name is None:
            from tensorflow.keras.layers import Input, Dense, LSTM, GRU, Bidirectional, Embedding, Dropout, TimeDistributed, Activation
            from tensorflow.keras import Model
            Input_layer = Input(shape=(None, ), name='Input_layer')
            embedd_layer = Embedding(self.vocab_size, 100)
            x = embedd_layer(Input_layer)
            if self.model_name.lower() == 'lstm':
                x = Dropout(0.4, name='lstm_dropout')(x)
                for i in range(self.layer_number):
                    x = Bidirectional(LSTM(128, return_sequences=True),
                                      merge_mode='concat',
                                      name='bilstm_{}'.format(i))(x)
                x = TimeDistributed(Dense(1), name='Time_Dense')(x)
                x = Activation('sigmoid')(x)
                model = Model(Input_layer, x)
                model.summary()
                return model, 'binary_crossentropy', ['acc'], embedd_layer
            elif self.model_name.lower() == 'gru':
                x = Dropout(0.4, name='gru_dropout')(x)
                for i in range(self.layer_number):
                    x = Bidirectional(GRU(128, return_sequences=True),
                                      merge_mode='concat',
                                      name='bigru_{}'.format(i))(x)
                x = TimeDistributed(Dense(1), name='Time_Dense')(x)
                x = Activation('sigmoid')(x)
                model = Model(Input_layer, x)
                model.summary()
                return model, 'binary_crossentropy', ['acc'], embedd_layer
            elif self.model_name.lower() == 'lstm-crf':
                x = Dropout(0.4, name='lstm_dropout')(x)
                for i in range(self.layer_number):
                    x = Bidirectional(LSTM(128, return_sequences=True),
                                      merge_mode='concat',
                                      name='bilstm_{}'.format(i))(x)
                x = Dense(64, activation='tanh', name='dense_layer')(x)
                x = Dense(1, name='dense_for_crf', activation='sigmoid')(x)
                crf_layer = CRF(1, name='crf')
                x = crf_layer(x)
                model = Model(Input_layer, x)
                model.summary()
                return model, crf_layer.loss, [crf_layer.viterbi_accuracy], embedd_layer
            else:  # gru-crf
                x = Dropout(0.4, name='gru_dropout')(x)
                for i in range(self.layer_number):
                    x = Bidirectional(GRU(128, return_sequences=True),
                                      merge_mode='concat',
                                      name='bigru_{}'.format(i))(x)
                x = Dense(64, activation='tanh', name='dense_layer')(x)
                x = Dense(1, name='dense_for_crf', activation='sigmoid')(x)
                crf_layer = CRF(1, name='crf')
                x = crf_layer(x)
                model = Model(Input_layer, x)
                model.summary()
                return model, crf_layer.loss, [crf_layer.viterbi_accuracy], embedd_layer
        else:
            # keras_bert builds against standalone Keras, hence the separate
            # imports on this branch.
            from keras.layers import Input, Dense, LSTM, GRU, Bidirectional, Embedding, Dropout, TimeDistributed, Activation
            from keras import Model
            from keras_bert import load_trained_model_from_checkpoint
            assert self.paths is not None
            config_path, checkpoints_path, vocab_path = self.paths
            bert_model = load_trained_model_from_checkpoint(
                config_file=config_path,
                checkpoint_file=checkpoints_path,
                training=trainable)
            inputs = bert_model.inputs[:2]
            # When fine-tuning, tap the final encoder block by name (this
            # assumes a 24-layer BERT); otherwise use the last layer's output.
            if trainable:
                x = bert_model.get_layer(
                    name='Encoder-24-FeedForward-Norm').output
            else:
                x = bert_model.layers[-1].output
            x = Dropout(0.4)(x)
            if self.model_name.lower() == 'lstm':
                for i in range(self.layer_number):
                    x = Bidirectional(LSTM(128, return_sequences=True),
                                      merge_mode='concat',
                                      name='bilstm_{}'.format(i))(x)
                x = TimeDistributed(Dense(1), name='Time_Dense')(x)
                x = Activation('sigmoid')(x)
                model = Model(inputs, x)
                model.summary()
                return model, 'binary_crossentropy', ['acc'], bert_model
            elif self.model_name.lower() == 'gru':
                for i in range(self.layer_number):
                    x = Bidirectional(GRU(128, return_sequences=True),
                                      merge_mode='concat',
                                      name='bigru_{}'.format(i))(x)
                x = TimeDistributed(Dense(1), name='Time_Dense')(x)
                x = Activation('sigmoid')(x)
                model = Model(inputs, x)
                model.summary()
                return model, 'binary_crossentropy', ['acc'], bert_model
            elif self.model_name.lower() == 'lstm-crf':
                for i in range(self.layer_number):
                    x = Bidirectional(LSTM(128, return_sequences=True),
                                      merge_mode='concat',
                                      name='bilstm_{}'.format(i))(x)
                x = Dense(64, activation='tanh', name='dense_layer')(x)
                x = Dense(1, name='dense_for_crf', activation='sigmoid')(x)
                crf_layer = CRF(1, name='crf')
                x = crf_layer(x)
                model = Model(inputs, x)
                model.summary()
                return model, crf_layer.loss, [crf_layer.viterbi_accuracy], bert_model
            else:  # gru-crf
                for i in range(self.layer_number):
                    x = Bidirectional(GRU(128, return_sequences=True),
                                      merge_mode='concat',
                                      name='bigru_{}'.format(i))(x)
                x = Dense(64, activation='tanh', name='dense_layer')(x)
                x = Dense(1, name='dense_for_crf', activation='sigmoid')(x)
                crf_layer = CRF(1, name='crf')
                x = crf_layer(x)
                model = Model(inputs, x)
                model.summary()
                return model, crf_layer.loss, [crf_layer.viterbi_accuracy], bert_model
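
A compile-and-train sketch for the non-BERT path; `builder` stands for an instance of the (unshown) class that owns build_model, and X_train/y_train are placeholders:

model, loss, metrics, embedd_layer = builder.build_model()
model.compile(optimizer='adam', loss=loss, metrics=metrics)
model.fit(X_train, y_train, batch_size=32, epochs=3)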