def create_crf_model(bert_config, is_training, input_ids, input_mask, segment_ids, labels, num_labels, use_one_hot_embeddings):
    """
        create model
        :param bert_config: bert config
        :param is_training:
        :param input_ids: idx of input data
        :param input_mask:
        :param segment_ids:
        :param labels: idx of labels
        :param num_labels: type of labels
        :param use_one_hot_embeddings:
        :return:
        """
    # representation
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings
    )

    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value
    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(used, reduction_indices=1)

    crf = CRF(embedded_chars=embedding,
              droupout_rate=0.5,
              initializers=initializers,
              num_labels=num_labels,
              seq_length=max_seq_length,
              labels=labels,
              lengths=lengths,
              is_training=is_training)
    (total_loss, logits, trans, pred_ids) = crf.add_crf_layer()

    return (total_loss, logits, trans, pred_ids)
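A quick worked example of the padding-length trick used above (values are illustrative and not from the original snippet): non-zero token ids mark real tokens, so summing the sign mask along the time axis gives each sequence's true length.

# minimal sketch, assuming 0 is the padding id
import tensorflow as tf

input_ids = tf.constant([[101, 2023, 3231, 102, 0, 0],
                         [101, 102, 0, 0, 0, 0]])
used = tf.sign(tf.abs(input_ids))      # [[1, 1, 1, 1, 0, 0], [1, 1, 0, 0, 0, 0]]
lengths = tf.reduce_sum(used, axis=1)  # [4, 2] -> number of non-padding tokens per sequence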
Example #2
    def build(self):
        self.model = Sequential()

        self.model.add(
            Embedding(input_dim=self.n_vocab,
                      output_dim=self.n_embed,
                      input_length=self.n_input,
                      weights=[self.embedding_mat],
                      mask_zero=True,
                      trainable=True))
        self.model.add(Dropout(self.keep_prob))

        self.model.add(
            Bidirectional(
                GRU(self.n_lstm,
                    return_sequences=True,
                    dropout=self.keep_prob_lstm,
                    recurrent_dropout=self.keep_prob_lstm)))
        self.model.add(TimeDistributed(Dropout(self.keep_prob)))

        # crf = CRF(units=self.n_entity, learn_mode='join',
        #           test_mode='viterbi', sparse_target=False)
        crf = CRF(units=self.n_entity,
                  learn_mode='marginal',
                  test_mode='marginal',
                  sparse_target=False)
        self.model.add(crf)

        self.model.compile(optimizer=self.optimizer,
                           loss=crf.loss_function,
                           metrics=[crf.accuracy])
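A hypothetical training call for the compiled model above (ner, X_train and y_train are assumptions, not from the original snippet): inputs are padded index sequences of length n_input, and targets are one-hot tag sequences of shape (n_samples, n_input, n_entity) because sparse_target=False.

# sketch only; ner is an instance of the class that defines build()
ner.build()
ner.model.fit(X_train, y_train, batch_size=32, epochs=10, validation_split=0.1)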
Example #3
    def model_bert_bilstm_crf(self):
        bert_model = load_trained_model_from_checkpoint(
            self.config_path, self.checkpoint_path)
        # fine-tune: keep every BERT layer trainable
        for l in bert_model.layers:
            l.trainable = True

        x_input1 = Input(shape=(None, ))
        x_input2 = Input(shape=(None, ))
        x = bert_model([x_input1, x_input2])
        bilstm = Bidirectional(LSTM(64,
                                    return_sequences=True,
                                    dropout=0.35,
                                    recurrent_dropout=0.35),
                               name='BiLSTM')(x)
        hidden = TimeDistributed(Dense(32, activation=None),
                                 name='hidden_layer')(bilstm)
        crf = CRF(units=13,
                  learn_mode='join',
                  test_mode='viterbi',
                  sparse_target=False)
        output = crf(hidden)
        model = Model(inputs=[x_input1, x_input2], outputs=output)
        adam = Adam(lr=2e-4)
        model.compile(optimizer=adam,
                      loss=crf.loss_function,
                      metrics=[crf.accuracy])
        model.summary()
        return model
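A hedged sketch of how inputs for the two-input model above could be produced with keras_bert's Tokenizer (token_dict, text and max_len=128 are assumptions, not part of the original snippet):

from keras_bert import Tokenizer
import numpy as np

tokenizer = Tokenizer(token_dict)                      # token_dict: vocab mapping loaded from BERT's vocab.txt
ids, segs = tokenizer.encode(first=text, max_len=128)  # token id and segment id sequences
pred = model.predict([np.array([ids]), np.array([segs])])  # per-token tag scores from the CRF output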
Example #4
    def build_attention(self):
        # main
        char_input = Input(shape=(self.n_input_char, ))

        char_embed = Embedding(input_dim=self.n_vocab_char,
                               output_dim=self.n_embed_char,
                               input_length=self.n_input_char,
                               weights=[self.char_embedding_mat],
                               mask_zero=False,
                               trainable=True)(char_input)
        char_embed_drop = Dropout(self.keep_prob)(char_embed)
        # auxiliary
        word_input = Input(shape=(self.n_input_word, ))
        word_embed = Embedding(input_dim=self.n_vocab_word,
                               output_dim=self.n_embed_word,
                               input_length=self.n_input_word,
                               weights=[self.word_embedding_mat],
                               mask_zero=False,
                               trainable=True)(word_input)
        word_embed_drop = Dropout(self.keep_prob)(word_embed)
        # use a CNN to extract word-level n-gram features
        word_conv = Conv1D(self.n_filter,
                           kernel_size=self.kernel_size,
                           strides=1,
                           padding='same',
                           kernel_initializer='he_normal')(word_embed_drop)
        word_conv = BatchNormalization(axis=-1)(word_conv)
        word_conv = LeakyReLU(alpha=1 / 5.5)(word_conv)
        # concatenation
        concat = Concatenate(axis=-1)([char_embed_drop, word_conv])
        concat_drop = TimeDistributed(Dropout(self.keep_prob))(concat)

        # attention
        attention_probs = Dense(int(concat_drop.shape[2]),
                                activation='softmax',
                                name='attention_vec')(concat_drop)
        attention_mul = merge([concat_drop, attention_probs],
                              name='attention_mul',
                              mode='mul')

        bilstm = Bidirectional(
            LSTM(units=self.n_lstm,
                 return_sequences=True,
                 dropout=self.keep_prob_lstm,
                 recurrent_dropout=self.keep_prob_lstm))(attention_mul)

        crf = CRF(units=self.n_entity,
                  learn_mode='join',
                  test_mode='viterbi',
                  sparse_target=False)
        output = crf(bilstm)

        self.model_attention = Model(inputs=[char_input, word_input],
                                     outputs=output)
        self.model_attention.compile(optimizer=self.optimizer,
                                     loss=crf.loss_function,
                                     metrics=[crf.accuracy])
        # plot_model(self.model_attention, to_file="model_png/character_model_attention.png", show_shapes=False)
        print(self.model_attention.summary())
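Note that merge(..., mode='mul') is the old Keras 1 functional merge and was removed in Keras 2; an equivalent element-wise product of the attention weights and the features can be written with the multiply layer (a sketch, not part of the original code):

from keras.layers import multiply

attention_mul = multiply([concat_drop, attention_probs], name='attention_mul')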
Example #5
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
    """
    创建X模型
    :param bert_config: bert 配置
    :param is_training:
    :param input_ids: 数据的idx 表示
    :param input_mask:
    :param segment_ids:
    :param labels: 标签的idx 表示
    :param num_labels: 类别数量
    :param use_one_hot_embeddings:
    :return:
    """
    # load BertModel with the input data and obtain the corresponding character embeddings
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    # get the corresponding embedding; shape is [batch_size, seq_length, embedding_size]
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value

    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(
        used, reduction_indices=1)  # a [batch_size] vector holding the true length of each sequence in the batch

    blstm_crf = CRF(embedded_chars=embedding,
                    hidden_unit=FLAGS.lstm_size,
                    cell_type=FLAGS.cell,
                    num_layers=FLAGS.num_layers,
                    droupout_rate=FLAGS.droupout_rate,
                    initializers=initializers,
                    num_labels=num_labels,
                    seq_length=max_seq_length,
                    labels=labels,
                    lengths=lengths,
                    is_training=is_training)
    rst = blstm_crf.add_crf_layer()
    return rst
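The snippet relies on TF1-style command-line flags; a hypothetical definition block matching the names used above (defaults are assumptions, and the 'droupout_rate' spelling follows the snippet):

flags = tf.flags
flags.DEFINE_integer('lstm_size', 128, 'hidden units of the BiLSTM layer')
flags.DEFINE_string('cell', 'lstm', 'RNN cell type, e.g. lstm or gru')
flags.DEFINE_integer('num_layers', 1, 'number of stacked RNN layers')
flags.DEFINE_float('droupout_rate', 0.5, 'dropout rate')
FLAGS = flags.FLAGS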
Example #6
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
    """
    create model
    :param bert_config: bert cofig
    :param is_training:
    :param input_ids: idx of data
    :param input_mask:
    :param segment_ids:
    :param labels: idx of label
    :param num_labels: number of categories
    :param use_one_hot_embeddings:
    :return:
    """
    # load BertModel and acquire the corresponding embedding
    model = modeling.BertModel(config=bert_config,
                               is_training=is_training,
                               input_ids=input_ids,
                               input_mask=input_mask,
                               token_type_ids=segment_ids,
                               use_one_hot_embeddings=use_one_hot_embeddings)
    # acquire the corresponding embedding
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value

    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(used, reduction_indices=1)  # [batch_size]

    crf = CRF(embedded_chars=embedding,
              droupout_rate=FLAGS.droupout_rate,
              initializers=initializers,
              num_labels=num_labels,
              seq_length=max_seq_length,
              labels=labels,
              lengths=lengths,
              is_training=is_training)
    rst = crf.add_crf_layer()
    return rst
Example #7
    def build2(self):
        # main
        char_input = Input(shape=(self.n_input_char, ))
        char_embed = Embedding(input_dim=self.n_vocab_char,
                               output_dim=self.n_embed_char,
                               input_length=self.n_input_char,
                               weights=[self.char_embedding_mat],
                               mask_zero=False,
                               trainable=True)(char_input)
        char_embed_drop = Dropout(self.keep_prob)(char_embed)
        # auxiliary
        word_input = Input(shape=(self.n_input_word, ))
        word_embed = Embedding(input_dim=self.n_vocab_word,
                               output_dim=self.n_embed_word,
                               input_length=self.n_input_word,
                               weights=[self.word_embedding_mat],
                               mask_zero=False,
                               trainable=True)(word_input)
        word_embed_drop = Dropout(self.keep_prob)(word_embed)
        # use a CNN to extract word-level n-gram features
        word_conv = Conv1D(self.n_filter,
                           kernel_size=self.kernel_size,
                           strides=1,
                           padding='same',
                           kernel_initializer='he_normal')(word_embed_drop)
        word_conv = BatchNormalization(axis=-1)(word_conv)
        word_conv = LeakyReLU(alpha=1 / 5.5)(word_conv)
        # equivalent manual LeakyReLU:
        # alpha = 1 / 5.5
        # word_conv = tf.maximum(alpha * word_conv, word_conv)
        # concatenation
        concat = Concatenate(axis=-1)([char_embed, word_conv])
        concat_drop = TimeDistributed(Dropout(self.keep_prob))(concat)

        bilstm = Bidirectional(
            LSTM(units=self.n_lstm,
                 return_sequences=True,
                 dropout=self.keep_prob_lstm,
                 recurrent_dropout=self.keep_prob_lstm))(concat_drop)

        crf = CRF(units=self.n_entity,
                  learn_mode='join',
                  test_mode='viterbi',
                  sparse_target=False)
        output = crf(bilstm)

        self.model2 = Model(inputs=[char_input, word_input], outputs=output)
        self.model2.compile(optimizer=self.optimizer,
                            loss=crf.loss_function,
                            metrics=[crf.accuracy])
Example #8
    def __init__(self, hidden_num, vocab_size, label_size, embedding_size):
        super(BiLSTMCRF, self).__init__()
        self.num_hidden = hidden_num
        self.vocab_size = vocab_size
        self.label_size = label_size
        self.transition_params = None

        # layers
        self.embedding = tf.keras.layers.Embedding(
            vocab_size, embedding_size, mask_zero=True)
        self.dropout = tf.keras.layers.Dropout(0.5)
        self.biLSTM = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(hidden_num, return_sequences=True))
        self.dense = tf.keras.layers.Dense(label_size)
        self.crf = CRF(label_size)
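Only the constructor is shown above; the sketch below is a hypothetical call() showing how these layers could be chained in a forward pass (the exact interface of the CRF layer is an assumption, not from the original snippet):

    def call(self, inputs, training=False):
        # inputs: [batch_size, seq_length] token ids; id 0 is masked out by mask_zero=True
        x = self.embedding(inputs)
        x = self.dropout(x, training=training)
        x = self.biLSTM(x)
        logits = self.dense(x)   # per-token scores over label_size classes
        return self.crf(logits)  # output of the (assumed) CRF layer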
Example #9
    def build4(self):
        char_input = Input(shape=(self.n_input_char, ), name='main_input')
        char_embed = Embedding(input_dim=self.n_vocab_char,
                               output_dim=self.n_embed_char,
                               weights=[self.char_embedding_mat],
                               input_length=self.n_input_char,
                               mask_zero=False,
                               trainable=True)(char_input)
        char_embed_drop = Dropout(self.keep_prob)(char_embed)
        # use a CNN to extract character-level features
        char_conv = Conv1D(filters=self.n_filter,
                           kernel_size=self.kernel_size,
                           strides=1,
                           padding='same',
                           kernel_initializer='he_normal')(char_embed_drop)
        char_conv = BatchNormalization(axis=-1)(char_conv)
        char_conv = LeakyReLU(alpha=1 / 5.5)(char_conv)
        # char_pool = MaxPooling1D(self.pool_size)(char_conv)
        # char_flaten = Flatten()(char_pool)
        # auxiliary
        word_input = Input(shape=(self.n_input_word, ), name='auxiliary_input')
        word_embed = Embedding(input_dim=self.n_vocab_word,
                               output_dim=self.n_embed_word,
                               weights=[self.word_embedding_mat],
                               input_length=self.n_input_word,
                               mask_zero=True,
                               trainable=True)(word_input)
        word_embed_drop = Dropout(self.keep_prob)(word_embed)

        # concatenation
        concat = concatenate([char_conv, word_embed_drop])
        concat_drop = TimeDistributed(Dropout(self.keep_prob))(concat)

        lstm = Bidirectional(
            GRU(self.n_lstm,
                return_sequences=True,
                dropout=self.keep_prob_lstm,
                recurrent_dropout=self.keep_prob_lstm))(concat_drop)

        crf = CRF(units=self.n_entity,
                  learn_mode='join',
                  test_mode='viterbi',
                  sparse_target=False)
        output = crf(lstm)
        self.model4 = Model(inputs=[char_input, word_input], outputs=output)
        self.model4.compile(optimizer=self.optimizer,
                            loss=crf.loss_function,
                            metrics=[crf.accuracy])
Example #10
    def build(self):
        # main
        char_input = Input(shape=(self.n_input_char, ), name='main_input')
        char_embed = Embedding(input_dim=self.n_vocab_char,
                               output_dim=self.n_embed_char,
                               weights=[self.char_embedding_mat],
                               input_length=self.n_input_char,
                               mask_zero=True,
                               trainable=True)(char_input)
        char_embed_drop = Dropout(self.keep_prob)(char_embed)
        bilstm = Bidirectional(
            GRU(self.n_lstm,
                return_sequences=True,
                dropout=self.keep_prob_lstm,
                recurrent_dropout=self.keep_prob_lstm))(char_embed_drop)
        # auxiliary
        word_input = Input(shape=(self.n_input_word, ), name='auxiliary_input')
        word_embed = Embedding(input_dim=self.n_vocab_word,
                               output_dim=self.n_embed_word,
                               weights=[self.word_embedding_mat],
                               input_length=self.n_input_word,
                               mask_zero=True,
                               trainable=True)(word_input)
        word_embed_drop = Dropout(self.keep_prob)(word_embed)
        lstm = Bidirectional(
            GRU(self.n_lstm,
                return_sequences=True,
                dropout=self.keep_prob_lstm,
                recurrent_dropout=self.keep_prob_lstm))(word_embed_drop)

        # concatenation
        concat = Concatenate(axis=-1)([bilstm, lstm])
        concat_drop = TimeDistributed(Dropout(self.keep_prob))(concat)

        crf = CRF(units=self.n_entity,
                  learn_mode='join',
                  test_mode='viterbi',
                  sparse_target=False)
        output = crf(concat_drop)

        self.model = Model(inputs=[char_input, word_input], outputs=output)
        self.model.compile(optimizer=self.optimizer,
                           loss=crf.loss_function,
                           metrics=[crf.accuracy])
Example #11
    def build(self):
        inputs = keras.layers.Input(shape=(self.max_len, ), dtype='int32')
        x = keras.layers.Masking(mask_value=0)(inputs)
        x = keras.layers.Embedding(input_dim=self.vocab_size,
                                   output_dim=self.embedding_dim,
                                   trainable=False,
                                   weights=[self.embedding_matrix],  # Embedding expects a list of weight arrays
                                   mask_zero=True)(x)
        x = keras.layers.Bidirectional(
            keras.layers.LSTM(self.lstm_units, return_sequences=True))(x)
        x = keras.layers.TimeDistributed(keras.layers.Dropout(0.2))(x)
        crf = CRF(self.class_nums)
        outputs = crf(x)
        model = keras.Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer='adam',
                      loss=crf.loss_function,
                      metrics=[crf.accuracy])
        print(model.summary())

        return model
Example #12
    def build_attention(self):
        char_input = Input(shape=(self.n_input, ), name='main_input')
        char_embed = Embedding(input_dim=self.n_vocab,
                               output_dim=self.n_embed,
                               input_length=self.n_input,
                               weights=[self.embedding_mat],
                               mask_zero=False,
                               trainable=True)(char_input)
        char_drop = Dropout(self.keep_prob)(char_embed)
        # attention
        attention_probs = Dense(int(char_drop.shape[2]),
                                activation='softmax',
                                name='attention_vec')(char_drop)
        attention_mul = merge([char_drop, attention_probs],
                              output_shape=32,
                              name='attention_mul',
                              mode='mul')
        blstm = Bidirectional(
            LSTM(self.n_lstm,
                 return_sequences=True,
                 dropout=self.keep_prob_lstm,
                 recurrent_dropout=self.keep_prob_lstm))(attention_mul)

        crf = CRF(units=self.n_entity,
                  learn_mode='join',
                  test_mode='viterbi',
                  sparse_target=False)

        output = crf(blstm)

        self.model_attention = Model(inputs=[char_input], outputs=output)

        self.model_attention.compile(optimizer=self.optimizer,
                                     loss=crf.loss_function,
                                     metrics=[crf.accuracy])
        print(self.model_attention.summary())
        plot_model(self.model_attention,
                   to_file="model_png/character_model_attention.png",
                   show_shapes=False)
Example #13
    def __init__(self, word_embedding_dim, word_hidden_dim, word_lstm_layers,
                 vocab_size, char_size, char_embedding_dim,
                 char_lstm_hidden_dim, cnn_filter_num, char_lstm_layers,
                 char_lstm, dropout_ratio, if_highway, highway_layers,
                 crf_start_tag, crf_end_tag, crf_target_size, scrf_tag_map,
                 scrf_dense_dim, in_doc_words, index_embeds_dim,
                 ALLOWED_SPANLEN, scrf_start_tag, scrf_end_tag, grconv):

        super(ner_model, self).__init__()

        self.char_lstm = char_lstm
        self.word_rep = WORD_REP(char_size,
                                 char_embedding_dim,
                                 char_lstm_hidden_dim,
                                 cnn_filter_num,
                                 char_lstm_layers,
                                 word_embedding_dim,
                                 word_hidden_dim,
                                 word_lstm_layers,
                                 vocab_size,
                                 dropout_ratio,
                                 if_highway=if_highway,
                                 in_doc_words=in_doc_words,
                                 highway_layers=highway_layers,
                                 char_lstm=char_lstm)

        self.crf = CRF(crf_start_tag, crf_end_tag, word_hidden_dim,
                       crf_target_size)

        self.hscrf = HSCRF(scrf_tag_map,
                           word_rep_dim=word_hidden_dim,
                           SCRF_feature_dim=scrf_dense_dim,
                           index_embeds_dim=index_embeds_dim,
                           ALLOWED_SPANLEN=ALLOWED_SPANLEN,
                           start_id=scrf_start_tag,
                           stop_id=scrf_end_tag,
                           grconv=grconv)
Example #14
from keras.models import Model  # here we use the functional Model API
from keras.layers import Input, Embedding, Conv1D, Dense  # layers used below; the CRF layer is defined elsewhere
import keras.backend as K  # import the Keras backend to write a custom loss; note that every operation

# inside a Keras model must go through the backend, e.g. use K.log instead of np.log

embedding_size = 128
sequence = Input(shape=(None, ), dtype='int32')  # input layer; the sequence length is left as None
embedding = Embedding(
    len(chars) + 1,
    embedding_size,
)(sequence)  # mask_zero=True has been removed here
cnn = Conv1D(128, 3, activation='relu', padding='same')(embedding)
cnn = Conv1D(128, 3, activation='relu', padding='same')(cnn)
cnn = Conv1D(128, 3, activation='relu', padding='same')(cnn)  # three stacked CNN layers

crf = CRF(True)  # define the CRF layer; True means the last label is masked out automatically
tag_score = Dense(5)(cnn)  # now a 5-way classification; the fifth label is used for masking
tag_score = crf(tag_score)  # wrap the original tag_score with the CRF layer

model = Model(inputs=sequence, outputs=tag_score)
model.summary()

model.compile(
    loss=crf.loss,  # use the CRF layer's own loss
    optimizer='adam',
    metrics=[crf.accuracy]  # use the CRF layer's own accuracy
)


def max_in_dict(d):  # define a function that finds the maximum value in a dict
    key, value = list(d.items())[0]
Example #15
    def build(self,
              word_length,
              num_labels,
              num_intent_labels,
              word_vocab_size,
              char_vocab_size,
              word_emb_dims=100,
              char_emb_dims=30,
              char_lstm_dims=30,
              tagger_lstm_dims=100,
              dropout=0.2):

        self.word_length = word_length
        self.num_labels = num_labels
        self.num_intent_labels = num_intent_labels
        self.word_vocab_size = word_vocab_size
        self.char_vocab_size = char_vocab_size

        words_input = Input(shape=(None, ), name='words_input')
        embedding_layer = Embedding(word_vocab_size,
                                    word_emb_dims,
                                    name='word_embedding')
        word_embeddings = embedding_layer(words_input)
        word_embeddings = Dropout(dropout)(word_embeddings)

        word_chars_input = Input(shape=(None, word_length),
                                 name='word_chars_input')
        char_embedding_layer = Embedding(char_vocab_size,
                                         char_emb_dims,
                                         input_length=word_length,
                                         name='char_embedding')
        char_embeddings = char_embedding_layer(word_chars_input)
        char_embeddings = TimeDistributed(Bidirectional(
            LSTM(char_lstm_dims)))(char_embeddings)
        char_embeddings = Dropout(dropout)(char_embeddings)

        # first BiLSTM layer (used for intent classification)
        first_bilstm_layer = Bidirectional(
            LSTM(tagger_lstm_dims, return_sequences=True, return_state=True))
        first_lstm_out = first_bilstm_layer(word_embeddings)

        lstm_y_sequence = first_lstm_out[0]  # the LSTM layer's full output sequence
        states = first_lstm_out[1:]  # the final forward/backward states
        hf, _, hb, _ = states  # extract last hidden states
        h_state = concatenate([hf, hb], axis=-1)
        intents = Dense(num_intent_labels,
                        activation='softmax',
                        name='intent_classifier_output')(h_state)
        # create the 2nd feature vectors
        combined_features = concatenate([lstm_y_sequence, char_embeddings],
                                        axis=-1)

        # 2nd BiLSTM layer (used for entity/slots classification)
        second_bilstm_layer = Bidirectional(
            LSTM(tagger_lstm_dims, return_sequences=True))(combined_features)
        second_bilstm_layer = Dropout(dropout)(second_bilstm_layer)
        bilstm_out = Dense(num_labels)(second_bilstm_layer)

        # feed BiLSTM vectors into CRF
        crf = CRF(num_labels, name='intent_slot_crf')
        entities = crf(bilstm_out)

        model = Model(inputs=[words_input, word_chars_input],
                      outputs=[intents, entities])

        loss_f = {
            'intent_classifier_output': 'categorical_crossentropy',
            'intent_slot_crf': crf.loss
        }
        metrics = {
            'intent_classifier_output': 'categorical_accuracy',
            'intent_slot_crf': crf.viterbi_accuracy
        }
        model.compile(loss=loss_f, optimizer=AdamOptimizer(), metrics=metrics)
        self.model = model
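A hypothetical training call for the two-output model built above (tagger, word_ids, char_ids and the label arrays are assumptions): because the outputs are named, targets can be passed as a dict keyed by 'intent_classifier_output' and 'intent_slot_crf'.

# sketch only: word_ids (batch, seq), char_ids (batch, seq, word_length),
# intent_labels one-hot (batch, num_intent_labels), slot_labels (batch, seq, num_labels)
tagger.build(word_length, num_labels, num_intent_labels, word_vocab_size, char_vocab_size)
tagger.model.fit([word_ids, char_ids],
                 {'intent_classifier_output': intent_labels,
                  'intent_slot_crf': slot_labels},
                 batch_size=32,
                 epochs=3)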