def create_crf_model(bert_config, is_training, input_ids, input_mask,
                     segment_ids, labels, num_labels, use_one_hot_embeddings):
    """
    create model
    :param bert_config: bert config
    :param is_training:
    :param input_ids: idx of input data
    :param input_mask:
    :param segment_ids:
    :param labels: idx of labels
    :param num_labels: number of label types
    :param use_one_hot_embeddings:
    :return:
    """
    # representation
    model = modeling.BertModel(
        config=bert_config,
        is_training=is_training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)
    embedding = model.get_sequence_output()
    max_seq_length = embedding.shape[1].value
    # mark non-padding tokens; summing gives the true length of each sequence
    used = tf.sign(tf.abs(input_ids))
    lengths = tf.reduce_sum(used, reduction_indices=1)
    crf = CRF(embedded_chars=embedding,
              droupout_rate=0.5,
              initializers=initializers,
              num_labels=num_labels,
              seq_length=max_seq_length,
              labels=labels,
              lengths=lengths,
              is_training=is_training)
    (total_loss, logits, trans, pred_ids) = crf.add_crf_layer()
    return (total_loss, logits, trans, pred_ids)
def build(self):
    self.model = Sequential()
    self.model.add(
        Embedding(input_dim=self.n_vocab,
                  output_dim=self.n_embed,
                  input_length=self.n_input,
                  weights=[self.embedding_mat],
                  mask_zero=True,
                  trainable=True))
    self.model.add(Dropout(self.keep_prob))
    self.model.add(
        Bidirectional(
            GRU(self.n_lstm,
                return_sequences=True,
                dropout=self.keep_prob_lstm,
                recurrent_dropout=self.keep_prob_lstm)))
    self.model.add(TimeDistributed(Dropout(self.keep_prob)))
    # crf = CRF(units=self.n_entity, learn_mode='join',
    #           test_mode='viterbi', sparse_target=False)
    crf = CRF(units=self.n_entity,
              learn_mode='marginal',
              test_mode='marginal',
              sparse_target=False)
    self.model.add(crf)
    self.model.compile(optimizer=self.optimizer,
                       loss=crf.loss_function,
                       metrics=[crf.accuracy])
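A minimal training sketch for the Sequential model above, assuming the keras_contrib CRF layer and hypothetical padded inputs; since sparse_target=False, the tags are expected one-hot. The array shapes and the `ner` instance are illustrative placeholders, not taken from the original code.

import numpy as np

# hypothetical data; 0 is reserved for padding because mask_zero=True
n_samples, n_input, n_entity = 32, 50, 7
X = np.random.randint(1, 1000, size=(n_samples, n_input))  # padded token ids
y = np.eye(n_entity)[np.random.randint(0, n_entity, size=(n_samples, n_input))]  # one-hot tags

# `ner` stands for an instance of the enclosing class (name not shown in the snippet)
ner.build()
ner.model.fit(X, y, batch_size=16, epochs=1, validation_split=0.1)
probs = ner.model.predict(X)  # per-token marginals, shape (n_samples, n_input, n_entity)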
def model_bert_bilstm_crf(self):
    bert_model = load_trained_model_from_checkpoint(self.config_path,
                                                    self.checkpoint_path)
    for l in bert_model.layers:
        l.trainable = True
    x_input1 = Input(shape=(None, ))
    x_input2 = Input(shape=(None, ))
    x = bert_model([x_input1, x_input2])
    bilstm = Bidirectional(LSTM(64,
                                return_sequences=True,
                                dropout=0.35,
                                recurrent_dropout=0.35),
                           name='BiLSTM')(x)
    hidden = TimeDistributed(Dense(32, activation=None),
                             name='hidden_layer')(bilstm)
    crf = CRF(units=13,
              learn_mode='join',
              test_mode='viterbi',
              sparse_target=False)
    output = crf(hidden)
    model = Model(inputs=[x_input1, x_input2], outputs=output)
    adam = Adam(lr=2e-4)
    model.compile(optimizer=adam,
                  loss=crf.loss_function,
                  metrics=[crf.accuracy])
    model.summary()
    return model
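A hedged sketch of calling the BERT-BiLSTM-CRF model above: the two inputs are BERT token ids and segment ids of the same length. The arrays and the `builder` instance below are illustrative placeholders.

import numpy as np

batch_size, seq_len = 8, 128
token_ids = np.random.randint(1, 21128, size=(batch_size, seq_len))  # placeholder vocab size
segment_ids = np.zeros((batch_size, seq_len))                        # single-sentence input

# `builder` stands for an instance of the enclosing class (name not shown in the snippet)
model = builder.model_bert_bilstm_crf()
tag_scores = model.predict([token_ids, segment_ids])  # shape (batch_size, seq_len, 13)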
def build_attention(self):
    # main input: characters
    char_input = Input(shape=(self.n_input_char, ))
    char_embed = Embedding(input_dim=self.n_vocab_char,
                           output_dim=self.n_embed_char,
                           input_length=self.n_input_char,
                           weights=[self.char_embedding_mat],
                           mask_zero=False,
                           trainable=True)(char_input)
    char_embed_drop = Dropout(self.keep_prob)(char_embed)
    # auxiliary input: words
    word_input = Input(shape=(self.n_input_word, ))
    word_embed = Embedding(input_dim=self.n_vocab_word,
                           output_dim=self.n_embed_word,
                           input_length=self.n_input_word,
                           weights=[self.word_embedding_mat],
                           mask_zero=False,
                           trainable=True)(word_input)
    word_embed_drop = Dropout(self.keep_prob)(word_embed)
    # use a CNN to extract word-level n-gram features
    word_conv = Conv1D(self.n_filter,
                       kernel_size=self.kernel_size,
                       strides=1,
                       padding='same',
                       kernel_initializer='he_normal')(word_embed_drop)
    word_conv = BatchNormalization(axis=-1)(word_conv)
    word_conv = LeakyReLU(alpha=1 / 5.5)(word_conv)
    # concatenation
    concat = Concatenate(axis=-1)([char_embed_drop, word_conv])
    concat_drop = TimeDistributed(Dropout(self.keep_prob))(concat)
    # attention: softmax weights over the feature dimension, applied element-wise
    attention_probs = Dense(int(concat_drop.shape[2]),
                            activation='softmax',
                            name='attention_vec')(concat_drop)
    attention_mul = merge([concat_drop, attention_probs],
                          name='attention_mul',
                          mode='mul')  # legacy Keras 1.x merge API
    bilstm = Bidirectional(
        LSTM(units=self.n_lstm,
             return_sequences=True,
             dropout=self.keep_prob_lstm,
             recurrent_dropout=self.keep_prob_lstm))(attention_mul)
    crf = CRF(units=self.n_entity,
              learn_mode='join',
              test_mode='viterbi',
              sparse_target=False)
    output = crf(bilstm)
    self.model_attention = Model(inputs=[char_input, word_input],
                                 outputs=output)
    self.model_attention.compile(optimizer=self.optimizer,
                                 loss=crf.loss_function,
                                 metrics=[crf.accuracy])
    # plot_model(self.model_attention, to_file="model_png/character_model_attention.png", show_shapes=False)
    print(self.model_attention.summary())
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids, labels, num_labels, use_one_hot_embeddings): """ 创建X模型 :param bert_config: bert 配置 :param is_training: :param input_ids: 数据的idx 表示 :param input_mask: :param segment_ids: :param labels: 标签的idx 表示 :param num_labels: 类别数量 :param use_one_hot_embeddings: :return: """ # 使用数据加载BertModel,获取对应的字embedding model = modeling.BertModel(config=bert_config, is_training=is_training, input_ids=input_ids, input_mask=input_mask, token_type_ids=segment_ids, use_one_hot_embeddings=use_one_hot_embeddings) # 获取对应的embedding 输入数据[batch_size, seq_length, embedding_size] embedding = model.get_sequence_output() max_seq_length = embedding.shape[1].value used = tf.sign(tf.abs(input_ids)) lengths = tf.reduce_sum( used, reduction_indices=1) # [batch_size] 大小的向量,包含了当前batch中的序列长度 blstm_crf = CRF(embedded_chars=embedding, hidden_unit=FLAGS.lstm_size, cell_type=FLAGS.cell, num_layers=FLAGS.num_layers, droupout_rate=FLAGS.droupout_rate, initializers=initializers, num_labels=num_labels, seq_length=max_seq_length, labels=labels, lengths=lengths, is_training=is_training) rst = blstm_crf.add_crf_layer() return rst
def create_model(bert_config, is_training, input_ids, input_mask, segment_ids, labels, num_labels, use_one_hot_embeddings): """ create model :param bert_config: bert cofig :param is_training: :param input_ids: idx of data :param input_mask: :param segment_ids: :param labels: idx of label :param num_labels: number of categories :param use_one_hot_embeddings: :return: """ # load BertModel, and acuqire the corresponding embedding model = modeling.BertModel(config=bert_config, is_training=is_training, input_ids=input_ids, input_mask=input_mask, token_type_ids=segment_ids, use_one_hot_embeddings=use_one_hot_embeddings) # acuqire the corresponding embedding embedding = model.get_sequence_output() max_seq_length = embedding.shape[1].value used = tf.sign(tf.abs(input_ids)) lengths = tf.reduce_sum(used, reduction_indices=1) # [batch_size] crf = CRF(embedded_chars=embedding, droupout_rate=FLAGS.droupout_rate, initializers=initializers, num_labels=num_labels, seq_length=max_seq_length, labels=labels, lengths=lengths, is_training=is_training) rst = crf.add_crf_layer() return rst
def build2(self):
    # main input: characters
    char_input = Input(shape=(self.n_input_char, ))
    char_embed = Embedding(input_dim=self.n_vocab_char,
                           output_dim=self.n_embed_char,
                           input_length=self.n_input_char,
                           weights=[self.char_embedding_mat],
                           mask_zero=False,
                           trainable=True)(char_input)
    char_embed_drop = Dropout(self.keep_prob)(char_embed)
    # auxiliary input: words
    word_input = Input(shape=(self.n_input_word, ))
    word_embed = Embedding(input_dim=self.n_vocab_word,
                           output_dim=self.n_embed_word,
                           input_length=self.n_input_word,
                           weights=[self.word_embedding_mat],
                           mask_zero=False,
                           trainable=True)(word_input)
    word_embed_drop = Dropout(self.keep_prob)(word_embed)
    # use a CNN to extract word-level n-gram features
    word_conv = Conv1D(self.n_filter,
                       kernel_size=self.kernel_size,
                       strides=1,
                       padding='same',
                       kernel_initializer='he_normal')(word_embed_drop)
    word_conv = BatchNormalization(axis=-1)(word_conv)
    word_conv = LeakyReLU(alpha=1 / 5.5)(word_conv)
    # alpha = 1 / 5.5
    # word_conv = tf.maximum(alpha * word_conv, word_conv)
    # concatenation
    concat = Concatenate(axis=-1)([char_embed, word_conv])
    concat_drop = TimeDistributed(Dropout(self.keep_prob))(concat)
    bilstm = Bidirectional(
        LSTM(units=self.n_lstm,
             return_sequences=True,
             dropout=self.keep_prob_lstm,
             recurrent_dropout=self.keep_prob_lstm))(concat_drop)
    crf = CRF(units=self.n_entity,
              learn_mode='join',
              test_mode='viterbi',
              sparse_target=False)
    output = crf(bilstm)
    self.model2 = Model(inputs=[char_input, word_input], outputs=output)
    self.model2.compile(optimizer=self.optimizer,
                        loss=crf.loss_function,
                        metrics=[crf.accuracy])
def __init__(self, hidden_num, vocab_size, label_size, embedding_size):
    super(BiLSTMCRF, self).__init__()
    self.num_hidden = hidden_num
    self.vocab_size = vocab_size
    self.label_size = label_size
    self.transition_params = None
    # layers
    self.embedding = tf.keras.layers.Embedding(vocab_size,
                                               embedding_size,
                                               mask_zero=True)
    self.dropout = tf.keras.layers.Dropout(0.5)
    self.biLSTM = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(hidden_num, return_sequences=True))
    self.dense = tf.keras.layers.Dense(label_size)
    self.crf = CRF(label_size)
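A possible forward pass to pair with the constructor above is sketched below. The API of the snippet's own CRF layer is not shown, so this sketch computes the CRF loss with tensorflow_addons' crf_log_likelihood instead; treat it as an illustration under that assumption, not the repository's actual call method.

import tensorflow as tf
import tensorflow_addons as tfa

def call(self, text, labels=None, training=None):
    # true lengths of the padded id sequences (0 is the padding id)
    text_lens = tf.reduce_sum(tf.cast(tf.not_equal(text, 0), tf.int32), axis=-1)
    x = self.embedding(text)
    x = self.dropout(x, training=training)
    logits = self.dense(self.biLSTM(x))
    if labels is not None:
        if self.transition_params is None:
            # label-to-label transition scores, learned jointly with the network
            self.transition_params = tf.Variable(
                tf.random.uniform([self.label_size, self.label_size]),
                name='transition_params')
        # log-likelihood of the gold tag sequences under the linear-chain CRF
        log_likelihood, self.transition_params = tfa.text.crf_log_likelihood(
            logits, tf.cast(labels, tf.int32), text_lens,
            transition_params=self.transition_params)
        return logits, text_lens, log_likelihood
    return logits, text_lens

At inference time, tfa.text.crf_decode(logits, self.transition_params, text_lens) would give the Viterbi tag sequence for each sentence.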
def build4(self):
    char_input = Input(shape=(self.n_input_char, ), name='main_input')
    char_embed = Embedding(input_dim=self.n_vocab_char,
                           output_dim=self.n_embed_char,
                           weights=[self.char_embedding_mat],
                           input_length=self.n_input_char,
                           mask_zero=False,
                           trainable=True)(char_input)
    char_embed_drop = Dropout(self.keep_prob)(char_embed)
    # use a CNN to extract character-level features
    char_conv = Conv1D(filters=self.n_filter,
                       kernel_size=self.kernel_size,
                       strides=1,
                       padding='same',
                       kernel_initializer='he_normal')(char_embed_drop)
    char_conv = BatchNormalization(axis=-1)(char_conv)
    char_conv = LeakyReLU(alpha=1 / 5.5)(char_conv)
    # char_pool = MaxPooling1D(self.pool_size)(char_conv)
    # char_flaten = Flatten()(char_pool)
    # auxiliary input: words
    word_input = Input(shape=(self.n_input_word, ), name='auxiliary_input')
    word_embed = Embedding(input_dim=self.n_vocab_word,
                           output_dim=self.n_embed_word,
                           weights=[self.word_embedding_mat],
                           input_length=self.n_input_word,
                           mask_zero=True,
                           trainable=True)(word_input)
    word_embed_drop = Dropout(self.keep_prob)(word_embed)
    # concatenation
    concat = concatenate([char_conv, word_embed_drop])
    concat_drop = TimeDistributed(Dropout(self.keep_prob))(concat)
    lstm = Bidirectional(
        GRU(self.n_lstm,
            return_sequences=True,
            dropout=self.keep_prob_lstm,
            recurrent_dropout=self.keep_prob_lstm))(concat_drop)
    crf = CRF(units=self.n_entity,
              learn_mode='join',
              test_mode='viterbi',
              sparse_target=False)
    output = crf(lstm)
    self.model4 = Model(inputs=[char_input, word_input], outputs=output)
    self.model4.compile(optimizer=self.optimizer,
                        loss=crf.loss_function,
                        metrics=[crf.accuracy])
def build(self):
    # main input: characters
    char_input = Input(shape=(self.n_input_char, ), name='main_input')
    char_embed = Embedding(input_dim=self.n_vocab_char,
                           output_dim=self.n_embed_char,
                           weights=[self.char_embedding_mat],
                           input_length=self.n_input_char,
                           mask_zero=True,
                           trainable=True)(char_input)
    char_embed_drop = Dropout(self.keep_prob)(char_embed)
    bilstm = Bidirectional(
        GRU(self.n_lstm,
            return_sequences=True,
            dropout=self.keep_prob_lstm,
            recurrent_dropout=self.keep_prob_lstm))(char_embed_drop)
    # auxiliary input: words
    word_input = Input(shape=(self.n_input_word, ), name='auxiliary_input')
    word_embed = Embedding(input_dim=self.n_vocab_word,
                           output_dim=self.n_embed_word,
                           weights=[self.word_embedding_mat],
                           input_length=self.n_input_word,
                           mask_zero=True,
                           trainable=True)(word_input)
    word_embed_drop = Dropout(self.keep_prob)(word_embed)
    lstm = Bidirectional(
        GRU(self.n_lstm,
            return_sequences=True,
            dropout=self.keep_prob_lstm,
            recurrent_dropout=self.keep_prob_lstm))(word_embed_drop)
    # concatenation
    concat = Concatenate(axis=-1)([bilstm, lstm])
    concat_drop = TimeDistributed(Dropout(self.keep_prob))(concat)
    crf = CRF(units=self.n_entity,
              learn_mode='join',
              test_mode='viterbi',
              sparse_target=False)
    output = crf(concat_drop)
    self.model = Model(inputs=[char_input, word_input], outputs=output)
    self.model.compile(optimizer=self.optimizer,
                       loss=crf.loss_function,
                       metrics=[crf.accuracy])
def build(self):
    inputs = keras.layers.Input(shape=(self.max_len, ), dtype='int32')
    x = keras.layers.Masking(mask_value=0)(inputs)
    x = keras.layers.Embedding(input_dim=self.vocab_size,
                               output_dim=self.embedding_dim,
                               trainable=False,
                               weights=self.embedding_matrix,
                               mask_zero=True)(x)
    x = keras.layers.Bidirectional(
        keras.layers.LSTM(self.lstm_units, return_sequences=True))(x)
    x = keras.layers.TimeDistributed(keras.layers.Dropout(0.2))(x)
    crf = CRF(self.class_nums)
    outputs = crf(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam',
                  loss=crf.loss_function,
                  metrics=[crf.accuracy])
    print(model.summary())
    return model
def build_attention(self):
    char_input = Input(shape=(self.n_input, ), name='main_input')
    char_embed = Embedding(input_dim=self.n_vocab,
                           output_dim=self.n_embed,
                           input_length=self.n_input,
                           weights=[self.embedding_mat],
                           mask_zero=False,
                           trainable=True)(char_input)
    char_drop = Dropout(self.keep_prob)(char_embed)
    # attention: softmax weights over the feature dimension, applied element-wise
    attention_probs = Dense(int(char_drop.shape[2]),
                            activation='softmax',
                            name='attention_vec')(char_drop)
    attention_mul = merge([char_drop, attention_probs],
                          output_shape=32,
                          name='attention_mul',
                          mode='mul')  # legacy Keras 1.x merge API
    blstm = Bidirectional(
        LSTM(self.n_lstm,
             return_sequences=True,
             dropout=self.keep_prob_lstm,
             recurrent_dropout=self.keep_prob_lstm))(attention_mul)
    crf = CRF(units=self.n_entity,
              learn_mode='join',
              test_mode='viterbi',
              sparse_target=False)
    output = crf(blstm)
    self.model_attention = Model(inputs=[char_input], outputs=output)
    self.model_attention.compile(optimizer=self.optimizer,
                                 loss=crf.loss_function,
                                 metrics=[crf.accuracy])
    print(self.model_attention.summary())
    plot_model(self.model_attention,
               to_file="model_png/character_model_attention.png",
               show_shapes=False)
def __init__(self, word_embedding_dim, word_hidden_dim, word_lstm_layers,
             vocab_size, char_size, char_embedding_dim, char_lstm_hidden_dim,
             cnn_filter_num, char_lstm_layers, char_lstm, dropout_ratio,
             if_highway, highway_layers, crf_start_tag, crf_end_tag,
             crf_target_size, scrf_tag_map, scrf_dense_dim, in_doc_words,
             index_embeds_dim, ALLOWED_SPANLEN, scrf_start_tag, scrf_end_tag,
             grconv):
    super(ner_model, self).__init__()
    self.char_lstm = char_lstm
    self.word_rep = WORD_REP(char_size,
                             char_embedding_dim,
                             char_lstm_hidden_dim,
                             cnn_filter_num,
                             char_lstm_layers,
                             word_embedding_dim,
                             word_hidden_dim,
                             word_lstm_layers,
                             vocab_size,
                             dropout_ratio,
                             if_highway=if_highway,
                             in_doc_words=in_doc_words,
                             highway_layers=highway_layers,
                             char_lstm=char_lstm)
    self.crf = CRF(crf_start_tag, crf_end_tag, word_hidden_dim,
                   crf_target_size)
    self.hscrf = HSCRF(scrf_tag_map,
                       word_rep_dim=word_hidden_dim,
                       SCRF_feature_dim=scrf_dense_dim,
                       index_embeds_dim=index_embeds_dim,
                       ALLOWED_SPANLEN=ALLOWED_SPANLEN,
                       start_id=scrf_start_tag,
                       stop_id=scrf_end_tag,
                       grconv=grconv)
from keras.models import Model  # here we use the functional Model API
import keras.backend as K  # import the Keras backend to define a custom loss; note that every
                           # operation inside a Keras model must go through the backend,
                           # e.g. use K.log rather than np.log

embedding_size = 128
sequence = Input(shape=(None, ), dtype='int32')  # input layer; the sequence length is left as None
embedding = Embedding(
    len(chars) + 1,
    embedding_size,
)(sequence)  # mask_zero=True removed
cnn = Conv1D(128, 3, activation='relu', padding='same')(embedding)
cnn = Conv1D(128, 3, activation='relu', padding='same')(cnn)
cnn = Conv1D(128, 3, activation='relu', padding='same')(cnn)  # a stack of 3 CNN layers
crf = CRF(True)  # define the CRF layer; True means the last label is automatically masked out
tag_score = Dense(5)(cnn)  # now a 5-way classification; the fifth label is used for masking
tag_score = crf(tag_score)  # wrap the original tag_score with the CRF
model = Model(inputs=sequence, outputs=tag_score)
model.summary()
model.compile(
    loss=crf.loss,  # use the CRF's own loss
    optimizer='adam',
    metrics=[crf.accuracy]  # use the CRF's own accuracy
)


def max_in_dict(d):  # helper that finds the entry with the maximum value in a dict
    key, value = list(d.items())[0]
    for i, j in d.items():
        if j > value:
            key, value = i, j
    return key, value
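A CRF of this kind learns a label-to-label transition matrix, and decoding at inference time is plain Viterbi over the per-token scores plus those transitions. A minimal NumPy sketch of that decoding step, assuming `nodes` is a (seq_len, num_labels) emission-score matrix and `trans` a (num_labels, num_labels) transition matrix; this is an illustration of the technique, not the author's exact decoding helper.

import numpy as np

def viterbi_decode(nodes, trans):
    """Return the best label path for one sequence.

    nodes: (seq_len, num_labels) per-token emission scores
    trans: (num_labels, num_labels) transition scores, trans[i, j] = score of i -> j
    """
    seq_len, num_labels = nodes.shape
    scores = nodes[0]                         # best score ending in each label at t = 0
    paths = [[i] for i in range(num_labels)]  # best path ending in each label
    for t in range(1, seq_len):
        # candidate[i, j]: best path ending in i at t-1, transition i -> j, emission of j at t
        candidate = scores[:, None] + trans + nodes[t][None, :]
        best_prev = candidate.argmax(axis=0)
        scores = candidate.max(axis=0)
        paths = [paths[best_prev[j]] + [j] for j in range(num_labels)]
    return paths[int(scores.argmax())]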
def build(self,
          word_length,
          num_labels,
          num_intent_labels,
          word_vocab_size,
          char_vocab_size,
          word_emb_dims=100,
          char_emb_dims=30,
          char_lstm_dims=30,
          tagger_lstm_dims=100,
          dropout=0.2):
    self.word_length = word_length
    self.num_labels = num_labels
    self.num_intent_labels = num_intent_labels
    self.word_vocab_size = word_vocab_size
    self.char_vocab_size = char_vocab_size

    words_input = Input(shape=(None, ), name='words_input')
    embedding_layer = Embedding(word_vocab_size,
                                word_emb_dims,
                                name='word_embedding')
    word_embeddings = embedding_layer(words_input)
    word_embeddings = Dropout(dropout)(word_embeddings)

    word_chars_input = Input(shape=(None, word_length),
                             name='word_chars_input')
    char_embedding_layer = Embedding(char_vocab_size,
                                     char_emb_dims,
                                     input_length=word_length,
                                     name='char_embedding')
    char_embeddings = char_embedding_layer(word_chars_input)
    char_embeddings = TimeDistributed(
        Bidirectional(LSTM(char_lstm_dims)))(char_embeddings)
    char_embeddings = Dropout(dropout)(char_embeddings)

    # first BiLSTM layer (used for intent classification)
    first_bilstm_layer = Bidirectional(
        LSTM(tagger_lstm_dims, return_sequences=True, return_state=True))
    first_lstm_out = first_bilstm_layer(word_embeddings)

    lstm_y_sequence = first_lstm_out[:1][0]  # save y states of the LSTM layer
    states = first_lstm_out[1:]
    hf, _, hb, _ = states  # extract last hidden states
    h_state = concatenate([hf, hb], axis=-1)
    intents = Dense(num_intent_labels,
                    activation='softmax',
                    name='intent_classifier_output')(h_state)

    # create the 2nd feature vectors
    combined_features = concatenate([lstm_y_sequence, char_embeddings],
                                    axis=-1)

    # 2nd BiLSTM layer (used for entity/slots classification)
    second_bilstm_layer = Bidirectional(
        LSTM(tagger_lstm_dims, return_sequences=True))(combined_features)
    second_bilstm_layer = Dropout(dropout)(second_bilstm_layer)
    bilstm_out = Dense(num_labels)(second_bilstm_layer)

    # feed BiLSTM vectors into CRF
    crf = CRF(num_labels, name='intent_slot_crf')
    entities = crf(bilstm_out)

    model = Model(inputs=[words_input, word_chars_input],
                  outputs=[intents, entities])

    loss_f = {
        'intent_classifier_output': 'categorical_crossentropy',
        'intent_slot_crf': crf.loss
    }
    metrics = {
        'intent_classifier_output': 'categorical_accuracy',
        'intent_slot_crf': crf.viterbi_accuracy
    }
    model.compile(loss=loss_f, optimizer=AdamOptimizer(), metrics=metrics)
    self.model = model
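A hedged sketch of training the two-headed model built above, keyed by the output layer names it defines ('intent_classifier_output' and 'intent_slot_crf'). The vocabulary sizes, array shapes, and the `tagger` instance are illustrative placeholders, not values from the original code; intent labels and slot tags are assumed one-hot here.

import numpy as np

# illustrative placeholder shapes
batch, seq_len, word_length = 64, 30, 12
num_intent_labels, num_labels = 7, 20

words = np.random.randint(1, 10000, size=(batch, seq_len))               # padded word ids
word_chars = np.random.randint(1, 80, size=(batch, seq_len, word_length))  # per-word char ids
intent_y = np.eye(num_intent_labels)[np.random.randint(0, num_intent_labels, size=batch)]
slot_y = np.eye(num_labels)[np.random.randint(0, num_labels, size=(batch, seq_len))]

# `tagger` stands for an instance of the enclosing class (name not shown in the snippet)
tagger.build(word_length, num_labels, num_intent_labels,
             word_vocab_size=10000, char_vocab_size=80)
tagger.model.fit([words, word_chars],
                 {'intent_classifier_output': intent_y, 'intent_slot_crf': slot_y},
                 batch_size=32, epochs=1)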