def build_model(embeddings=100, vocab_size=vocab_size, rnn_units=100, window_size=1):
    """Build the dual-task (segmentation + tagging) BiLSTM-CRF model.

    Returns (model, Seq_crf, Tag_crf) so callers can reach the CRF layers
    for decoding.
    """
    x_in = Input(shape=(None,))
    embedded = WindowEmbedding(window_size,
                               input_dim=vocab_size,
                               output_dim=embeddings,
                               trainable=True,
                               mask_zero=True)(x_in)
    # The first BiLSTM feeds both heads; the second refines features for tagging.
    hidden1 = Bidirectional(LSTM(units=rnn_units, return_sequences=True),
                            name='bilstm_1')(embedded)
    seq_branch = Dropout(0.5)(hidden1)
    hidden2 = Bidirectional(LSTM(units=rnn_units, return_sequences=True),
                            name='bilstm_2')(hidden1)
    tag_branch = Dropout(0.5)(hidden2)
    seq_branch = TimeDistributed(Dense(num_seglabels), name='dense_seq')(seq_branch)
    tag_branch = TimeDistributed(Dense(num_labels), name='dense_tag')(tag_branch)
    Seq_crf = ConditionalRandomField(lr_multiplier=seq_crf_lr_multiplier,
                                     name='seq_crf')
    Tag_crf = ConditionalRandomField(lr_multiplier=tag_crf_lr_multiplier,
                                     name='tag_crf')
    seq_branch = Seq_crf(seq_branch)
    tag_branch = Tag_crf(tag_branch)
    model = Model(x_in, [seq_branch, tag_branch])
    model.summary()
    model.compile(loss=[Seq_crf.sparse_loss, Tag_crf.sparse_loss],
                  optimizer=build_optimizer('Adam'),
                  metrics=[SparseAccuracy()])
    return model, Seq_crf, Tag_crf
def define_model(self) -> tf.keras.Model:
    """Assemble the transformer + Dense + CRF tagging model and the NER decoder."""
    model_name = self.bert4keras_model_name_dict[self.pre_trained_model_type]
    build_params = {
        "config_path": self.pre_trained_model_config_path,
        "checkpoint_path": self.pre_trained_model_checkpoint_path,
        "model": model_name,
    }
    # When a simplified vocabulary is used, pass the kept token ids through.
    if self.simplified_tokenizer:
        build_params["keep_tokens"] = self.keep_tokens
    self.transformer = build_transformer_model(**build_params)
    # Tap the output of the configured transformer layer.
    tap_layer = "Transformer-%s-FeedForward-Norm" % (self.bert_layers - 1)
    features = self.transformer.get_layer(tap_layer).output
    # Per-token scores: 2 per category (BIO-style) plus one extra label.
    logits = tf.keras.layers.Dense(len(self.categories) * 2 + 1)(features)
    self.CRF = ConditionalRandomField(lr_multiplier=self.crf_lr_multiplier,
                                      name="CRF")
    crf_out = self.CRF(logits)
    model = tf.keras.models.Model(self.transformer.input, crf_out)
    self.NER = NamedEntityRecognizer(
        self.tokenizer,
        model,
        self.categories,
        trans=K.eval(self.CRF.trans),
        starts=[0],
        ends=[0],
    )
    return model
def build_albert_model(self):
    """(Re)build the ALBERT + Dense + CRF model and load fine-tuned weights."""
    del self.albert_model
    # The loaded pre-trained model is cached as an .h5 file so that later
    # runs can skip rebuilding it from config/checkpoint.
    file_name = f'albert_{self.pretrain_name}_pretrain.h5'
    if os.path.exists(file_name):
        pretrain_model = load_model(file_name, compile=False)
    else:
        pretrain_model = build_transformer_model(
            config_path=self.config,
            checkpoint_path=self.checkpoint,
            model='albert_unshared' if self.unshared else 'albert',
            return_keras_model=True)
    if not self.unshared:
        # Shared-parameter ALBERT reuses one layer object; take the output
        # of its (albert_layers - 1)-th call.
        output = pretrain_model.get_layer(
            'Transformer-FeedForward-Norm').get_output_at(self.albert_layers - 1)
    else:
        output = pretrain_model.get_layer(
            'Transformer-%s-FeedForward-Norm' % (self.albert_layers - 1)).output
    output = Dense(self.__num_labels)(output)
    self.__crf = ConditionalRandomField(lr_multiplier=self.crf_lr_multiplier)
    output = self.__crf(output)
    model = Model(pretrain_model.input, output)
    model.load_weights(self.weight_path)
    self._model = model
def __init__(self):
    """Create the CRF layer, the Keras model and the Viterbi-based recognizer."""
    self.CRF = ConditionalRandomField(lr_multiplier=config.crf_lr_multiplier)
    self.model = self.get_model()
    # NOTE(review): starts/ends of [0] presumably pin decoding to tag id 0
    # at the sequence boundaries — confirm against NamedEntityRecognizer.
    self.NER = NamedEntityRecognizer(trans=K.eval(self.CRF.trans),
                                     starts=[0],
                                     ends=[0])
def build_model(self, inputs, outputs):
    """
    build_model.
    Args:
        inputs: tensor, input of model
        outputs: tensor, output of model
    Returns:
        None
    """
    embed_char = outputs[0]
    embed_word = outputs[1]
    if self.wclstm_embed_type == "ATTNENTION":  # NOTE(review): key spelled this way in configs
        x_word = L.TimeDistributed(
            SelfAttention(K.int_shape(embed_word)[-1]))(embed_word)
        x_word_shape = K.int_shape(x_word)
        # Fix: the Reshape layer was created but never applied to the tensor
        # (x_word ended up being the Layer object itself), and target_shape
        # wrongly nested the batch/time dims.  Flatten the trailing
        # (words, dim) axes into one feature axis; -1 keeps a variable
        # time dimension.
        x_word = L.Reshape(
            target_shape=(-1, x_word_shape[2] * x_word_shape[3]))(x_word)
        x_word = L.Dense(self.embed_size, activation=self.activate_mid)(x_word)
    # elif self.wclstm_embed_type == "SHORT":
    else:
        # "SHORT": keep only the first word embedding per char position.
        x_word = L.Lambda(lambda x: x[:, :, 0, :])(embed_word)
    outputs_concat = L.Concatenate(axis=-1)([embed_char, x_word])
    # LSTM or GRU
    if self.rnn_type == "LSTM":
        rnn_cell = L.LSTM
    elif self.rnn_type == "CuDNNLSTM":
        rnn_cell = L.CuDNNLSTM
    elif self.rnn_type == "CuDNNGRU":
        rnn_cell = L.CuDNNGRU
    else:
        rnn_cell = L.GRU
    # Bi-LSTM stack.  Fix: chain each layer on the previous layer's output;
    # previously every layer consumed outputs_concat, so with
    # num_rnn_layers > 1 only the last Bidirectional layer reached the graph.
    x = outputs_concat
    for nrl in range(self.num_rnn_layers):
        x = L.Bidirectional(
            rnn_cell(
                units=self.rnn_unit * (nrl + 1),
                return_sequences=True,
                activation=self.activate_mid,
            ))(x)
        x = L.Dropout(self.dropout)(x)
    outputs = x
    if self.use_crf:
        x = L.Dense(units=self.label, activation=self.activate_end)(outputs)
        self.CRF = ConditionalRandomField(self.crf_lr_multiplier,
                                          name="crf_bert4keras")
        self.outputs = self.CRF(x)
        self.trans = K.eval(self.CRF.trans).tolist()
        self.loss = self.CRF.dense_loss if self.use_onehot else self.CRF.sparse_loss
        self.metrics = [
            self.CRF.dense_accuracy
            if self.use_onehot else self.CRF.sparse_accuracy
        ]
    else:
        self.outputs = L.TimeDistributed(
            L.Dense(units=self.label, activation=self.activate_end))(outputs)
    self.model = M.Model(inputs, self.outputs)
    self.model.summary(132)
def build_model(self):
    """Fixed-length (140) Embedding → BiLSTM → LSTM → Dense → CRF tagger."""
    seq_in = Input(shape=(140, ), dtype='float64')
    embedder = Embedding(len(self.token_dict),
                         300,
                         input_length=140,
                         trainable=False)
    x = embedder(seq_in)
    x = Bidirectional(
        LSTM(units=300,
             return_sequences=True,
             dropout=0.5,
             recurrent_dropout=0.5))(x)
    x = LSTM(units=300 * 2,
             return_sequences=True,
             dropout=0.5,
             recurrent_dropout=0.5)(x)
    x = TimeDistributed(Dense(17, activation='relu'))(x)
    # NOTE(review): the positional 17 here is ConditionalRandomField's
    # lr_multiplier argument, not a tag count — confirm this is intended.
    CRF = ConditionalRandomField(17)
    out = CRF(x)
    model = Model(seq_in, out)
    model.compile(optimizer='adam',
                  loss=CRF.sparse_loss,
                  metrics=[CRF.sparse_accuracy])
    model.summary()
    return model
def build_model(embeddings=100, vocab_size=vocab_size, rnn_units=100):
    """Dual-task tagger with a hand-built BiLSTM.

    The forward LSTM alone feeds the segmentation head; the concatenation of
    forward and (reverse-run) backward LSTMs feeds the tagging head.
    Returns (model, Seq_crf, Tag_crf).
    """
    x_in = Input(shape=(None,))
    embedded = Embedding(input_dim=vocab_size,
                         output_dim=embeddings,
                         trainable=True,
                         mask_zero=True)(x_in)
    # Forward direction → segmentation branch.
    fwd = LSTM(units=rnn_units, return_sequences=True)(embedded)
    seq_branch = Dropout(0.5)(fwd)
    seq_branch = TimeDistributed(Dense(num_seglabels), name='dense_seq')(seq_branch)
    Seq_crf = ConditionalRandomField(lr_multiplier=seq_crf_lr_multiplier,
                                     name='seq_crf')
    seq_branch = Seq_crf(seq_branch)
    # Backward direction: reverse time, run an LSTM, reverse back.
    rev = Lambda(lambda x: K.reverse(x, axes=1))(embedded)
    rev = LSTM(units=rnn_units, return_sequences=True)(rev)
    bwd = Lambda(lambda x: K.reverse(x, axes=1))(rev)
    # The tagging branch sees both directions.
    both = Concatenate()([fwd, bwd])
    tag_branch = Dropout(0.5)(both)
    tag_branch = TimeDistributed(Dense(num_labels), name='dense_tag')(tag_branch)
    Tag_crf = ConditionalRandomField(lr_multiplier=tag_crf_lr_multiplier,
                                     name='tag_crf')
    tag_branch = Tag_crf(tag_branch)
    model = Model(x_in, [seq_branch, tag_branch])
    model.summary()
    model.compile(loss=[Seq_crf.sparse_loss, Tag_crf.sparse_loss],
                  optimizer=Adam(learing_rate),
                  metrics=[SparseAccuracy()])
    return model, Seq_crf, Tag_crf
def build_model(self, inputs, outputs):
    """Dense projection to label scores followed by a bert4keras CRF head."""
    logits = L.Dense(units=self.label, activation=self.activate_mid)(outputs)
    self.CRF = ConditionalRandomField(self.crf_lr_multiplier,
                                      name="crf_bert4keras")
    self.outputs = self.CRF(logits)
    self.model = M.Model(inputs, self.outputs)
    self.model.summary(132)
    # Export the transition matrix; loss/metric choice follows the label
    # encoding (one-hot vs sparse).
    self.trans = K.eval(self.CRF.trans).tolist()
    if self.use_onehot:
        self.loss = self.CRF.dense_loss
        self.metrics = [self.CRF.dense_accuracy]
    else:
        self.loss = self.CRF.sparse_loss
        self.metrics = [self.CRF.sparse_accuracy]
def build_model(self):
    """Build a BERT → Dense → CRF tagger plus a 2-GPU replica for training."""
    bert = build_transformer_model(
        self.bert_config_path,
        self.bert_checkpoint_path,
    )
    tap = 'Transformer-%s-FeedForward-Norm' % (self.bert_layers - 1)
    features = bert.get_layer(tap).output
    logits = Dense(self.num_classes)(features)
    self.CRF = ConditionalRandomField(lr_multiplier=self.crf_lr_multiplier)
    crf_out = self.CRF(logits)
    self.model = Model(bert.input, crf_out)
    # Multi-GPU mirror used for fitting; shares weights with self.model.
    self.model_ = multi_gpu_model(self.model, gpus=2)
    self.model.summary(120)
    logger.info('build model done')
def build_crf_adversarial_bert(num_labels, model_name='electra'):
    """Transformer encoder + Dense + CRF; returns (compiled model, CRF layer)."""
    encoder = build_transformer_model(config_path, checkpoint_path,
                                      model=model_name)
    # Fine-tune the whole encoder.
    for layer in encoder.layers:
        layer.trainable = True
    logits = Dense(num_labels)(encoder.output)
    CRF = ConditionalRandomField(lr_multiplier=crf_lr_multiplier)
    crf_out = CRF(logits)
    tagger = Model(encoder.input, crf_out)
    tagger.compile(loss=CRF.sparse_loss,
                   optimizer=Adam(learning_rate),
                   metrics=[CRF.sparse_accuracy])
    return tagger, CRF
def build_bert(num_labels):
    """BERT encoder + Dense + CRF sequence tagger; returns (model, CRF layer)."""
    encoder = build_transformer_model(config_path, checkpoint_path)
    for layer in encoder.layers:
        layer.trainable = True  # unfreeze the entire encoder
    logits = Dense(num_labels)(encoder.output)
    CRF = ConditionalRandomField(lr_multiplier=crf_lr_multiplier)
    crf_out = CRF(logits)
    tagger = Model(encoder.input, crf_out)
    tagger.compile(loss=CRF.sparse_loss,
                   optimizer=Adam(learning_rate),
                   metrics=[CRF.sparse_accuracy])
    return tagger, CRF
def build_model(self):
    """ELECTRA (layer 11 tap) + Dense(11) + CRF tagger compiled with Adam."""
    encoder = build_transformer_model(
        self.config_path,
        self.checkpoint_path,
        model='electra'
    )
    features = encoder.get_layer(
        'Transformer-%s-FeedForward-Norm' % (12 - 1)).output
    logits = Dense(11)(features)
    self.CRF = ConditionalRandomField(lr_multiplier=100)
    crf_out = self.CRF(logits)
    model = Model(encoder.input, crf_out)
    model.summary()
    model.compile(loss=self.CRF.sparse_loss,
                  optimizer=Adam(1e-4),
                  metrics=[self.CRF.sparse_accuracy])
    return model
def build_model(self):
    """Build the model: load the pre-trained BERT and fine-tune on top of it.

    :return: compiled Keras model
    """
    bert_model = build_transformer_model(config_path=args.BERT_CONFIG,
                                         checkpoint_path=args.BERT_MODEL)
    features = bert_model.get_layer(args.BERT_LAYER).output
    features = Dropout(rate=0.5)(features)
    logits = Dense(_labels_num)(features)
    CRF = ConditionalRandomField(lr_multiplier=1)
    crf_out = CRF(logits)
    model = Model(bert_model.input, crf_out)
    model.compile(loss=CRF.sparse_loss,
                  optimizer=Adam(lr=1e-5),
                  metrics=[CRF.sparse_accuracy])
    model.summary()
    return model
def bertmodel():
    """BERT + Dense + CRF tagger; returns (compiled model, CRF layer)."""
    encoder = build_transformer_model(
        config_path,
        checkpoint_path,
    )
    tap = 'Transformer-%s-FeedForward-Norm' % (bert_layers - 1)
    features = encoder.get_layer(tap).output
    logits = Dense(num_labels)(features)  # per-token label scores
    CRF = ConditionalRandomField(lr_multiplier=crf_lr_multiplier)
    crf_out = CRF(logits)
    tagger = Model(encoder.input, crf_out)
    tagger.compile(
        loss=CRF.sparse_loss,
        optimizer=Adam(learing_rate),
        metrics=[CRF.sparse_accuracy]
    )
    return tagger, CRF
def build_model(self, inputs, outputs):
    """
    build_model.
    Args:
        inputs: tensor, input of model
        outputs: tensor, output of model
    Returns:
        None
    """
    # CuDNNGRU or GRU
    if self.rnn_type.upper() == "CUDNNGRU":
        rnn_cell = L.CuDNNGRU
    else:
        rnn_cell = L.GRU
    # Bi-GRU stack.  Fix: each layer now consumes the previous layer's
    # output — previously every iteration read `outputs`, so with
    # num_rnn_layers > 1 all but the last Bidirectional layer were dead
    # branches that never reached the graph.
    x = outputs
    for nrl in range(self.num_rnn_layers):
        x = L.Bidirectional(
            rnn_cell(
                units=self.rnn_unit,
                return_sequences=True,
                activation=self.activate_mid,
            ))(x)
        x = L.Dropout(self.dropout)(x)
    if self.use_crf:
        x = L.Dense(units=self.label, activation=self.activate_end)(x)
        self.CRF = ConditionalRandomField(self.crf_lr_multiplier,
                                          name="crf_bert4keras")
        self.outputs = self.CRF(x)
        self.trans = K.eval(self.CRF.trans).tolist()
        self.loss = self.CRF.dense_loss if self.use_onehot else self.CRF.sparse_loss
        self.metrics = [
            self.CRF.dense_accuracy
            if self.use_onehot else self.CRF.sparse_accuracy
        ]
    else:
        self.outputs = L.TimeDistributed(
            L.Dense(units=self.label, activation=self.activate_end))(x)
    self.model = M.Model(inputs, self.outputs)
    self.model.summary(132)
def build_model(self, inputs, outputs):
    """
    build_model.
    Args:
        inputs: tensor, input of model
        outputs: tensor, output of model
    Returns:
        None
    """
    # Select the recurrent cell implementation.
    if self.rnn_type == "LSTM":
        rnn_cell = L.LSTM
    elif self.rnn_type == "CuDNNLSTM":
        rnn_cell = L.CuDNNLSTM
    elif self.rnn_type == "CuDNNGRU":
        rnn_cell = L.CuDNNGRU
    else:
        rnn_cell = L.GRU
    # Bi-LSTM-LAN: each layer widens the RNN and concatenates a
    # self-attention view of its output before dropout.
    for layer_idx in range(self.num_rnn_layers):
        rnn_out = L.Bidirectional(
            rnn_cell(units=self.rnn_unit * (layer_idx + 1),
                     return_sequences=True,
                     activation=self.activate_mid))(outputs)
        attn_out = SelfAttention(K.int_shape(rnn_out)[-1])(rnn_out)
        outputs = L.Concatenate()([rnn_out, attn_out])
        outputs = L.Dropout(self.dropout)(outputs)
    if self.use_crf:
        logits = L.Dense(units=self.label,
                         activation=self.activate_end)(outputs)
        self.CRF = ConditionalRandomField(self.crf_lr_multiplier,
                                          name="crf_bert4keras")
        self.outputs = self.CRF(logits)
        self.trans = K.eval(self.CRF.trans).tolist()
        if self.use_onehot:
            self.loss = self.CRF.dense_loss
            self.metrics = [self.CRF.dense_accuracy]
        else:
            self.loss = self.CRF.sparse_loss
            self.metrics = [self.CRF.sparse_accuracy]
    else:
        self.outputs = L.TimeDistributed(
            L.Dense(units=self.label,
                    activation=self.activate_end))(outputs)
    self.model = M.Model(inputs, self.outputs)
    self.model.summary(132)
batch_labels.append(labels) if len(batch_token_ids) == self.batch_size or is_end: batch_token_ids = sequence_padding(batch_token_ids) batch_segment_ids = sequence_padding(batch_segment_ids) batch_labels = sequence_padding(batch_labels) yield [batch_token_ids, batch_segment_ids], batch_labels batch_token_ids, batch_segment_ids, batch_labels = [], [], [] model = build_transformer_model( config_path, checkpoint_path, ) output = Dense(num_labels)(model.output) CRF = ConditionalRandomField(lr_multiplier=crf_lr_multiplier) output = CRF(output) model = Model(model.input, output) model.summary() model.compile( loss=CRF.sparse_loss, optimizer=Adam(learning_rate), metrics=[CRF.sparse_accuracy] ) def viterbi_decode(nodes, trans): """Viterbi算法求最优路径 其中nodes.shape=[seq_len, num_labels], trans.shape=[num_labels, num_labels].
def build_model(self, inputs, outputs):
    """
    build_model.
    Args:
        inputs: tensor, input of model
        outputs: tensor, output of model
    Returns:
        None
    """
    # IDCNN/DGCNN: per filter size, a stack of dilated Conv1D layers
    # extracts n-gram features; branches are concatenated at the end.
    conv_pools = []
    for i in range(len(self.filters_size)):
        branch = L.Conv1D(
            name="conv-{0}-{1}".format(i, self.filters_size[i]),
            dilation_rate=self.atrous_rates[0],
            kernel_size=self.filters_size[i],
            activation=self.activate_mid,
            filters=self.filters_num,
            padding="SAME",
        )(outputs)
        # NOTE(review): this loop reuses atrous_rates[j] starting at index 0
        # (already used above) and never reaches the last rate — confirm
        # whether atrous_rates[j + 1] was intended.
        for j in range(len(self.atrous_rates) - 1):
            branch = L.Conv1D(
                name="conv-{0}-{1}-{2}".format(i, self.filters_size[i], j),
                dilation_rate=self.atrous_rates[j],
                kernel_size=self.filters_size[i],
                activation=self.activate_mid,
                filters=self.filters_num,
                padding="SAME",
            )(branch)
            branch = L.Dropout(
                name="dropout-{0}-{1}-{2}".format(i, self.filters_size[i], j),
                rate=self.dropout,
            )(branch)
        conv_pools.append(branch)
    # Merge all branches.
    x = L.Concatenate(axis=-1)(conv_pools)
    x = L.Dropout(self.dropout)(x)
    # CRF or Dense head
    if self.use_crf:
        x = L.Dense(units=self.label, activation=self.activate_end)(x)
        self.CRF = ConditionalRandomField(self.crf_lr_multiplier,
                                          name="crf_bert4keras")
        self.outputs = self.CRF(x)
        self.trans = K.eval(self.CRF.trans).tolist()
        self.loss = self.CRF.dense_loss if self.use_onehot else self.CRF.sparse_loss
        self.metrics = [
            self.CRF.dense_accuracy
            if self.use_onehot else self.CRF.sparse_accuracy
        ]
    else:
        x = L.Bidirectional(
            L.GRU(
                activation=self.activate_mid,
                return_sequences=True,
                units=self.rnn_unit,
                name="bi-gru",
            ))(x)
        self.outputs = L.TimeDistributed(
            L.Dense(
                activation=self.activate_end,
                name="dense-output",
                units=self.label,
            ))(x)
    self.model = M.Model(inputs, self.outputs)
    self.model.summary(132)
# Domain classification head.
domain_output = Dense(domain_num,
                      activation='softmax',
                      kernel_initializer=electra_model.initializer,
                      name='domain_classifier')(classify_output)
domain_model = Model(electra_model.input, domain_output)
# Intent recognition head.
intent_output = Dense(intent_num,
                      activation='softmax',
                      kernel_initializer=electra_model.initializer,
                      name='intent_classifier')(classify_output)
# NOTE(review): uses electra_model.model.input here but electra_model.input
# above — confirm both resolve to the same input tensors.
intent_model = Model(electra_model.model.input, intent_output)
# Slot-filling head: per-token scores followed by a CRF.
x = Dense(slot_num)(electra_model.model.output)
CRF = ConditionalRandomField(lr_multiplier=lr_multiplier, name='slots_tagger')
slot_output = CRF(x)
slot_model = Model(electra_model.input, slot_output)
# Joint training model: the label tensors are extra inputs so the losses
# below can be built symbolically.
train_model = Model(
    electra_model.input + [domain_labels, intent_labels, slot_labels],
    [domain_output, intent_output, slot_output])
# Mask of positions whose slot label is > 0.
mask = Lambda(lambda x: K.cast(K.greater(x, 0), 'float32'))(slot_labels)
# intent_loss
intent_loss = K.sparse_categorical_crossentropy(intent_labels, intent_output)
# domain_loss
domain_loss = K.sparse_categorical_crossentropy(domain_labels, domain_output)
classify_output = Lambda(lambda x: x[:, 0], name='CLS-token')(electra_model.model.output) # 领域识别模型 domain_output = Dense(domain_num, activation='softmax', kernel_initializer=electra_model.initializer, name='domain_classifier')(classify_output) domain_model = Model(electra_model.input, domain_output) # 意图识别模型 intent_output = Dense(intent_num, activation='softmax', kernel_initializer=electra_model.initializer, name='intent_classifier')(classify_output) intent_model = Model(electra_model.model.input, intent_output) # 槽填充 x = Dense(slot_num)(electra_model.model.output) CRF = ConditionalRandomField(lr_multiplier=lr_multiplier, name='slots_tagger') slot_output = CRF(x) slot_model = Model(electra_model.input, slot_output) # 训练模型 train_model = Model( # electra_model.input + [intent_labels, domain_labels, slot_labels], electra_model.model.input, [domain_output, intent_output, slot_output] ) loss = { "domain_classifier": 'sparse_categorical_crossentropy', "intent_classifier": 'sparse_categorical_crossentropy', 'slots_tagger': CRF.sparse_loss }
def build_model(self, inputs, outputs):
    """
    build_model.
    Args:
        inputs: tensor, input of model
        outputs: tensor, output of model
    Returns:
        None
    """
    # Choose the recurrent cell.
    if self.rnn_type == "LSTM":
        rnn_cell = L.LSTM
    elif self.rnn_type == "CuDNNLSTM":
        rnn_cell = L.CuDNNLSTM
    elif self.rnn_type == "CuDNNGRU":
        rnn_cell = L.CuDNNGRU
    else:
        rnn_cell = L.GRU
    # CNN-LSTM: one Conv1D → BiRNN → Dropout branch per filter size.
    branches = []
    for idx, f_size in enumerate(self.filters_size):
        conv = L.Conv1D(
            name="conv-{0}-{1}".format(idx, f_size),
            kernel_size=f_size,
            activation=self.activate_mid,
            filters=self.filters_num,
            padding='same',
        )(outputs)
        rnn_out = L.Bidirectional(
            rnn_cell(
                name="bi-lstm-{0}-{1}".format(idx, f_size),
                activation=self.activate_mid,
                return_sequences=True,
                units=self.rnn_unit,
            ))(conv)
        branches.append(
            L.Dropout(rate=self.dropout,
                      name="dropout-{0}-{1}".format(idx, f_size))(rnn_out))
    # Merge the branches.
    x = L.Concatenate(axis=-1)(branches)
    x = L.Dropout(self.dropout)(x)
    # CRF or Dense head
    if self.use_crf:
        x = L.Dense(units=self.label, activation=self.activate_end)(x)
        self.CRF = ConditionalRandomField(self.crf_lr_multiplier,
                                          name="crf_bert4keras")
        self.outputs = self.CRF(x)
        self.trans = K.eval(self.CRF.trans).tolist()
        if self.use_onehot:
            self.loss = self.CRF.dense_loss
            self.metrics = [self.CRF.dense_accuracy]
        else:
            self.loss = self.CRF.sparse_loss
            self.metrics = [self.CRF.sparse_accuracy]
    else:
        self.outputs = L.TimeDistributed(
            L.Dense(units=self.label,
                    activation=self.activate_end,
                    name="dense-output"))(x)
    self.model = M.Model(inputs, self.outputs)
    self.model.summary(132)
# tf.disable_v2_behavior() # 禁用2.0版本的方法 maxlen = 250 epochs = 10 batch_size = 1 bert_layers = 24 learing_rate = 1e-5 # bert_layers越小,学习率应该要越大 crf_lr_multiplier = 1000 # 必要时扩大CRF层的学习率 config_path = './chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/bert_config.json' checkpoint_path = './chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/bert_model.ckpt' dict_path = './chinese_roberta_wwm_large_ext_L-24_H-1024_A-16/vocab.txt' # print(tf.test.is_gpu_available()) # print(tf.config.list_physical_devices('GPU')) CRF = ConditionalRandomField( lr_multiplier=crf_lr_multiplier) # CRF层本质上是一个带训练参数的loss计算层 # 建立分词器 tokenizer = Tokenizer(dict_path, do_lower_case=True) # 标注数据 loader = tcm.TCM() def train(): train_data = loader.load_data('./round1_train/data/train.txt' ) # 第一个维度为所有训练样本中句子个数,第二个维度是每个句子所包含的(实体,类别)数 valid_data = loader.load_data('./round1_train/data/val.txt') global train_generator train_generator = generator.Generator(train_data=train_data, batch_size=batch_size, tokenizer=tokenizer,
print(train_inputs[0][0])
print(train_inputs[1][0])
bert_path = '../tfhub/chinese_roberta_wwm_ext_L-12_H-768_A-12'

from tensorflow.keras.layers import *
from tensorflow.keras.callbacks import Callback
from bert4keras.backend import keras, K
from bert4keras.layers import ConditionalRandomField
from bert4keras.models import build_transformer_model
from bert4keras.snippets import ViterbiDecoder, to_array

# Encoder + per-token Dense + CRF head.
bert = build_transformer_model(bert_path, return_keras_model=False)
output = Dense(len(y2id))(bert.model.output)
CRF = ConditionalRandomField(lr_multiplier=1000)
output = CRF(output)
model = tf.keras.models.Model(inputs=bert.model.input, outputs=output)
# presumably freezes the bottom 8 transformer layers — confirm bt API
bt.lock_transformer_layers(bert, 8)
epochs = 2
batch_size = 32
total_steps = epochs * train_inputs[0].shape[0] // batch_size
optimizer = bt.get_suggested_optimizer(1e-4, total_steps=total_steps)
model.compile(optimizer, loss=CRF.sparse_loss, metrics=[CRF.sparse_accuracy])
# model.summary()
from seqeval.metrics import f1_score, accuracy_score, classification_report