def build_ner_albert(args):
    bert_model = build_transformer_model(
        config_path=args.config_path,
        checkpoint_path=args.checkpoint_path,
        model='albert',
        # return_keras_model=False,
    )
    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))
    x = bert_model([x1_in, x2_in])
    # x = Lambda(lambda x: x[:, 0])(x)
    p = Dense(args.nclass, activation='softmax', name="p")(x)
    model = Model([x1_in, x2_in], p)
    model.compile(
        # loss=multi_category_focal_loss2(gamma=2., alpha=.25),
        loss='categorical_crossentropy',
        optimizer=Adam(args.lr),
        # metrics=["accuracy"]
    )
    model.summary()
    return model
def build_ner_bert(args, training=False):
    # Load the pretrained model
    bert_model = load_trained_model_from_checkpoint(
        args.config_path, args.checkpoint_path, seq_len=None, training=training)
    for l in bert_model.layers:
        # if "-12-" in l.name or "-11-" in l.name or "-10-" in l.name:
        #     freeze certain encoder blocks while fine-tuning
        l.trainable = True
    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))
    x = bert_model([x1_in, x2_in])
    # x = bert_model.get_layer(name='Encoder-{}-FeedForward-Norm'.format(12))(x)
    # directly extract the output of encoder blocks 1-12
    p = Dense(args.nclass, activation='softmax', name="p")(x)
    model = Model([x1_in, x2_in], p)
    model.compile(
        # loss=multi_category_focal_loss2(gamma=2., alpha=.25),
        loss='categorical_crossentropy',
        optimizer=Adam(args.lr),
        # metrics=["accuracy"]
    )
    model.summary()
    return model
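# Usage sketch (not part of the original source): the builders above only need an
# object exposing config_path, checkpoint_path, nclass and lr attributes, so a
# plain argparse.Namespace is enough. The file names and tag count below are
# illustrative assumptions.
def _demo_build_ner():
    from argparse import Namespace
    args = Namespace(config_path='bert_config.json',
                     checkpoint_path='bert_model.ckpt',
                     nclass=7,   # e.g. 3 entity types * (B+I) + O
                     lr=1e-5)
    return build_ner_bert(args)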
def build_mrc_bert(args, training=False):
    # Load the pretrained model
    bert_model = load_trained_model_from_checkpoint(
        args.config_path, args.checkpoint_path, seq_len=None, training=training)
    print(bert_model)
    for l in bert_model.layers:
        # if "-12-" in l.name or "-11-" in l.name or "-10-" in l.name:
        #     freeze certain encoder blocks while fine-tuning
        l.trainable = True
    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))
    x = bert_model([x1_in, x2_in])
    # x = bert_model.get_layer(name='Encoder-{}-FeedForward-Norm'.format(12))(x)
    # directly extract the output of encoder blocks 1-12
    p_start = Dense(1, activation='sigmoid', name="p_start")(x)
    p_end = Dense(1, activation='sigmoid', name="p_end")(x)
    model = Model([x1_in, x2_in], [p_start, p_end])
    model.compile(
        loss=focal_loss(gamma=2., alpha=.25),
        # loss='binary_crossentropy',
        optimizer=Adam(args.lr),  # use a sufficiently small learning rate
        # loss_weights=[1., 1.]
        # metrics=['accuracy']
    )
    model.summary()
    return model
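# focal_loss is referenced above but not defined in this file. A minimal sketch of
# the usual binary focal loss (Lin et al., 2017) for the sigmoid start/end heads;
# the project's own implementation may differ.
from bert4keras.backend import K

def focal_loss(gamma=2., alpha=.25):
    """FL(p_t) = -alpha * (1 - p_t)^gamma * log(p_t), averaged over positions."""
    def _loss(y_true, y_pred):
        y_pred = K.clip(y_pred, K.epsilon(), 1. - K.epsilon())
        # down-weight easy positives and easy negatives via the (1 - p_t)^gamma factor
        pos = -alpha * y_true * K.pow(1. - y_pred, gamma) * K.log(y_pred)
        neg = -(1. - alpha) * (1. - y_true) * K.pow(y_pred, gamma) * K.log(1. - y_pred)
        return K.mean(pos + neg)
    return _loss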
def build_cls_bert(args):
    # Load the pretrained model
    bert_model = load_trained_model_from_checkpoint(
        args.config_path, args.checkpoint_path, seq_len=None, use_adapter=True)
    for l in bert_model.layers:
        # if "-12-" in l.name or "-11-" in l.name or "-10-" in l.name:
        #     freeze certain encoder blocks while fine-tuning
        l.trainable = True  # False to freeze the parameters in this encoder layer
    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))
    x = bert_model([x1_in, x2_in])
    '''
    a1 = bert_model.get_layer(name='Encoder-{}-FeedForward-Norm'.format(12))(x)  # extract output from the last encoder layer
    a2 = bert_model.get_layer(name='Encoder-{}-FeedForward-Norm'.format(11))(x)
    x = Add()([a1, a2])
    x = Lambda(lambda x: x[:, 0])(x)  # extract the [CLS] tensor for downstream tasks
    x = bert_model.get_layer('NSP-Dense').output
    '''
    p = Dense(args.nclass, activation='softmax')(x)
    model = Model([x1_in, x2_in], p)
    model.compile(
        loss='categorical_crossentropy',
        # loss=focal_loss(gamma=2., alpha=.25),
        # loss=multi_category_focal_loss2(gamma=2., alpha=.25),
        # optimizer=AdamLR(learning_rate=1e-4, lr_schedule={1000: 1, 2000: 0.1}),
        optimizer=Adam(args.lr),
        metrics=['accuracy', f1])
    model.summary()
    return model
def build_model():
    bert = build_transformer_model(
        config_path,
        checkpoint_path,
        return_keras_model=False,
    )
    output = Lambda(lambda x: x[:, 0], name='CLS-token')(bert.model.output)
    output = Dense(
        units=len(label2id) if args.task == 'category' else 1,
        activation='softmax' if args.task == 'category' else 'sigmoid',
        kernel_initializer=bert.initializer)(output)
    model = keras.models.Model(bert.model.input, output)
    model.summary()

    # AdamLR = extend_with_piecewise_linear_lr(Adam, name='AdamLR')
    model.compile(
        loss='sparse_categorical_crossentropy'
        if args.task == 'category' else 'binary_crossentropy',
        optimizer=Adam(learning_rate),  # use a sufficiently small learning rate
        # optimizer=AdamLR(learning_rate=1e-4, lr_schedule={
        #     1000: 1,
        #     2000: 0.1
        # }),
        metrics=['accuracy'],
    )
    return model
def build_mrc_albert(args):
    bert_model = build_transformer_model(
        config_path=args.config_path,
        checkpoint_path=args.checkpoint_path,
        model='albert',
        # return_keras_model=False,
    )
    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))
    x = bert_model([x1_in, x2_in])
    # x = bert_model.get_layer(name='Encoder-{}-FeedForward-Norm'.format(12))(x)
    # x = Lambda(lambda x: x, output_shape=lambda s: s)(x)
    p_start = Dense(1, activation='sigmoid', name="p_start")(x)
    p_end = Dense(1, activation='sigmoid', name="p_end")(x)
    model = Model([x1_in, x2_in], [p_start, p_end])
    model.compile(
        loss=focal_loss(gamma=2., alpha=.25),
        # loss='binary_crossentropy',
        optimizer=Adam(args.lr),
        # loss_weights=[1., 1.]
        # metrics=['accuracy']
    )
    model.summary()
    return model
def compile_model(self):
    self.model_.compile(
        # self.model.compile(
        loss=self.CRF.sparse_loss,
        optimizer=Adam(self.learning_rate),
        metrics=[self.CRF.sparse_accuracy])
    logger.info('compile model done')
def get_sentiment_model():
    global model

    class CrossEntropy(Loss):
        """Cross-entropy as loss, with the padding positions masked out."""
        def compute_loss(self, inputs, mask=None):
            y_true, y_pred = inputs
            if mask[1] is None:
                y_mask = 1.0
            else:
                y_mask = K.cast(mask[1], K.floatx())[:, 1:]
            y_true = y_true[:, 1:]   # target token_ids
            y_pred = y_pred[:, :-1]  # predicted sequence, shifted by one position
            accuracy = keras.metrics.sparse_categorical_accuracy(y_true, y_pred)
            accuracy = K.sum(accuracy * y_mask) / K.sum(y_mask)
            self.add_metric(accuracy, name='accuracy')
            loss = K.sparse_categorical_crossentropy(y_true, y_pred)
            loss = K.sum(loss * y_mask) / K.sum(y_mask)
            return loss

    output = CrossEntropy(1)([model.input, model.output])
    model = keras.models.Model(model.input, output)
    model.compile(optimizer=Adam(6e-4))
    model.summary()
    return model
def buildmodel(self):
    self.token_dict, self.keep_tokens = load_vocab(
        dict_path=self.dict_path,
        simplified=True,
        startswith=['[PAD]', '[UNK]', '[CLS]', '[SEP]', '[MASK]'],
    )
    self.tokenizer = Tokenizer(self.token_dict, do_lower_case=True)
    if self.pretrain_type == 'albert':
        model = build_transformer_model(
            config_path,
            checkpoint_path,
            model='albert',
            with_mlm=True,
            keep_tokens=self.keep_tokens,
        )
    elif self.pretrain_type == 'bert':
        model = build_transformer_model(
            config_path,
            checkpoint_path,
            model='bert',
            with_mlm=True,
            keep_tokens=self.keep_tokens,
        )
    # Keep only the answer span: positions 1 .. max_a_len of the MLM output
    output = Lambda(lambda x: x[:, 1:self.max_a_len + 1])(model.output)
    # print(output.shape)
    self.model = Model(model.input, output)
    self.model.compile(loss=self.masked_cross_entropy,
                       optimizer=Adam(self.lr))
    self.model.summary()
def build_model():
    """Build the model body.

    :return: the model object
    """
    with SESS.as_default():
        with SESS.graph.as_default():
            # Build the BERT backbone
            bert_model = build_transformer_model(
                config_path=bert_config.config_path,
                checkpoint_path=bert_config.checkpoint_path,
                return_keras_model=False,
                model=bert_config.model_type)
            # l is the name (a str) of a layer inside the model
            for l in bert_model.layers:
                bert_model.model.get_layer(l).trainable = True
            # Take the vector at [CLS] for classification
            t = Lambda(lambda x: x[:, 0])(bert_model.model.output)
            t = Dropout(cameo_train_config.drop_out_rate)(t)
            # Predict the event CAMEO code
            cameo_out_put = Dense(len(ID2LABEL), activation='softmax')(t)
            # The CAMEO model body
            cameo_model = Model(bert_model.model.inputs, cameo_out_put)
            cameo_model.compile(loss='sparse_categorical_crossentropy',
                                optimizer=Adam(cameo_train_config.learning_rate),
                                metrics=['accuracy'])
            cameo_model.summary()
            return cameo_model
def build_model():
    """Build the model structure and return the model object.

    :return: model
    """
    # Build the BERT backbone
    bert_model = build_transformer_model(
        config_path=bert_config.config_path,
        checkpoint_path=bert_config.checkpoint_path,
        model=bert_config.model_type,
        return_keras_model=False)
    # l is the name (a str) of a layer inside the model
    for l in bert_model.layers:
        bert_model.model.get_layer(l).trainable = True
    # Build the model body: take the vector at [CLS] for classification
    t = Lambda(lambda x: x[:, 0])(bert_model.model.output)
    t = Dropout(match_train_config.drop_out_rate)(t)
    # Prediction output
    output = Dense(units=2, activation='softmax')(t)
    model = Model(bert_model.model.inputs, output)
    model.summary()
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=Adam(match_train_config.learning_rate),  # use a sufficiently small learning rate
        metrics=['accuracy'],
    )
    return model
def get_model(tokens, keep_tokens):
    model = build_transformer_model(
        config_path=BaseConfig.config_path,
        checkpoint_path=BaseConfig.checkpoint_path,
        with_mlm=True,
        model="nezha",
        keep_tokens=[0, 100, 101, 102, 103, 100, 100] + keep_tokens[:len(tokens)])
    model.compile(loss=masked_crossentropy, optimizer=Adam(2e-5))
    model.summary()
    return model
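# masked_crossentropy is referenced above but not defined in this file. A minimal
# sketch, assuming the common MLM convention that label id 0 marks positions that
# should not contribute to the loss; the original implementation may differ.
def masked_crossentropy(y_true, y_pred):
    y_true = K.reshape(y_true, K.shape(y_true)[:2])
    y_mask = K.cast(K.greater(y_true, 0.5), K.floatx())  # 1 where a real label exists
    loss = K.sparse_categorical_crossentropy(y_true, y_pred)
    return K.sum(loss * y_mask) / K.sum(y_mask)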
def build_model():
    bert_model = build_transformer_model(
        config_path=Config.config_path,
        checkpoint_path=Config.checkpoint_path,
        return_keras_model=False)
    # Additional inputs
    subject_labels = Input(shape=(None, 2))
    subject_ids = Input(shape=(2, ))
    object_labels = Input(shape=(None, len(predicate2id), 2))
    # Predict the subject
    output = Dense(units=2,
                   activation='sigmoid',
                   kernel_initializer=bert_model.initializer)(
                       bert_model.model.output)
    subject_preds = Lambda(lambda x: x**2)(output)
    subject_model = Model(bert_model.inputs, subject_preds)
    # Feed in the subject and predict the object
    output = bert_model.model.layers[-2].get_output_at(-1)
    subject = Lambda(extrac_subject)([output, subject_ids])
    output = LayerNormalization(conditional=True)([output, subject])
    output = Dense(units=len(predicate2id) * 2,
                   activation='sigmoid',
                   kernel_initializer=bert_model.initializer)(output)
    output = Lambda(lambda x: x**4)(output)
    object_preds = Reshape((-1, len(predicate2id), 2))(output)
    object_model = Model(bert_model.model.inputs + [subject_ids], object_preds)
    # Training model
    train_model = Model(
        bert_model.model.inputs + [subject_labels, subject_ids, object_labels],
        [subject_preds, object_preds])
    mask = bert_model.model.get_layer('Embedding-Token').output_mask
    mask = K.cast(mask, K.floatx())
    subject_loss = K.binary_crossentropy(subject_labels, subject_preds)
    subject_loss = K.mean(subject_loss, 2)
    subject_loss = K.sum(subject_loss * mask) / K.sum(mask)
    object_loss = K.binary_crossentropy(object_labels, object_preds)
    object_loss = K.sum(K.mean(object_loss, 3), 2)
    object_loss = K.sum(object_loss * mask) / K.sum(mask)
    train_model.add_loss(subject_loss + object_loss)
    optimizer = Adam(Config.learning_rate)
    train_model.compile(optimizer=optimizer)
    return train_model, subject_model, object_model
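# extrac_subject is referenced above but not defined here. In the canonical
# bert4keras relation-extraction example, the equivalent helper gathers the encoder
# vectors at the subject's start and end positions and concatenates them; a sketch
# under that assumption (batch_gather comes from bert4keras.backend).
from bert4keras.backend import batch_gather

def extrac_subject(inputs):
    """Pick out the subject representation from the encoder output via subject_ids."""
    output, subject_ids = inputs
    start = batch_gather(output, subject_ids[:, :1])  # vector at the start index
    end = batch_gather(output, subject_ids[:, 1:])    # vector at the end index
    subject = K.concatenate([start, end], 2)
    return subject[:, 0]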
def build_model(mode='bert', filename='bert', lastfour=False, LR=1e-5, DR=0.2):
    # NOTE: all three branches currently point at the same checkpoint directory
    if filename == 'bert':
        path = './chinese_L-12_H-768_A-12/'
    elif filename == 'ernie':
        path = './chinese_L-12_H-768_A-12/'
    elif filename == 'roberta':
        path = './chinese_L-12_H-768_A-12/'
    config_path = path + 'bert_config.json'
    checkpoint_path = path + 'bert_model.ckpt'
    dict_path = path + 'vocab.txt'
    global tokenizer
    tokenizer = Tokenizer(dict_path, do_lower_case=True)
    bert = build_transformer_model(
        config_path=config_path,
        checkpoint_path=checkpoint_path,
        with_pool=True,
        model=mode,
        return_keras_model=False,
    )
    if lastfour:
        # Pool the [CLS] vector from each of the last four encoder blocks
        model = Model(inputs=bert.model.input,
                      outputs=[
                          bert.model.layers[-3].get_output_at(0),
                          bert.model.layers[-11].get_output_at(0),
                          bert.model.layers[-19].get_output_at(0),
                          bert.model.layers[-27].get_output_at(0),
                      ])
        output = model.outputs
        output1 = Lambda(lambda x: x[:, 0], name='Pooler1')(output[0])
        output2 = Lambda(lambda x: x[:, 0], name='Pooler2')(output[1])
        output3 = Lambda(lambda x: x[:, 0], name='Pooler3')(output[2])
        output4 = Lambda(lambda x: x[:, 0], name='Pooler4')(output[3])
        output = Concatenate(axis=1)([output1, output2, output3, output4])
    else:
        output = bert.model.output
    output = Dropout(rate=DR)(output)
    output = Dense(units=2,
                   activation='softmax',
                   kernel_initializer=bert.initializer)(output)
    model = Model(bert.model.input, output)
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=Adam(LR),
        metrics=['accuracy'],
    )
    return model
def build_model():
    model = build_transformer_model(
        config.config_path,
        config.checkpoint_path,
        application='unilm',
        keep_tokens=keep_tokens  # keep only the tokens in keep_tokens, trimming the vocabulary
    )
    output = CrossEntropy(2)(model.inputs + model.outputs)
    model = Model(model.inputs, output)
    model.compile(optimizer=Adam(1e-5))
    return model
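# The three-input CrossEntropy(2) loss layer used above is not defined in this
# file. A sketch matching the standard bert4keras seq2seq recipe: cross-entropy
# over the target half of the UniLM input, with the segment ids as the loss mask.
# The project's actual class may differ.
from bert4keras.layers import Loss

class CrossEntropy(Loss):
    """Cross-entropy as loss, masking out predictions on the input part."""
    def compute_loss(self, inputs, mask=None):
        y_true, y_mask, y_pred = inputs
        y_true = y_true[:, 1:]   # target tokens
        y_mask = y_mask[:, 1:]   # segment ids: 1 exactly on the part to predict
        y_pred = y_pred[:, :-1]  # predictions, shifted one step ahead of targets
        loss = K.sparse_categorical_crossentropy(y_true, y_pred)
        loss = K.sum(loss * y_mask) / K.sum(y_mask)
        return loss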
def GeneratePretrain(c_e, g_pre_lr):
    c_in = Input(shape=(1, ))
    c = Embedding(2, c_e)(c_in)
    c = Reshape((128, ))(c)  # assumes c_e == 128
    model = build_transformer_model(
        config_path,
        checkpoint_path,
        application='lm',
        keep_tokens=keep_tokens,
        layer_norm_cond=c,
        additional_input_layers=c_in,
    )
    output = CrossEntropy(1)([model.inputs[0], model.outputs[0]])
    model = Model(model.inputs, output)
    model.compile(optimizer=Adam(g_pre_lr))
    return model
def build_model(self):
    c_in = Input(shape=(1, ))
    c = Embedding(2, self.c_e)(c_in)
    c = Reshape((self.c_e, ))(c)
    model = build_transformer_model(
        config_path=config_path,
        checkpoint_path=checkpoint_path,
        application='lm',
        keep_tokens=keep_tokens,
        layer_norm_cond=c,
        additional_input_layers=c_in,
    )
    output = model.outputs[0]
    model = Model(model.inputs, output)
    model.compile(optimizer=Adam(self.g_lr), loss=self.loss)
    return model
def build_crf_adversarial_bert(num_labels, model_name='electra'):
    model = build_transformer_model(config_path, checkpoint_path, model=model_name)
    for layer in model.layers:
        layer.trainable = True
    output = Dense(num_labels)(model.output)
    CRF = ConditionalRandomField(lr_multiplier=crf_lr_multiplier)
    output = CRF(output)
    model = Model(model.input, output)
    model.compile(loss=CRF.sparse_loss,
                  optimizer=Adam(learning_rate),
                  metrics=[CRF.sparse_accuracy])
    return model, CRF
def build_model(self):
    model = build_transformer_model(
        self.config_path,
        self.checkpoint_path,
        model='electra'
    )
    output_layer = 'Transformer-%s-FeedForward-Norm' % (12 - 1)
    output = model.get_layer(output_layer).output
    output = Dense(11)(output)
    self.CRF = ConditionalRandomField(lr_multiplier=100)
    output = self.CRF(output)
    model = Model(model.input, output)
    model.summary()
    model.compile(loss=self.CRF.sparse_loss,
                  optimizer=Adam(1e-4),
                  metrics=[self.CRF.sparse_accuracy])
    return model
def build_bert(num_labels):
    model = build_transformer_model(config_path, checkpoint_path)  # , model='electra')
    for layer in model.layers:
        layer.trainable = True
    # bilstm = Bidirectional(GRU(200, return_sequences=True))(model.output)
    # bilstm = SpatialDropout1D(0.5)(bilstm)
    output = Dense(num_labels)(model.output)
    CRF = ConditionalRandomField(lr_multiplier=crf_lr_multiplier)
    output = CRF(output)
    model = Model(model.input, output)
    # model.summary()
    # model = multi_gpu_model(model, gpus=2)
    model.compile(loss=CRF.sparse_loss,
                  optimizer=Adam(learning_rate),
                  metrics=[CRF.sparse_accuracy])
    return model, CRF
def get_model(self):
    pretrained_bert = build_transformer_model(
        config.bert_config_path,
        config.bert_checkpoint_path,
    )
    pretrained_bert.trainable = True
    set_trainable = False
    # Freeze everything before Transformer-9; fine-tune from there on up
    for layer in pretrained_bert.layers:
        if (layer.name.startswith('Transformer-10')
                or layer.name.startswith('Transformer-11')
                or layer.name.startswith('Transformer-9')):
            set_trainable = True
        if set_trainable:
            layer.trainable = True
        else:
            layer.trainable = False
    last_layer1 = 'Transformer-%s-FeedForward-Norm' % (config.bert_layers - 1)
    output_layer1 = pretrained_bert.get_layer(last_layer1).output
    last_layer2 = 'Transformer-%s-FeedForward-Norm' % (config.bert_layers - 2)
    output_layer2 = pretrained_bert.get_layer(last_layer2).output
    last_layer3 = 'Transformer-%s-FeedForward-Norm' % (config.bert_layers - 3)
    output_layer3 = pretrained_bert.get_layer(last_layer3).output
    # Sum the outputs of the last three encoder blocks
    output = keras.layers.add([output_layer1, output_layer2, output_layer3])
    output = Bidirectional(LSTM(128, return_sequences=True))(output)
    output = Dense(config.num_labels)(output)  # 27-way classification
    output = self.CRF(output)
    model = Model(pretrained_bert.input, output)
    model.compile(loss=self.CRF.sparse_loss,
                  optimizer=Adam(config.learning_rate),
                  metrics=[self.CRF.sparse_accuracy])
    return model
def build_model(self):
    """Build the model: load the pretrained BERT and fine-tune the last few layers.

    :return:
    """
    bert_model = build_transformer_model(config_path=args.BERT_CONFIG,
                                         checkpoint_path=args.BERT_MODEL)
    output = bert_model.get_layer(args.BERT_LAYER).output
    output = Dropout(rate=0.5)(output)
    output = Dense(_labels_num)(output)
    CRF = ConditionalRandomField(lr_multiplier=1)
    p = CRF(output)
    model = Model(bert_model.input, p)
    model.compile(
        loss=CRF.sparse_loss,
        optimizer=Adam(lr=1e-5),
        metrics=[CRF.sparse_accuracy]
    )
    model.summary()
    return model
def train():
    # First dimension: number of sentences across all training samples;
    # second dimension: the (entity, label) pairs contained in each sentence
    train_data = loader.load_data('./round1_train/data/train.txt')
    valid_data = loader.load_data('./round1_train/data/val.txt')
    global train_generator
    train_generator = generator.Generator(train_data=train_data,
                                          batch_size=batch_size,
                                          tokenizer=tokenizer,
                                          maxlen=maxlen,
                                          label2id=loader.label2id)
    global model
    # Build the transformer model from bert_model.ckpt and bert_config.json
    model = build_transformer_model(
        config_path,
        checkpoint_path,
    )
    output_layer = 'Transformer-%s-FeedForward-Norm' % (bert_layers - 1)
    output = model.get_layer(output_layer).output  # shape=(None, None, 768)
    output = Dense(loader.num_labels)(output)  # 27 classes: 13 types * (B+I) + O
    output = CRF(output)
    model = Model(model.input, output)
    model.summary()
    model.compile(loss=CRF.sparse_loss,
                  optimizer=Adam(learing_rate),
                  metrics=[CRF.sparse_accuracy])
    NER = models.NamedEntityRecognizer(trans=K.eval(CRF.trans),
                                       starts=[0],
                                       ends=[0])
    evaluate = evaluator.Evaluator(valid_data, tokenizer, model, NER, CRF, loader)
    model.fit_generator(train_generator.forfit(),
                        steps_per_epoch=len(train_generator),
                        epochs=epochs,
                        callbacks=[evaluate])
def bertmodel():
    model = build_transformer_model(
        config_path,
        checkpoint_path,
    )
    output_layer = 'Transformer-%s-FeedForward-Norm' % (bert_layers - 1)
    output = model.get_layer(output_layer).output
    output = Dense(num_labels)(output)  # 27-way classification
    CRF = ConditionalRandomField(lr_multiplier=crf_lr_multiplier)
    output = CRF(output)
    model = Model(model.input, output)
    # model.summary()
    model.compile(
        loss=CRF.sparse_loss,
        optimizer=Adam(learing_rate),
        metrics=[CRF.sparse_accuracy]
    )
    return model, CRF
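# Inference sketch (not from the original source), mirroring the bert4keras NER
# recipe: decode tag sequences with Viterbi using the learned CRF transition
# matrix. ViterbiDecoder lives in bert4keras.snippets; token_ids/segment_ids are
# assumed to come from the project's tokenizer.
from bert4keras.snippets import ViterbiDecoder

def decode_tags(model, CRF, token_ids, segment_ids):
    decoder = ViterbiDecoder(trans=K.eval(CRF.trans), starts=[0], ends=[0])
    nodes = model.predict([token_ids, segment_ids])[0]  # per-token label scores
    return decoder.decode(nodes)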
def build_model(embeddings=100, vocab_size=vocab_size, rnn_units=100):
    x_in = Input(shape=(None, ))
    output = Embedding(input_dim=vocab_size,
                       output_dim=embeddings,
                       trainable=True,
                       mask_zero=True)(x_in)
    # Forward LSTM + CRF head for the segmentation labels
    flstm_output = LSTM(units=rnn_units, return_sequences=True)(output)
    seq_output = Dropout(0.5)(flstm_output)
    seq_output = TimeDistributed(Dense(num_seglabels), name='dense_seq')(seq_output)
    Seq_crf = ConditionalRandomField(lr_multiplier=seq_crf_lr_multiplier, name='seq_crf')
    seq_output = Seq_crf(seq_output)
    # Backward LSTM: reverse the sequence, run the LSTM, reverse back
    reverse_output = Lambda(lambda x: K.reverse(x, axes=1))(output)
    reverse_output = LSTM(units=rnn_units, return_sequences=True)(reverse_output)
    blstm_output = Lambda(lambda x: K.reverse(x, axes=1))(reverse_output)
    # Concatenated BiLSTM features + CRF head for the tag labels
    lstm_out = Concatenate()([flstm_output, blstm_output])
    tag_output = Dropout(0.5)(lstm_out)
    tag_output = TimeDistributed(Dense(num_labels), name='dense_tag')(tag_output)
    Tag_crf = ConditionalRandomField(lr_multiplier=tag_crf_lr_multiplier, name='tag_crf')
    tag_output = Tag_crf(tag_output)
    model = Model(x_in, [seq_output, tag_output])
    model.summary()
    model.compile(
        loss=[Seq_crf.sparse_loss, Tag_crf.sparse_loss],
        optimizer=Adam(learing_rate),
        metrics=[SparseAccuracy()]
    )
    return model, Seq_crf, Tag_crf
def train(train_param, model_save_path):
    # logger.info()
    train_data, valid_data, schema_dict = load_data()
    train_param['schema_dict'] = schema_dict
    # print(train_param)
    # Build the tokenizer
    tokenizer = Tokenizer(train_param['dict_path'], do_lower_case=True)
    trainmodel = TagModel(train_param)
    trainmodel.model.compile(loss=trainmodel.CRF.sparse_loss,
                             optimizer=Adam(train_param['learing_rate']),
                             metrics=[trainmodel.CRF.sparse_accuracy])
    train_generator = data_generator(train_data, train_param['batch_size'],
                                     tokenizer, schema_dict['label2id'],
                                     train_param['maxlen'])
    trainmodel.model.fit_generator(
        train_generator.forfit(),
        steps_per_epoch=len(train_generator),
        epochs=train_param['epochs'],
    )
    savemodel_name = os.path.join(model_save_path, 'best_model.weights')
    trainmodel.model.save_weights(savemodel_name)
    params_file = os.path.join(model_save_path, 'config.json')
    with open(params_file, 'w', encoding='utf-8') as json_file:
        json.dump(train_param, json_file, indent=4, ensure_ascii=False)
    NER = NamedEntityRecognizer(K.eval(trainmodel.CRF.trans),
                                trainmodel.model,
                                tokenizer,
                                schema_dict['id2label'],
                                starts=[0],
                                ends=[0])
    eval_result = evaluate(valid_data, NER)
    return eval_result
def build_model():
    bert = build_transformer_model(
        config_path,
        checkpoint_path,
        return_keras_model=False,
    )
    output = Lambda(lambda x: x[:, 0], name='CLS-token')(bert.model.output)
    output = Dense(units=2,
                   activation='softmax',
                   kernel_initializer=bert.initializer)(output)
    model = keras.models.Model(bert.model.input, output)
    model.summary()
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=Adam(learning_rate),  # use a sufficiently small learning rate
        metrics=['accuracy'],
    )
    return model
def make_model(config_path, checkpoint_path, prefix):
    if prefix == 'BERT' or prefix == 'roberta-large':
        bert = build_bert_model(
            config_path=config_path,
            checkpoint_path=checkpoint_path,
            with_pool=True,
            return_keras_model=False,
        )
    if prefix == 'NEZHA':
        bert = build_bert_model(
            config_path=config_path,
            checkpoint_path=checkpoint_path,
            model='nezha',
            with_pool=True,
            return_keras_model=False,
        )
    # With the adversarial layer added, a little dropout can make training more stable
    output = Dropout(rate=0.01)(bert.model.output)
    # output = Lambda(lambda x: x[:, 0])(bert.model.output)
    output = Dense(units=2,
                   activation='softmax',
                   kernel_initializer=bert.initializer)(output)
    model = keras.models.Model(bert.model.input, output)
    # model.summary()
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=Adam(args.lr),
        metrics=['accuracy'],
    )
    # Once the helper is written, enabling adversarial training takes just one line
    adversarial_training(model, 'Embedding-Token', args.alpha)
    return model
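# adversarial_training is referenced above but not defined in this file. A sketch
# of the widely circulated bert4keras FGM recipe (from Su Jianlin's
# adversarial-training example), reproduced from memory; treat it as an
# assumption, not this project's exact code. Call it after model.compile().
import numpy as np
from bert4keras.backend import search_layer

def adversarial_training(model, embedding_name, epsilon=1):
    """Add FGM-style adversarial training to a compiled Keras model."""
    if model.train_function is None:
        model._make_train_function()           # build the training function
    old_train_function = model.train_function  # keep the original one
    # locate the embedding layer by name
    for output in model.outputs:
        embedding_layer = search_layer(output, embedding_name)
        if embedding_layer is not None:
            break
    if embedding_layer is None:
        raise Exception('Embedding layer not found')
    embeddings = embedding_layer.embeddings
    gradients = K.gradients(model.total_loss, [embeddings])
    gradients = K.zeros_like(embeddings) + gradients[0]  # densify the gradient
    inputs = (model._feed_inputs + model._feed_targets +
              model._feed_sample_weights)
    embedding_gradients = K.function(inputs=inputs, outputs=[gradients],
                                     name='embedding_gradients')

    def train_function(inputs):
        grads = embedding_gradients(inputs)[0]
        delta = epsilon * grads / (np.sqrt((grads**2).sum()) + 1e-8)
        K.set_value(embeddings, K.eval(embeddings) + delta)  # inject perturbation
        outputs = old_train_function(inputs)                 # one training step
        K.set_value(embeddings, K.eval(embeddings) - delta)  # remove perturbation
        return outputs

    model.train_function = train_function  # override the training function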
def build_model():
    with tf.device("/gpu:1"):
        model = build_transformer_model(
            config_path,
            checkpoint_path,
        )
    # output_layer = 'Transformer-%s-FeedForward-Norm' % (bert_layers - 1)
    with tf.device("/gpu:0"):
        output_layer = 'Transformer-%s-FeedForward-Norm' % (bert_layers - 1)
        output = model.get_layer(output_layer).output
        # output = Bidirectional(LSTM(unit, return_sequences=True))(output)
        output = MyDense(num_labels)(output)
        CRF = MyConditionalRandomField(lr_multiplier=crf_lr_multiplier)
        output = CRF(output)
        model = Model(model.input, output)
        # model = multi_gpu_model(model, 2)
        model.summary()
        model.compile(loss=CRF.sparse_loss,
                      optimizer=Adam(learing_rate),
                      metrics=[CRF.sparse_accuracy])
    return model, CRF
def build_model():
    """Build the model."""
    model = build_transformer_model(
        config_path,
        checkpoint_path,
        model='nezha',
        application='unilm',
        keep_tokens=keep_tokens,  # keep only the tokens in keep_tokens, trimming the vocabulary
    )
    o_in = Input(shape=(None, ))
    train_model = Model(model.inputs + [o_in], model.outputs + [o_in])

    # Cross-entropy as loss, masking out predictions on the input part
    y_true = train_model.input[2][:, 1:]    # target tokens
    y_mask = train_model.input[1][:, 1:]    # segment ids: 1 on the part to predict
    y_pred = train_model.output[0][:, :-1]  # predicted tokens, shifted one step ahead of the targets
    cross_entropy = K.sparse_categorical_crossentropy(y_true, y_pred)
    cross_entropy = K.sum(cross_entropy * y_mask) / K.sum(y_mask)
    train_model.add_loss(cross_entropy)
    train_model.compile(optimizer=Adam(1e-5))
    return model, train_model