def build_model(self):
    """Build the subject/object extraction models and the joint training model.

    Creates three Keras models sharing one BERT backbone:
      - ``self.subject_model``: token-level subject span predictor (start/end).
      - ``self.object_model``: object/relation predictor conditioned on a
        chosen subject via Conditional Layer Normalization.
      - ``self.model``: joint training model with the masked BCE losses
        attached via ``add_loss``.
    Also configures the TF1 GPU session and ``self.optimizer`` (Adam + EMA).
    """
    import tensorflow as tf
    from keras.backend.tensorflow_backend import set_session

    # --- GPU session setup (TF1.x style) ---
    sess_config = tf.ConfigProto()
    # "Best-fit with coalescing" allocator, simplified from a version of dlmalloc.
    sess_config.gpu_options.allocator_type = 'BFC'
    if self.memory_fraction:
        # Pin a fixed fraction of GPU memory instead of growing on demand.
        sess_config.gpu_options.per_process_gpu_memory_fraction = self.memory_fraction
        sess_config.gpu_options.allow_growth = False
    else:
        sess_config.gpu_options.allow_growth = True
    set_session(tf.Session(config=sess_config))

    # --- Extra inputs beyond BERT's token/segment inputs ---
    subject_labels = Input(shape=(None, 2), name='Subject-Labels')
    subject_ids = Input(shape=(2, ), name='Subject-Ids')
    object_labels = Input(shape=(None, self.num_classes, 2), name='Object-Labels')

    # --- Pretrained BERT backbone (bert4keras wrapper, not the bare model) ---
    bert = build_transformer_model(
        config_path=self.bert_config_path,
        checkpoint_path=self.bert_checkpoint_path,
        return_keras_model=False,
    )

    # --- Subject prediction head ---
    subject_logits = Dense(
        units=2, activation='sigmoid', kernel_initializer=bert.initializer
    )(bert.model.output)
    # Squaring sharpens the sigmoid outputs (pushes low scores toward 0).
    subject_preds = Lambda(lambda p: p**2)(subject_logits)
    self.subject_model = Model(bert.model.inputs, subject_preds)

    # --- Object prediction head, conditioned on the chosen subject ---
    # Fuse the subject representation into the encoder output through
    # Conditional Layer Normalization.
    hidden = bert.model.layers[-2].get_output_at(-1)
    subject_feature = Lambda(self.extrac_subject)([hidden, subject_ids])
    hidden = LayerNormalization(conditional=True)([hidden, subject_feature])
    hidden = Dense(
        units=self.num_classes * 2,
        activation='sigmoid',
        kernel_initializer=bert.initializer,
    )(hidden)
    # Fourth power: even sharper gating for the object scores.
    hidden = Lambda(lambda p: p**4)(hidden)
    object_preds = Reshape((-1, self.num_classes, 2))(hidden)
    self.object_model = Model(bert.model.inputs + [subject_ids], object_preds)

    # --- Joint training model ---
    self.model = Model(
        bert.model.inputs + [subject_labels, subject_ids, object_labels],
        [subject_preds, object_preds])

    # Token mask from the embedding layer: exclude padding from the losses.
    token_mask = bert.model.get_layer('Embedding-Token').output_mask
    token_mask = K.cast(token_mask, K.floatx())

    subject_loss = K.binary_crossentropy(subject_labels, subject_preds)
    subject_loss = K.mean(subject_loss, 2)
    subject_loss = K.sum(subject_loss * token_mask) / K.sum(token_mask)

    object_loss = K.binary_crossentropy(object_labels, object_preds)
    object_loss = K.sum(K.mean(object_loss, 3), 2)
    object_loss = K.sum(object_loss * token_mask) / K.sum(token_mask)

    self.model.add_loss(subject_loss + object_loss)

    # Adam with exponential moving average of the weights.
    optimizer_cls = extend_with_exponential_moving_average(Adam, name='AdamEMA')
    self.optimizer = optimizer_cls(lr=1e-4)
# NOTE(review): this collapsed line mixes two things: (1) complete top-level
# statements that bolt a 3-way softmax head onto the 'MLM-Norm' layer, wire
# labels through a CrossEntropy layer (indices [3, 4] of its input list),
# and compile `train_model` with an EMA Adam (lr=2e-5, momentum=0.9999);
# and (2) the start of `class AutoSummary(AutoRegressiveDecoder)` whose
# `get_ngram_set` method is cut off mid-loop — the remainder is outside this
# view, so the code is kept byte-identical rather than reformatted.
# get_ngram_set builds {(n-1)-gram: set of possible n-th tokens} from x
# (presumably a token-id sequence — confirm against the decoder's caller).
output = model.get_layer('MLM-Norm').output output = Dense(3, activation='softmax')(output) outputs = model.outputs + [output] # 预测用模型 model = Model(model.inputs, outputs) # 训练用模型 y_in = Input(shape=(None, )) l_in = Input(shape=(None, )) outputs = [y_in, model.inputs[1], l_in] + outputs outputs = CrossEntropy([3, 4])(outputs) train_model = Model(model.inputs + [y_in, l_in], outputs) AdamEMA = extend_with_exponential_moving_average(Adam, name='AdamEMA') optimizer = AdamEMA(learning_rate=2e-5, ema_momentum=0.9999) train_model.compile(optimizer=optimizer) train_model.summary() class AutoSummary(AutoRegressiveDecoder): """seq2seq解码器 """ def get_ngram_set(self, x, n): """生成ngram合集,返回结果格式是: {(n-1)-gram: set([n-gram的第n个字集合])} """ result = {} for i in range(len(x) - n + 1): k = tuple(x[i:i + n])