def parse_function(serialized):
    """Decode one serialized tf.Example into UniLM-style model inputs.

    A random split point is drawn, a [SEP] token is spliced in at that
    point (the final token is dropped so the length stays fixed), and a
    0/1 segment-id vector marks everything after the split.

    NOTE(review): relies on module-level `sequence_length` and
    `token_sep_id` being defined elsewhere in this file.
    """
    schema = {
        'token_ids': tf.io.FixedLenFeature([sequence_length], tf.int64),
    }
    parsed = tf.io.parse_single_example(serialized, schema)
    token_ids = parsed['token_ids']

    # Random split position in [1, sequence_length - 1).
    split = K.random_uniform(
        shape=[1], minval=1, maxval=sequence_length - 1, dtype='int64',
    )[0]

    # cumsum over a one-hot at (split + 1) gives zeros up to the split
    # and ones after it.
    segment_ids = K.one_hot(split + 1, sequence_length)
    segment_ids = K.cast(K.cumsum(segment_ids), 'int64')

    # prefix + [SEP] + suffix-without-last-token  (length preserved)
    sep_token = K.zeros([1], dtype='int64') + token_sep_id
    token_ids = K.concatenate(
        [token_ids[:split], sep_token, token_ids[split:-1]]
    )

    x = {
        'Input-Token': token_ids,
        'Input-Segment': segment_ids,
    }
    # Dummy targets: the losses/metrics are computed inside the model.
    y = {
        'unilm_loss': K.zeros([1]),
        'unilm_acc': K.zeros([1]),
    }
    return x, y
def compute_copy_loss(self, inputs, mask=None):
    """Masked sparse cross-entropy between targets and shifted predictions.

    `inputs` unpacks as (_, y_mask, y_true, _, y_pred); the loss averages
    only over positions the mask keeps.
    """
    _, y_mask, y_true, _, y_pred = inputs
    # Reverse -> cumsum -> reverse: a position is kept if any position at
    # or after it was masked-in, i.e. trailing padding is zeroed out.
    tail_counts = K.cumsum(y_mask[:, ::-1], axis=1)[:, ::-1]
    keep = K.cast(K.greater(tail_counts, 0.5), K.floatx())
    keep = keep[:, 1:]          # mask: drop one position
    y_pred = y_pred[:, :-1]     # predictions: shift by one position
    ce = K.sparse_categorical_crossentropy(y_true, y_pred)
    # Mean over kept positions only.
    return K.sum(ce * keep) / K.sum(keep)
def compute_loss(self, inputs, mask=None):
    """Total loss: category CE + span-head CE + span-tail CE.

    `inputs` unpacks as (q_start_in, q_end_in, q_label_in,
    ps_category, ps_heads, ps_tails); all predictions are logits.

    NOTE(review): `loss0` is collapsed to a scalar by K.mean *before*
    the mask-weighted average, so `K.sum(loss0 * mask) / K.sum(mask)`
    is effectively a no-op (sum(c * mask) / sum(mask) == c). If
    per-sample masking was intended, the K.mean should come after the
    weighting — confirm against the caller before changing.
    """
    q_start_in, q_end_in, q_label_in, ps_category, ps_heads, ps_tails = inputs
    if mask is None:
        mask = 1.0
    else:
        mask = K.cast(mask, K.floatx())
    # Category classification loss.
    loss0 = K.sparse_categorical_crossentropy(
        q_label_in, ps_category, from_logits=True)
    loss0 = K.mean(loss0)
    loss0 = K.sum(loss0 * mask) / K.sum(mask)
    # Span-head loss.
    loss1 = K.categorical_crossentropy(q_start_in, ps_heads, from_logits=True)
    loss1 = K.mean(loss1)
    # Forbid tail positions before the head: cumsum of the one-hot start
    # is 0 before the head, so those logits get pushed toward -inf.
    ps_tails = ps_tails - (1 - K.cumsum(q_start_in, axis=1)) * 1e10
    # Span-tail loss. (Removed the original's second K.mean here: the
    # inner K.mean already yields a scalar, so the outer one was an
    # identity op.)
    loss2 = K.mean(
        K.categorical_crossentropy(q_end_in, ps_tails, from_logits=True))
    # Total loss.
    return loss0 + loss1 + loss2