def masked_cross_entropy(y_true, y_pred): """交叉熵作为loss,并mask掉padding部分的预测 """ y_true = K.reshape(y_true, [K.shape(y_true)[0], -1]) y_mask = K.cast(K.not_equal(y_true, 0), K.floatx()) cross_entropy = K.sparse_categorical_crossentropy(y_true, y_pred) cross_entropy = K.sum(cross_entropy * y_mask) / K.sum(y_mask) return cross_entropy
def compile(self): # 交叉熵作为loss,并mask掉输入部分的预测 y_true = self.model.input[0][:, 1:] # 目标tokens y_mask = self.model.input[1][:, 1:] # 目标mask y_mask = K.cast(y_mask, K.floatx()) # 转为浮点型 y_pred = self.model.output[:, :-1] # 预测tokens,预测与目标错开一位 cross_entropy = K.sparse_categorical_crossentropy(y_true, y_pred) cross_entropy = K.sum(cross_entropy * y_mask) / K.sum(y_mask) self.model.add_loss(cross_entropy) opt = extend_with_gradient_accumulation(Adam)(learning_rate=0.000015, grad_accum_steps=2) self.model.compile(optimizer=opt)
def sparse_accuracy(self, y_true, y_pred): """训练过程中显示逐帧准确率的函数,排除了mask的影响 此处y_true需要是整数形式(非one hot) """ # 导出mask并转换数据类型 if self.input_mask is None: mask = None else: mask = K.cast(self.input_mask, K.floatx()) # y_true需要重新明确一下shape和dtype y_true = K.reshape(y_true, K.shape(y_pred)[:-1]) y_true = K.cast(y_true, 'int32') # 逐标签取最大来粗略评测训练效果 y_pred = K.cast(K.argmax(y_pred, 2), 'int32') isequal = K.cast(K.equal(y_true, y_pred), K.floatx()) if mask is None: return K.mean(isequal) else: return K.sum(isequal * mask) / K.sum(mask)
def basic_accuracy(self, y_true, y_pred, go_backwards=False): """训练过程中显示逐帧准确率的函数,排除了mask的影响 此处y_true需要是整数形式(非one hot) """ # 导出mask并转换数据类型 if self.input_mask is None: mask = None else: mask = K.cast(self.input_mask, K.floatx()) # y_true需要重新明确一下shape和dtype y_true = K.reshape(y_true, K.shape(y_pred)[:-1]) y_true = K.cast(y_true, 'int32') # 反转相关 if self.hidden_dim is None: if go_backwards: # 是否反转序列 y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask) trans = K.transpose(self.trans) else: trans = self.trans histoty = K.gather(trans, y_true) else: if go_backwards: # 是否反转序列 y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask) r_trans, l_trans = self.l_trans, self.r_trans else: l_trans, r_trans = self.l_trans, self.r_trans histoty = K.gather(l_trans, y_true) histoty = tf.einsum('bnd,kd->bnk', histoty, r_trans) # 计算逐标签accuracy histoty = K.concatenate([y_pred[:, :1], histoty[:, :-1]], 1) y_pred = (y_pred + histoty) / 2 y_pred = K.cast(K.argmax(y_pred, 2), 'int32') isequal = K.cast(K.equal(y_true, y_pred), K.floatx()) if mask is None: return K.mean(isequal) else: return K.sum(isequal * mask) / K.sum(mask)
def basic_loss(self, y_true, y_pred, go_backwards=False): """y_true需要是整数形式(非one hot) """ # 导出mask并转换数据类型 if self.input_mask is None: mask = None else: mask = K.cast(self.input_mask, K.floatx()) # y_true需要重新明确一下shape和dtype y_true = K.reshape(y_true, K.shape(y_pred)[:-1]) y_true = K.cast(y_true, 'int32') # 反转相关 if self.hidden_dim is None: if go_backwards: # 是否反转序列 y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask) trans = K.transpose(self.trans) else: trans = self.trans histoty = K.gather(trans, y_true) else: if go_backwards: # 是否反转序列 y_true, y_pred = self.reverse_sequence([y_true, y_pred], mask) r_trans, l_trans = self.l_trans, self.r_trans else: l_trans, r_trans = self.l_trans, self.r_trans histoty = K.gather(l_trans, y_true) histoty = tf.einsum('bnd,kd->bnk', histoty, r_trans) # 计算loss histoty = K.concatenate([y_pred[:, :1], histoty[:, :-1]], 1) y_pred = (y_pred + histoty) / 2 loss = K.sparse_categorical_crossentropy(y_true, y_pred, from_logits=True) if mask is None: return K.mean(loss) else: return K.sum(loss * mask) / K.sum(mask)
def reverse_sequence(self, inputs, mask=None): if mask is None: return [x[:, ::-1] for x in inputs] else: length = K.cast(K.sum(mask, 1), 'int32') return [tf.reverse_sequence(x, length, seq_axis=1) for x in inputs]