def compute_loss(self, inputs, mask=None):
    pred, ytrue = inputs
    acc = keras.metrics.sparse_categorical_accuracy(ytrue, pred)
    self.add_metric(acc, name='clf_acc')
    ytrue = K.cast(ytrue, 'int32')
    ytrue = K.one_hot(ytrue, num_classes=num_classes)  # num_classes comes from the enclosing scope
    ytrue = K.reshape(ytrue, (-1, num_classes))
    # element-wise binary cross-entropy against the one-hot targets
    loss = (ytrue * K.log(pred + K.epsilon()) +
            (1 - ytrue) * K.log(1 - pred + K.epsilon()))
    loss = -K.mean(loss)
    loss = loss * self.alpha  # weight of the classification term
    self.add_metric(loss, name='clf_loss')
    return loss
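compute_loss above reads like a method of a custom Keras layer: num_classes is taken from the enclosing scope and self.alpha weights the classification term. As a minimal sketch of how such a layer might be wired up (the ClassificationLoss name, the alpha default, and the toy shapes are assumptions, not from the source):

import tensorflow as tf
from tensorflow import keras
import tensorflow.keras.backend as K

num_classes = 3  # assumed to be defined globally, as compute_loss expects

class ClassificationLoss(keras.layers.Layer):
    """Hypothetical host layer: registers the loss via add_loss()."""

    def __init__(self, alpha=1.0, **kwargs):
        super(ClassificationLoss, self).__init__(**kwargs)
        self.alpha = alpha

    compute_loss = compute_loss  # bind the function above as a method

    def call(self, inputs, mask=None):
        self.add_loss(self.compute_loss(inputs, mask))
        return inputs[0]  # pass predictions through unchanged

x_in = keras.layers.Input(shape=(768,))
y_in = keras.layers.Input(shape=(1,))
pred = keras.layers.Dense(num_classes, activation='softmax')(x_in)
pred = ClassificationLoss(alpha=1.0)([pred, y_in])
model = keras.models.Model([x_in, y_in], pred)
model.compile(optimizer='adam')  # the loss is already attached via add_loss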
def mlm_loss(inputs):
    """Function that computes the loss; it needs to be wrapped in a layer."""
    y_true, y_pred, mask = inputs
    loss = K.sparse_categorical_crossentropy(y_true, y_pred, from_logits=True)
    # average only over the masked positions
    loss = K.sum(loss * mask) / (K.sum(mask) + K.epsilon())
    return loss
def mlm_acc(inputs):
    """Function that computes the accuracy; it needs to be wrapped in a layer."""
    y_true, y_pred, mask = inputs
    y_true = K.cast(y_true, K.floatx())
    acc = keras.metrics.sparse_categorical_accuracy(y_true, y_pred)
    # average only over the masked positions
    acc = K.sum(acc * mask) / (K.sum(mask) + K.epsilon())
    return acc
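Both mlm_loss and mlm_acc take [y_true, y_pred, mask] and return a scalar, hence the note in the docstrings about wrapping them in a layer. A minimal sketch of that wiring with Lambda layers (the toy encoder and all sizes are assumptions, not from the source):

from tensorflow import keras
import tensorflow.keras.backend as K

vocab_size, seq_len, hidden = 1000, 128, 64  # toy sizes

tokens_in = keras.layers.Input(shape=(seq_len,), dtype='int32')
y_in = keras.layers.Input(shape=(seq_len,), dtype='int32')       # target token ids
mask_in = keras.layers.Input(shape=(seq_len,), dtype='float32')  # 1.0 at masked positions

x = keras.layers.Embedding(vocab_size, hidden)(tokens_in)  # stand-in for the real encoder
logits = keras.layers.Dense(vocab_size)(x)

loss = keras.layers.Lambda(mlm_loss, name='mlm_loss')([y_in, logits, mask_in])
acc = keras.layers.Lambda(mlm_acc, name='mlm_acc')([y_in, logits, mask_in])

model = keras.models.Model([tokens_in, y_in, mask_in], logits)
model.add_loss(loss)
model.add_metric(acc, name='mlm_acc')
model.compile(optimizer='adam')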
def __init__(self,
             learning_rate=0.001,
             beta_1=0.9,
             beta_2=0.99,
             epsilon=1e-6,
             bias_correct=True,
             **kwargs):
    kwargs['name'] = kwargs.get('name', 'Adam')
    super(Adam, self).__init__(**kwargs)
    self._set_hyper('learning_rate', learning_rate)
    self._set_hyper('beta_1', beta_1)
    self._set_hyper('beta_2', beta_2)
    self.epsilon = epsilon or K.epsilon()
    self.bias_correct = bias_correct
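The use of _set_hyper marks this as a tf.keras OptimizerV2 subclass (tf.keras.optimizers.Optimizer in TF 2.10 and earlier); the bias_correct flag presumably toggles Adam's usual debiasing of the running moments. A sketch of the matching dense update step under that assumption (this is not the source's implementation):

import tensorflow as tf

class Adam(tf.keras.optimizers.Optimizer):
    # __init__ as above ...

    def _create_slots(self, var_list):
        for var in var_list:
            self.add_slot(var, 'm')  # first moment
            self.add_slot(var, 'v')  # second moment

    def _resource_apply_dense(self, grad, var):
        var_dtype = var.dtype.base_dtype
        lr_t = self._decayed_lr(var_dtype)
        beta_1_t = self._get_hyper('beta_1', var_dtype)
        beta_2_t = self._get_hyper('beta_2', var_dtype)
        step = tf.cast(self.iterations + 1, var_dtype)
        m = self.get_slot(var, 'm')
        v = self.get_slot(var, 'v')
        m_t = m.assign(beta_1_t * m + (1.0 - beta_1_t) * grad)
        v_t = v.assign(beta_2_t * v + (1.0 - beta_2_t) * tf.square(grad))
        if self.bias_correct:  # debias the moments, as in standard Adam
            m_t = m_t / (1.0 - tf.pow(beta_1_t, step))
            v_t = v_t / (1.0 - tf.pow(beta_2_t, step))
        return var.assign_sub(lr_t * m_t / (tf.sqrt(v_t) + self.epsilon))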
def compute_loss_of_scl(self, inputs, mask=None):
    y_pred, y_true = inputs
    label_mask = self.get_label_mask(y_true)
    y_pred = K.l2_normalize(y_pred, axis=1)  # L2-normalize the feature vectors
    similarities = K.dot(y_pred, K.transpose(y_pred))  # similarity matrix
    similarities = similarities - K.eye(K.shape(y_pred)[0]) * 1e12  # mask out the diagonal, i.e. i == j
    similarities = similarities / self.T  # temperature scaling
    similarities = K.exp(similarities)
    sum_similarities = K.sum(similarities, axis=-1, keepdims=True)  # sum over i != k
    scl = similarities / sum_similarities
    scl = K.log(scl + K.epsilon())
    # average the log-probabilities over each sample's positive pairs
    scl = -K.sum(scl * label_mask, axis=1, keepdims=True) \
        / (K.sum(label_mask, axis=1, keepdims=True) + K.epsilon())
    return K.mean(scl)
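This is the supervised contrastive (SCL) formulation; it depends on self.get_label_mask(y_true), which is not shown. From its use it should return a (batch, batch) matrix whose (i, j) entry is 1 when samples i and j carry the same label, with the diagonal zeroed so a sample is never its own positive. A minimal sketch under that assumption:

import tensorflow as tf
import tensorflow.keras.backend as K

def get_label_mask(self, y_true):
    """Hypothetical sketch: mask[i, j] = 1 iff labels i and j match and i != j."""
    y_true = K.reshape(y_true, (-1, 1))
    mask = K.cast(K.equal(y_true, K.transpose(y_true)), K.floatx())
    mask = mask - tf.eye(K.shape(y_true)[0])  # drop the i == j pairs
    return mask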
def __init__(self,
             learning_rate=0.001,
             beta_1=0.9,
             beta_2=0.999,
             amsgrad=False,
             **kwargs):
    self.initial_decay = kwargs.pop('decay', 0.0)
    self.epsilon = kwargs.pop('epsilon', K.epsilon())
    learning_rate = kwargs.pop('lr', learning_rate)
    super(AdaBelief, self).__init__(**kwargs)
    with K.name_scope(self.__class__.__name__):
        self.iterations = K.variable(0, dtype='int64', name='iterations')
        self.learning_rate = K.variable(learning_rate, name='learning_rate')
        self.beta_1 = K.variable(beta_1, name='beta_1')
        self.beta_2 = K.variable(beta_2, name='beta_2')
        self.decay = K.variable(self.initial_decay, name='decay')
    self.amsgrad = amsgrad
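This constructor follows the legacy keras.optimizers.Optimizer pattern, so the actual update rule would live in get_updates. AdaBelief differs from Adam by tracking the variance of the gradient around its running mean, (g - m_t)^2, instead of the raw second moment g^2. A sketch of that step under the hyper-parameters above (decay and amsgrad handling omitted; this is not the source's full implementation):

import tensorflow.keras.backend as K

def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]
    t = K.cast(self.iterations, K.floatx()) + 1
    # bias-corrected step size, as in the legacy Keras Adam
    lr_t = self.learning_rate * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                                 (1. - K.pow(self.beta_1, t)))
    ms = [K.zeros(K.int_shape(p)) for p in params]
    ss = [K.zeros(K.int_shape(p)) for p in params]
    self.weights = [self.iterations] + ms + ss
    for p, g, m, s in zip(params, grads, ms, ss):
        m_t = self.beta_1 * m + (1. - self.beta_1) * g
        # key difference from Adam: EMA of (g - m_t)^2 instead of g^2
        s_t = self.beta_2 * s + (1. - self.beta_2) * K.square(g - m_t) + self.epsilon
        p_t = p - lr_t * m_t / (K.sqrt(s_t) + self.epsilon)
        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(s, s_t))
        self.updates.append(K.update(p, p_t))
    return self.updates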