def kullback_leibler_divergence(y_true, y_pred):
    """Computes Kullback-Leibler divergence loss between `y_true` and `y_pred`.

    `loss = y_true * log(y_true / y_pred)`

    See: https://en.wikipedia.org/wiki/Kullback%E2%80%93Leibler_divergence

    Usage:

    ```python
    loss = tf.keras.losses.KLD([.4, .9, .2], [.5, .8, .12])
    print('Loss: ', loss.numpy())  # Loss: 0.11891246
    ```

    Args:
        y_true: Tensor of true targets.
        y_pred: Tensor of predicted targets.

    Returns:
        A `Tensor` with loss.

    Raises:
        TypeError: If `y_true` cannot be cast to the `y_pred.dtype`.
    """
    y_pred = ops.convert_to_tensor(y_pred)
    y_true = math_ops.cast(y_true, y_pred.dtype)
    y_true = K.clip(y_true, K.epsilon(), 1)
    y_pred = K.clip(y_pred, K.epsilon(), 1)
    return math_ops.reduce_sum(y_true * math_ops.log(y_true / y_pred), axis=-1)
def tp(gt, pred):
    """Smoothed true-positive rate of `pred` against ground truth `gt`."""
    smooth = 1.
    pred_pos = backend.round(backend.clip(pred, 0, 1))
    gt_pos = backend.round(backend.clip(gt, 0, 1))
    tp = (backend.sum(gt_pos * pred_pos) + smooth) / (backend.sum(gt_pos) + smooth)
    return tp
def tn(y_true, y_pred):
    """Smoothed true-negative rate (specificity)."""
    smooth = 1
    y_pred_pos = K.round(K.clip(y_pred, 0, 1))
    y_pred_neg = 1 - y_pred_pos
    y_pos = K.round(K.clip(y_true, 0, 1))
    y_neg = 1 - y_pos
    tn = (K.sum(y_neg * y_pred_neg) + smooth) / (K.sum(y_neg) + smooth)
    return tn
def recall(y_true, y_pred):
    """Recall metric.

    Only computes a batch-wise average of recall.

    Computes the recall, a metric for multi-label classification of how many
    relevant items are selected.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall
def jsloss(y_true, y_pred):
    """Jensen-Shannon divergence: the mean of the two KL divergences to the
    midpoint distribution M = (y_true + y_pred) / 2. Unlike KL, it is symmetric."""
    y_true = K.clip(y_true, K.epsilon(), 1)
    y_pred = K.clip(y_pred, K.epsilon(), 1)
    M = (y_true + y_pred) / 2
    kl1 = 0.5 * math_ops.reduce_sum(y_true * math_ops.log(y_true / M), axis=-1)
    kl2 = 0.5 * math_ops.reduce_sum(y_pred * math_ops.log(y_pred / M), axis=-1)
    res = kl1 + kl2
    return res
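# Quick sanity check for jsloss (a sketch with made-up inputs; assumes TF 2.x
# eager execution so tensors print directly). The Jensen-Shannon divergence is
# symmetric, so both calls below should print the same value.
import tensorflow as tf

p = tf.constant([[0.4, 0.4, 0.2]])
q = tf.constant([[0.5, 0.3, 0.2]])
print(jsloss(p, q))
print(jsloss(q, p))  # same value: JS(p, q) == JS(q, p)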
def confusion(gt, pred):
    """Soft confusion-matrix counts (no rounding, so predictions contribute
    fractionally)."""
    pred_pos = backend.clip(pred, 0, 1)
    pred_neg = 1 - pred_pos
    gt_pos = backend.clip(gt, 0, 1)
    gt_neg = 1 - gt_pos
    tp = backend.sum(gt_pos * pred_pos)
    fp = backend.sum(gt_neg * pred_pos)
    fn = backend.sum(gt_pos * pred_neg)
    return tp, fp, fn
def tn(gt, pred):
    smooth = 1.
    pred_pos = backend.round(backend.clip(pred, 0, 1))  # round: element-wise rounding to the nearest integer
    pred_neg = 1 - pred_pos
    gt_pos = backend.round(backend.clip(gt, 0, 1))
    gt_neg = 1 - gt_pos
    tn = (backend.sum(gt_neg * pred_neg) + smooth) / (backend.sum(gt_neg) + smooth)
    return tn
def precision(y_true, y_pred):
    """Precision metric.

    Only computes a batch-wise average of precision.

    Computes the precision, a metric for multi-label classification of how
    many selected items are relevant.
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision
def f1(y_true, y_pred):
    """Smoothed Dice loss (1 - Dice coefficient). Despite the name, this is a
    loss to minimize, not an F1 score to maximize."""
    smooth = 1.0
    y_label = K.round(K.clip(y_pred, 0, 1))
    y_tp = K.round(K.clip(y_true * y_label, 0, 1))
    y_tp_sum = K.sum(y_tp, axis=1)
    y_true_sum = K.sum(y_true, axis=1)
    y_label_sum = K.sum(y_label, axis=1)
    dice_array = (y_tp_sum * 2 + smooth) / (y_label_sum + y_true_sum + smooth)
    dice = K.mean(dice_array)
    return 1 - dice
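# Worked example for f1 above (made-up inputs; assumes TF 2.x eager mode).
# When the prediction rounds to the exact ground truth, the smoothed Dice
# coefficient is (2*2 + 1) / (2 + 2 + 1) = 1, so the returned loss is 0.
import tensorflow as tf

y_true = tf.constant([[1., 0., 1., 0.]])
y_pred = tf.constant([[0.9, 0.1, 0.8, 0.2]])
print(f1(y_true, y_pred))  # 0.0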
def artifact_precision(y_true, y_pred):
    # Only evaluate voxels whose artifact channel (index 2) is labeled.
    weights = y_true[:, :, :, :, 2]
    mask = tf.equal(weights, 1)
    mask_true = tf.boolean_mask(y_true[:, :, :, :, 2], mask)
    mask_pred = tf.boolean_mask(1 - y_pred[:, :, :, :, 0], mask)
    true_positives = K.sum(K.round(K.clip(mask_true * mask_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(mask_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision
def axon_recall(y_true, y_pred):
    # Only evaluate voxels whose label channels sum to exactly 1.
    weights = tf.reduce_sum(y_true, axis=-1)
    mask = tf.equal(weights, 1)
    mask_true = tf.boolean_mask(y_true[:, :, :, :, 0], mask)
    mask_pred = tf.boolean_mask(y_pred[:, :, :, :, 0], mask)
    true_positives = K.sum(K.round(K.clip(mask_true * mask_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(mask_true, 0, 1)))
    recall = true_positives / (actual_positives + K.epsilon())
    return recall
def recall(y_true, y_pred):
    """Recall metric.

    Computes the recall over the whole batch using threshold_value.
    """
    threshold_value = threshold  # `threshold` is read from the enclosing scope
    # Adaptation of the "round()" used before to get the predictions. Clipping
    # to make sure that the predicted raw values are between 0 and 1.
    y_pred = K.cast(K.greater(K.clip(y_pred, 0, 1), threshold_value), K.floatx())
    # Compute the number of true positives. Round as a precaution to make sure
    # we have an integer.
    true_positives = K.round(K.sum(K.clip(y_true * y_pred, 0, 1)))
    # Compute the number of positive targets.
    possible_positives = K.sum(K.clip(y_true, 0, 1))
    recall_ratio = true_positives / (possible_positives + K.epsilon())
    return recall_ratio
def axon_precision(y_true, y_pred):
    # Only evaluate voxels whose label channels sum to exactly 1.
    weights = tf.reduce_sum(y_true, axis=-1)
    mask = tf.equal(weights, 1)
    mask_true = tf.boolean_mask(y_true[:, :, :, :, 0], mask)
    mask_pred = tf.boolean_mask(y_pred[:, :, :, :, 0], mask)
    true_positives = K.sum(K.round(K.clip(mask_true * mask_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(mask_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision
def contrastive_loss(y_true, y_pred):
    def _contrastive_loss(y1, D):
        g = tf.constant(1.0, shape=[1], dtype=tf.float32)
        return K.mean(y1 * K.square(D) +
                      (g - y1) * K.square(K.maximum(g - D, 0)))

    y_pred = K.clip(y_pred, _EPSILON, 1.0 - _EPSILON)
    loss = tf.convert_to_tensor(0, dtype=tf.float32)
    g = tf.constant(1.0, shape=[1], dtype=tf.float32)
    h = tf.constant(0.0, shape=[1], dtype=tf.float32)
    # The batch is laid out as (query, positive, negative) triplets.
    for i in range(0, batch_size, 3):
        try:
            q_embedding = y_pred[i + 0]
            p_embedding = y_pred[i + 1]
            n_embedding = y_pred[i + 2]
            D_q_p = K.sqrt(K.sum((q_embedding - p_embedding)**2))
            D_q_n = K.sqrt(K.sum((q_embedding - n_embedding)**2))
            L_q_p = _contrastive_loss(g, D_q_p)
            L_q_n = _contrastive_loss(h, D_q_n)
            loss = (loss + L_q_p + L_q_n)
        except Exception:  # skip an incomplete trailing triplet
            continue
    loss = loss / (batch_size * 2 / 3)
    zero = tf.constant(0.0, shape=[1], dtype=tf.float32)
    return tf.maximum(loss, zero)
def binary_focal_loss_fixed(y_true, y_pred):
    """
    :param y_true: A tensor of the same shape as `y_pred`
    :param y_pred: A tensor resulting from a sigmoid
    :return: Output tensor.
    """
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
    epsilon = K.epsilon()
    # clip to prevent NaN's and Inf's
    pt_1 = K.clip(pt_1, epsilon, 1. - epsilon)
    pt_0 = K.clip(pt_0, epsilon, 1. - epsilon)
    return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) \
           - K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0))
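# binary_focal_loss_fixed reads `alpha` and `gamma` from an enclosing scope.
# A minimal self-contained factory (a sketch assuming tf.keras, not taken from
# the original source) binds them so the loss can be passed to model.compile:
import tensorflow as tf
from tensorflow.keras import backend as K

def binary_focal_loss(gamma=2., alpha=0.25):
    def loss_fn(y_true, y_pred):
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        eps = K.epsilon()  # clip to prevent NaN's and Inf's
        pt_1 = K.clip(pt_1, eps, 1. - eps)
        pt_0 = K.clip(pt_0, eps, 1. - eps)
        return (-K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))
                - K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0)))
    return loss_fn

# e.g. model.compile(optimizer='adam', loss=binary_focal_loss(gamma=2., alpha=0.25))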
def get_gradients(self, loss, params):
    """Returns gradients of `loss` with respect to `params`.

    Arguments:
        loss: Loss tensor.
        params: List of variables.

    Returns:
        List of gradient tensors.

    Raises:
        ValueError: In case any gradient cannot be computed (e.g. if gradient
          function not implemented).
    """
    grads = K.gradients(loss, params)
    if None in grads:
        raise ValueError('An operation has `None` for gradient. '
                         'Please make sure that all of your ops have a '
                         'gradient defined (i.e. are differentiable). '
                         'Common ops without gradient: '
                         'K.argmax, K.round, K.eval.')
    if hasattr(self, 'clipnorm') and self.clipnorm > 0:
        norm = K.sqrt(
            sum([math_ops.reduce_sum(math_ops.square(g)) for g in grads]))
        grads = [clip_norm(g, self.clipnorm, norm) for g in grads]
    if hasattr(self, 'clipvalue') and self.clipvalue > 0:
        grads = [K.clip(g, -self.clipvalue, self.clipvalue) for g in grads]
    return grads
def brd_max(inputs):
    b_S17 = inputs

    def _each(b_17):
        pT = tf.constant([0.8], dtype=tf.float32)
        pN = tf.constant([0.2], dtype=tf.float32)
        output = b_17 * init_max_value  # `init_max_value` comes from the enclosing scope
        logging.getLogger().info("--_each\n %s" % output)
        output = K.sum(output, axis=None, keepdims=False)
        logging.getLogger().info("--sum\n %s" % output)
        output = tf.cond(tf.greater(output, init_max_hv), lambda: pT, lambda: pN)
        return output

    def get_pred(inputs):
        b_S17 = inputs
        output = tf.map_fn(lambda x: _each(x), b_S17, dtype=tf.float32)
        return tf.reshape(output, [-1, 1])

    n_pred = brd_mean(b_S17)
    x_pred = get_pred(b_S17)
    n_pred = tf.reshape(n_pred, [-1])
    x_pred = tf.reshape(x_pred, [-1])
    # Use x_pred wherever it rounds to 1, otherwise fall back to n_pred.
    xt = K.round(K.clip(x_pred, 0, 1))
    xn = 1 - xt
    output = n_pred * xn + x_pred * xt
    return tf.reshape(output, [-1, 1])
def fbeta_score(y_true, y_pred, beta):
    '''Calculates the F score, the weighted harmonic mean of precision and
    recall.

    This is useful for multi-label classification, where input samples can be
    classified as sets of labels. By only using accuracy (precision) a model
    would achieve a perfect score by simply assigning every class to every
    input. In order to avoid this, a metric should penalize incorrect class
    assignments as well (recall). The F-beta score (ranging from 0.0 to 1.0)
    computes this, as a weighted mean of the proportion of correct class
    assignments vs. the proportion of incorrect class assignments.

    With beta = 1, this is equivalent to an F-measure. With beta < 1,
    assigning correct classes becomes more important, and with beta > 1 the
    metric is instead weighted towards penalizing incorrect class assignments.

    https://github.com/keras-team/keras/blob/2b51317be82d4420169d2cc79dc4443028417911/keras/metrics.py
    '''
    if beta < 0:
        raise ValueError('The lowest choosable beta is zero (only precision).')

    # If there are no true positives, fix the F score at 0 like sklearn.
    if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
        return 0

    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    bb = beta**2
    fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
    return fbeta_score
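# fbeta_score relies on the precision() and recall() helpers defined earlier
# in this file. A quick check (a sketch with made-up inputs; assumes TF 2.x
# eager mode): here precision = 1.0 and recall = 2/3.
import tensorflow as tf

y_true = tf.constant([[1., 0., 1., 1.]])
y_pred = tf.constant([[0.9, 0.2, 0.4, 0.8]])
print(fbeta_score(y_true, y_pred, beta=1.))  # F1 = 0.8
print(fbeta_score(y_true, y_pred, beta=2.))  # ~0.71, weights recall higher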
def loss(y_true, y_pred):
    # Multiply with the one-hot encoded taken action.
    prob = K.sum(y_true * y_pred, axis=-1)
    old_prob = K.sum(y_true * old_prediction, axis=-1)
    r = prob / (old_prob + 1e-10)
    # PPO clipped surrogate objective plus an entropy bonus.
    return -K.mean(
        K.minimum(r * advantage,
                  K.clip(r, min_value=1 - loss_clipping,
                         max_value=1 + loss_clipping) * advantage) +
        entropy_loss * -(prob * K.log(prob + 1e-10)))
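# The PPO loss above closes over `old_prediction`, `advantage`,
# `loss_clipping`, and `entropy_loss`. A typical wrapper (a sketch with
# hypothetical parameter names, not from the original source) binds them:
from tensorflow.keras import backend as K

def ppo_loss(old_prediction, advantage, loss_clipping=0.2, entropy_loss=5e-3):
    def loss(y_true, y_pred):
        prob = K.sum(y_true * y_pred, axis=-1)
        old_prob = K.sum(y_true * old_prediction, axis=-1)
        r = prob / (old_prob + 1e-10)
        clipped = K.clip(r, min_value=1 - loss_clipping,
                         max_value=1 + loss_clipping) * advantage
        entropy = -(prob * K.log(prob + 1e-10))
        return -K.mean(K.minimum(r * advantage, clipped) +
                       entropy_loss * entropy)
    return loss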
def focal_loss(y_true, y_pred):
    # Define epsilon so that backpropagation will not result in NaN for the
    # 0-divisor case
    epsilon = K.epsilon()
    # Add the epsilon to prediction value
    # y_pred = y_pred + epsilon
    # Clip the prediction value
    y_pred = K.clip(y_pred, epsilon, 1.0 - epsilon)
    alpha_factor = K.ones_like(y_true) * alpha
    # Calculate p_t: the model's estimated probability of the true class
    p_t = tf.where(K.equal(y_true, 1), y_pred, 1 - y_pred)
    # Calculate alpha_t
    alpha_t = tf.where(K.equal(y_true, 1), alpha_factor, 1 - alpha_factor)
    # Calculate cross entropy
    cross_entropy = -K.log(p_t)
    weight = alpha_t * K.pow((1 - p_t), gamma)
    # Calculate focal loss
    loss = weight * cross_entropy
    # Sum the losses in mini_batch
    loss = K.sum(loss, axis=1)
    return loss
def get_f1(y_true, y_pred):  # taken from old keras source code
    """
    description: F1 value for accuracy
    input:
        1. Real y to compare from
        2. Predicted y to check accuracy from
    output:
        1. The F1 value
    """
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    recall = true_positives / (possible_positives + K.epsilon())
    f1_val = 2 * (precision * recall) / (precision + recall + K.epsilon())
    return f1_val
def __call__(self, w):
    norms = K.sqrt(
        math_ops.reduce_sum(math_ops.square(w), axis=self.axis, keepdims=True))
    desired = (
        self.rate * K.clip(norms, self.min_value, self.max_value) +
        (1 - self.rate) * norms)
    return w * (desired / (K.epsilon() + norms))
def angular_loss_2(y_true, y_pred):
    y_pred = K.clip(y_pred, _EPSILON, 1.0 - _EPSILON)
    g = tf.constant(1.0, shape=[1], dtype=tf.float32)
    c = tf.constant(4.0, shape=[1], dtype=tf.float32)
    d = tf.constant(2.0, shape=[1], dtype=tf.float32)
    alpha = tf.constant(45.0, shape=[1], dtype=tf.float32)

    losses = []
    losses2 = []
    # The batch is laid out as (anchor, positive, negative) triplets.
    for i in range(0, batch_size, 3):
        try:
            xa = y_pred[i + 0]
            xp = y_pred[i + 1]
            xn = y_pred[i + 2]
            fapn = c * (tf.tan(alpha * K.transpose(xa + xp) * xn)**2) \
                - d * (g + tf.tan(alpha)**2) * K.transpose(xa) * xp
            losses.append(fapn)
            losses2.append(K.transpose(xa) * xn - K.transpose(xa) * xp)
        except Exception:  # skip an incomplete trailing triplet
            continue

    loss = K.sum(K.log(1 + 2 * K.sum([K.exp(v) for v in losses])))
    loss2 = K.sum(K.log(1 + 2 * K.sum([K.exp(v) for v in losses2])))
    loss = loss + 2 * loss2
    loss = loss / (batch_size / 3)
    zero = tf.constant(0.0, shape=[1], dtype=tf.float32)
    return tf.maximum(loss, zero)
def lossless_loss(y_true, y_pred):
    N = tf.constant(4096.0, shape=[1], dtype=tf.float32)
    beta = tf.constant(4096.0, shape=[1], dtype=tf.float32)
    y_pred = K.clip(y_pred, _EPSILON, 1.0 - _EPSILON)
    loss = tf.convert_to_tensor(0, dtype=tf.float32)
    g = tf.constant(1.0, shape=[1], dtype=tf.float32)
    const1 = tf.constant(1.0, shape=[1], dtype=tf.float32)
    # The batch is laid out as (anchor, positive, negative) triplets;
    # `batch_size` and `epsilon` come from the enclosing scope.
    for i in range(0, batch_size, 3):
        try:
            anchor = y_pred[i + 0]
            positive = y_pred[i + 1]
            negative = y_pred[i + 2]
            pos_dist = K.sum(K.square(anchor - positive), 1)
            neg_dist = K.sum(K.square(anchor - negative), 1)
            # Non-linear (log) squashing of the distances.
            pos_dist = -tf.log(-tf.divide((pos_dist), beta) + const1 + epsilon)
            neg_dist = -tf.log(-tf.divide((N - neg_dist), beta) + const1 + epsilon)
            _loss = neg_dist + pos_dist
            loss = (loss + g + _loss)
        except Exception:  # skip an incomplete trailing triplet
            continue
    loss = loss / (batch_size / 3)
    zero = tf.constant(0.0, shape=[1], dtype=tf.float32)
    return tf.maximum(loss, zero)
def __call__(self, w):
    norms = K.sqrt(
        math_ops.reduce_sum(math_ops.square(w), axis=self.axis, keepdims=True))
    desired = K.clip(norms, 0, self.max_value)
    return w * (desired / (K.epsilon() + norms))
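# The __call__ above is the core of the Keras MaxNorm weight constraint. In
# practice it is attached to a layer (a sketch, assuming tf.keras):
from tensorflow.keras import layers, constraints

dense = layers.Dense(64, kernel_constraint=constraints.MaxNorm(max_value=2.))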
def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    self.updates = [state_ops.assign_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        lr = lr * (
            1. / (1. + self.decay * math_ops.cast(self.iterations,
                                                  K.dtype(self.decay))))

    t = math_ops.cast(self.iterations, K.floatx()) + 1
    lr_t = lr * (
        K.sqrt(1. - math_ops.pow(self.beta_2, t)) /
        (1. - math_ops.pow(self.beta_1, t)))

    final_lr = self.final_lr * lr / self.base_lr
    lower_bound = final_lr * (1. - 1. / (self.gamma * t + 1))
    upper_bound = final_lr * (1. + 1. / (self.gamma * t))

    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    if self.amsbound:
        vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    else:
        vhats = [K.zeros(1) for _ in params]
    self.weights = [self.iterations] + ms + vs + vhats

    for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * math_ops.square(g)

        if self.amsbound:
            vhat_t = math_ops.maximum(vhat, v_t)
            p_t = p - m_t * K.clip(lr_t / (K.sqrt(vhat_t) + self.epsilon),
                                   lower_bound, upper_bound)
            self.updates.append(state_ops.assign(vhat, vhat_t))
        else:
            p_t = p - m_t * K.clip(lr_t / (K.sqrt(v_t) + self.epsilon),
                                   lower_bound, upper_bound)

        self.updates.append(state_ops.assign(m, m_t))
        self.updates.append(state_ops.assign(v, v_t))
        new_p = p_t

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(state_ops.assign(p, new_p))
    return self.updates
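# Note on the clipping in get_updates above: the effective per-parameter step
# size lr_t / (sqrt(v_t) + epsilon) is clamped into [lower_bound, upper_bound],
# and both bounds converge to final_lr as t grows. This appears to be the
# AdaBound schedule (Luo et al., 2019), which anneals Adam-style adaptive
# steps toward a fixed SGD-style step.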
def loss(y_true, y_pred):
    prob = K.sum(y_true * y_pred)
    old_prob = K.sum(y_true * old_prediction)
    r = prob / (old_prob + 1e-10)
    # Clipped policy-ratio surrogate, weighted by a log-likelihood term.
    return -K.log(prob + 1e-10) * K.mean(
        K.minimum(r * advantage,
                  K.clip(r, min_value=0.8, max_value=1.2) * advantage))
def __call__(self, w):
    norms = backend.sqrt(
        math_ops.reduce_sum(math_ops.square(w), axis=self.axis, keepdims=True))
    desired = (
        self.rate * backend.clip(norms, self.min_value, self.max_value) +
        (1 - self.rate) * norms)
    return w * (desired / (backend.epsilon() + norms))
def precision(y_target, y_pred):
    # clip(t, clip_value_min, clip_value_max): clamps values outside
    # [clip_value_min, clip_value_max] to the boundaries
    # round: rounds to the nearest integer
    y_pred_yn = K.round(K.clip(y_pred, 0, 1))  # set predictions to 0 (negative) or 1 (positive)
    y_target_yn = K.round(K.clip(y_target, 0, 1))  # set targets to 0 (negative) or 1 (positive)

    # A true positive is a case where both the target and the prediction are 1 (positive)
    count_true_positive = K.sum(y_target_yn * y_pred_yn)

    # (true positives + false positives) = all predictions equal to 1 (positive)
    count_true_positive_false_positive = K.sum(y_pred_yn)

    # Precision = (true positives) / (true positives + false positives)
    # K.epsilon() adds a small number to guard against division by zero
    precision = count_true_positive / (count_true_positive_false_positive + K.epsilon())

    # return a single tensor value
    return precision
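# Sanity check for precision above (made-up inputs; assumes TF 2.x eager
# mode). The predictions round to [1, 1, 0, 0]: one true positive out of two
# predicted positives, so precision is 0.5.
import tensorflow as tf

y_target = tf.constant([[1., 0., 1., 0.]])
y_pred = tf.constant([[0.7, 0.6, 0.3, 0.1]])
print(precision(y_target, y_pred))  # 0.5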
def _hard_sigmoid(x):
    '''Hard sigmoid different from the more conventional form (see definition
    of K.hard_sigmoid).

    # Reference:
    - [BinaryNet: Training Deep Neural Networks with Weights and Activations
      Constrained to +1 or -1, Courbariaux et al. 2016](http://arxiv.org/abs/1602.02830)
    '''
    x = (0.5 * x) + 0.5
    return K.clip(x, 0, 1)
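# A few values of the hard sigmoid above, easy to verify by hand:
# _hard_sigmoid(-2.) == 0.0, _hard_sigmoid(0.) == 0.5, _hard_sigmoid(2.) == 1.0.
# The function is linear on [-1, 1] and saturates to 0 or 1 outside it.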
def weighted_loss(y_true, y_pred):
    # return weighted_categorical_cross_entropy(y_true, y_pred, class_weights)
    y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
    # clip to prevent NaN's and Inf's
    y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
    # calc
    loss = y_true * class_weights * K.log(y_pred)
    loss = -K.sum(loss, -1)
    return loss
def mean_squared_logarithmic_error(y_true, y_pred):  # pylint: disable=missing-docstring
    y_pred = ops.convert_to_tensor(y_pred)
    y_true = math_ops.cast(y_true, y_pred.dtype)
    first_log = math_ops.log(K.clip(y_pred, K.epsilon(), None) + 1.)
    second_log = math_ops.log(K.clip(y_true, K.epsilon(), None) + 1.)
    return K.mean(math_ops.squared_difference(first_log, second_log), axis=-1)
def mean_absolute_percentage_error(y_true, y_pred):  # pylint: disable=missing-docstring
    y_pred = ops.convert_to_tensor(y_pred)
    y_true = math_ops.cast(y_true, y_pred.dtype)
    diff = math_ops.abs(
        (y_true - y_pred) / K.clip(math_ops.abs(y_true), K.epsilon(), None))
    return 100. * K.mean(diff, axis=-1)
def kullback_leibler_divergence(y_true, y_pred):  # pylint: disable=missing-docstring
    y_pred = ops.convert_to_tensor(y_pred)
    y_true = math_ops.cast(y_true, y_pred.dtype)
    y_true = K.clip(y_true, K.epsilon(), 1)
    y_pred = K.clip(y_pred, K.epsilon(), 1)
    return math_ops.reduce_sum(y_true * math_ops.log(y_true / y_pred), axis=-1)
def mean_absolute_percentage_error(y_true, y_pred):
    diff = math_ops.abs(
        (y_true - y_pred) / K.clip(math_ops.abs(y_true), K.epsilon(), None))
    return 100. * K.mean(diff, axis=-1)
def mean_squared_logarithmic_error(y_true, y_pred):
    first_log = math_ops.log(K.clip(y_pred, K.epsilon(), None) + 1.)
    second_log = math_ops.log(K.clip(y_true, K.epsilon(), None) + 1.)
    return K.mean(math_ops.square(first_log - second_log), axis=-1)
def kullback_leibler_divergence(y_true, y_pred):
    y_true = K.clip(y_true, K.epsilon(), 1)
    y_pred = K.clip(y_pred, K.epsilon(), 1)
    return math_ops.reduce_sum(y_true * math_ops.log(y_true / y_pred), axis=-1)