def classifier_cat_loss(y_true, y_pred):
    '''
    Classifier loss based on categorical cross-entropy in the target domain.
      0 : batch_size                                           -> source samples
      batch_size : batch_size + int(batch_size*label_percent)  -> target labeled samples
      batch_size + int(batch_size*label_percent) : end         -> target unlabeled samples
    self.gamma is the optimal transport plan.
    '''
    ys = y_true[:batch_size, :]  # source true labels
    yt = y_true[batch_size:batch_size + int(batch_size * label_percent), :]  # target true labels
    source_ypred = y_pred[:batch_size, :]  # source prediction
    ypred_t = y_pred[batch_size:batch_size + int(batch_size * label_percent), :]  # target labeled prediction
    ypred_tt = y_pred[batch_size + int(batch_size * label_percent):, :]  # target unlabeled prediction

    source_loss = K.mean(K.categorical_crossentropy(ys, source_ypred))  # source cross-entropy
    target_loss = K.mean(K.categorical_crossentropy(yt, ypred_t))  # target labeled cross-entropy

    # group loss based on the double sum sum_ij (ys^i, log ypred_tt^j),
    # weighted by the optimal transport plan
    ypred_tt = K.log(ypred_tt)
    loss = -K.dot(ys, K.transpose(ypred_tt))
    group_loss = K.sum(self.gamma[:, len(yt):] * loss)

    # debug prints:
    # print(K.print_tensor(group_loss, message='group_loss = '))
    # print(K.print_tensor(source_loss, message='source_loss = '))
    # print(K.print_tensor(target_loss, message='target_loss = '))

    # returns weighted sum of source loss, target loss and group loss
    return self.tloss * target_loss + self.gloss * group_loss + self.sloss * source_loss
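# batch_size, label_percent and the self.* weights above are free variables, so
# the loss cannot be handed to Keras as-is. A minimal factory sketch that binds
# them in a closure; make_classifier_cat_loss, gamma and the weight arguments
# are illustrative names, not part of the original code:
from keras import backend as K

def make_classifier_cat_loss(batch_size, label_percent, gamma,
                             sloss=1.0, tloss=1.0, gloss=1.0):
    n_lab = batch_size + int(batch_size * label_percent)

    def loss_fn(y_true, y_pred):
        ys, yt = y_true[:batch_size, :], y_true[batch_size:n_lab, :]
        source_loss = K.mean(K.categorical_crossentropy(ys, y_pred[:batch_size, :]))
        target_loss = K.mean(K.categorical_crossentropy(yt, y_pred[batch_size:n_lab, :]))
        pairwise = -K.dot(ys, K.transpose(K.log(y_pred[n_lab:, :])))
        group_loss = K.sum(gamma[:, n_lab - batch_size:] * pairwise)
        return sloss * source_loss + tloss * target_loss + gloss * group_loss

    return loss_fn

# model.compile(optimizer='adam',
#               loss=make_classifier_cat_loss(batch_size, label_percent, gamma))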
def categorical_crossentropy(inputs):
    print("Categorical cross entropy inputs : ", inputs)
    if len(inputs) == 2:
        [mu1, mu2] = inputs
        if isinstance(mu2, list):
            return average(
                [K.categorical_crossentropy(mu1, pred) for pred in mu2])
        else:
            return K.categorical_crossentropy(mu1, mu2)
    else:
        true = inputs[0]
        return average([
            K.categorical_crossentropy(true, inputs[pred])
            for pred in range(1, len(inputs))
        ])
def selective_loss(y_true, y_pred):
    em_c = K.mean(y_pred[:, -1])
    loss = K.categorical_crossentropy(
        K.repeat_elements(y_pred[:, -1:], self.num_classes, axis=1) * y_true[:, :],
        y_pred[:, :-1]) + lamda * K.maximum(-em_c + c, 0) ** 2
    return loss
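# selective_loss appears to follow the SelectiveNet pattern: y_pred is assumed
# to carry self.num_classes prediction columns plus a final selection column
# g(x), `c` is the target coverage and `lamda` weighs the coverage penalty
# max(c - mean(g), 0)^2. A hedged standalone sketch with those names bound:
from keras import backend as K

def make_selective_loss(num_classes, c=0.8, lamda=32.0):
    def selective_loss(y_true, y_pred):
        em_c = K.mean(y_pred[:, -1])  # empirical coverage of the selection head
        sel = K.repeat_elements(y_pred[:, -1:], num_classes, axis=1)
        return (K.categorical_crossentropy(sel * y_true, y_pred[:, :-1])
                + lamda * K.maximum(c - em_c, 0) ** 2)
    return selective_loss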
def tversky_crossentropy(y_true, y_pred):
    tversky = tversky_loss(y_true, y_pred)
    crossentropy = K.categorical_crossentropy(y_true, y_pred)
    crossentropy = K.mean(crossentropy)
    return tversky + crossentropy
def focal_loss_ce(self, y_true, y_pred):
    # only missing in this FL is y_pred clipping
    weight = (1 - y_pred)
    weight *= weight
    # alpha = 0.25
    weight *= 0.25
    return K.categorical_crossentropy(weight * y_true, y_pred)
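# The comment above notes the missing y_pred clipping; a hedged variant that
# guards the log inside the cross-entropy against predictions of exactly 0 or 1:
from keras import backend as K

def focal_loss_ce_clipped(y_true, y_pred, alpha=0.25):
    y_pred = K.clip(y_pred, K.epsilon(), 1.0 - K.epsilon())
    weight = alpha * K.square(1 - y_pred)  # (1 - p)^2, scaled by alpha
    return K.categorical_crossentropy(weight * y_true, y_pred)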
def custom_loss(y_true, y_predict):
    # y_true has the shape of the last layer of the model, i.e. (?, 23), where ? is the batch size
    # y_predict has the same shape (?, 23) as y_true
    # both y_predict and y_true are tensors
    # we want categorical crossentropy, but with a penalty for being far from the actual answer
    loss_variable = calculate_new_loss_variable(y_true, y_predict)
    return K.categorical_crossentropy(y_true, y_predict) + loss_variable
def ssd_loss(y_true, y_pred):
    '''
    https://arxiv.org/pdf/1512.02325.pdf
    Arguments
        y_true: (h*w*k, total_classes+1+4)
        y_pred: (h*w*k, total_classes+1+4)
    Return
        loss
    '''
    true_clz_2dtensor = y_true[:, :total_classes+1]  # (h*w*k, total_classes+1)
    pred_clz_2dtensor = y_pred[:, :total_classes+1]  # (h*w*k, total_classes+1)
    true_loc_2dtensor = y_true[:, total_classes+1:]  # (h*w*k, 4)
    pred_loc_2dtensor = y_pred[:, total_classes+1:]  # (h*w*k, 4)

    sum_true_clz_2dtensor = tf.math.reduce_sum(input_tensor=true_clz_2dtensor, axis=-1)  # (h*w*k,)
    selected_clz_indices = tf.where(
        condition=tf.math.equal(x=sum_true_clz_2dtensor, y=1))  # foreground, background
    selected_loc_indices = tf.where(
        condition=tf.math.logical_and(
            x=tf.math.equal(x=sum_true_clz_2dtensor, y=1),
            y=tf.math.not_equal(x=true_clz_2dtensor[:, -1], y=1)))  # foreground

    true_clz_2dtensor = tf.gather_nd(params=true_clz_2dtensor, indices=selected_clz_indices)  # (fb, total_classes+1)
    pred_clz_2dtensor = tf.gather_nd(params=pred_clz_2dtensor, indices=selected_clz_indices)  # (fb, total_classes+1)
    true_loc_2dtensor = tf.gather_nd(params=true_loc_2dtensor, indices=selected_loc_indices)  # (f, 4)
    pred_loc_2dtensor = tf.gather_nd(params=pred_loc_2dtensor, indices=selected_loc_indices)  # (f, 4)

    clz_loss = categorical_crossentropy(true_clz_2dtensor, pred_clz_2dtensor)  # (fb,)
    loc_loss = tf.math.reduce_sum(input_tensor=smooth_l1(true_loc_2dtensor, pred_loc_2dtensor), axis=-1)  # (f,)
    loss = tf.math.reduce_mean(clz_loss) + lamda * tf.math.reduce_mean(loc_loss)
    return loss
def compute_loss(y_true, y_pred):
    # predicted probability of the correct label, one value per example
    # (K.reduce_sum does not exist in the Keras backend; K.sum with axis=-1
    # gives the intended per-example sum)
    pred_label_probs = K.sum(y_true * y_pred, axis=-1)
    # compute the loss only for the less confident predictions
    loss_mask = pred_label_probs < threshold
    loss = K.categorical_crossentropy(tf.boolean_mask(y_true, loss_mask),
                                      tf.boolean_mask(y_pred, loss_mask))
    return loss
def diag_fisher(model, data):
    """
    Computes the diagonal of the Fisher information.
    The last layer of the model has to have softmax (probabilities)
    or linear (logits) activation.
    """
    last_activation = model.layers[-1].activation.__name__
    if last_activation == 'softmax':
        from_logits = False
    elif last_activation == 'linear':
        from_logits = True
    else:
        raise InputError(
            "The last layer has to have softmax or linear activation")

    xs = tf.Variable(data)
    with tf.GradientTape() as tape:
        y = model(xs)
        # one-hot encode the predicted class of each sample
        max_vals = tf.reduce_max(y, axis=1, keepdims=True)
        cond = tf.equal(y, max_vals)
        y_pred = tf.where(cond, tf.ones_like(y), tf.zeros_like(y))
        cc = K.categorical_crossentropy(y_pred, y, from_logits=from_logits)
    tape_grad = tape.gradient(cc, model.trainable_variables)

    sess = K.get_session()
    sess.run(tf.variables_initializer([xs]))
    grads = sess.run(tape_grad)
    fisher = [g**2 for g in grads]
    fisher_flatten = np.concatenate([np.reshape(f, (-1)) for f in fisher]).reshape(-1)
    return fisher_flatten
def weightedLoss(true, pred, weightsList):
    axis = -1  # if channels last
    # axis = 1  # if channels first

    # argmax returns the index of the element with the greatest value
    # done in the class axis, it returns the class index
    classSelectors = K.argmax(true, axis=axis)
    # if your loss is sparse, use only true as classSelectors

    # considering weights are ordered by class, for each class
    # true(1) if the class index is equal to the weight index
    classSelectors = [
        K.equal(tf.cast(i, tf.int64), tf.cast(classSelectors, tf.int64))
        for i in range(len(weightsList))
    ]

    # casting boolean to float for calculations
    # each tensor in the list contains 1 where ground true class is equal to its index
    # if you sum all these, you will get a tensor full of ones.
    classSelectors = [K.cast(x, K.floatx()) for x in classSelectors]

    # for each of the selections above, multiply their respective weight
    weights = [sel * w for sel, w in zip(classSelectors, weightsList)]

    # sums all the selections
    # result is a tensor with the respective weight for each element in predictions
    weightMultiplier = weights[0]
    for i in range(1, len(weights)):
        weightMultiplier = weightMultiplier + weights[i]

    # make sure your originalLossFunc only collapses the class axis
    # you need the other axes intact to multiply the weights tensor
    loss = K.categorical_crossentropy(true, pred)
    loss = loss * weightMultiplier
    return loss
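# Quick eager-mode check (TF 2.x) of the selection logic above, on an
# illustrative 3-class batch; the weights and values are made up:
import tensorflow as tf
from tensorflow.keras import backend as K

true = tf.constant([[1., 0., 0.],
                    [0., 0., 1.]])
pred = tf.constant([[0.7, 0.2, 0.1],
                    [0.1, 0.3, 0.6]])
# class 0 weighted 1.0, class 1 weighted 2.0, class 2 weighted 3.0:
# the per-sample crossentropy ends up scaled by [1.0, 3.0]
print(weightedLoss(true, pred, [1.0, 2.0, 3.0]))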
def map_fn(i):
    std_samples = dist.sample(1)
    distorted_loss = K.categorical_crossentropy(pred + std_samples, true, from_logits=True)
    diff = undistorted_loss - distorted_loss
    return -K.elu(diff)
def focal_loss_fixed(target_tensor, prediction_tensor):
    '''
    prediction_tensor is the output tensor with shape [None, 100], where 100 is the number of classes
    target_tensor is the label tensor, same shape as prediction_tensor
    '''
    import tensorflow as tf
    from tensorflow.python.ops import array_ops

    # 1 # focal loss with no balanced weight, as presented in the paper, eq. (4)
    zeros = array_ops.zeros_like(prediction_tensor, dtype=prediction_tensor.dtype)
    one_minus_p = array_ops.where(tf.greater(target_tensor, zeros),
                                  target_tensor - prediction_tensor, zeros)
    FT = -1 * (one_minus_p ** gamma) * tf.log(tf.clip_by_value(prediction_tensor, 1e-8, 1.0))

    # 2 # balanced weight alpha
    classes_weight = array_ops.zeros_like(prediction_tensor, dtype=prediction_tensor.dtype)
    total_num = float(sum(classes_num))
    classes_w_t1 = [total_num / ff for ff in classes_num]
    sum_ = sum(classes_w_t1)
    classes_w_t2 = [ff / sum_ for ff in classes_w_t1]  # scale
    classes_w_tensor = tf.convert_to_tensor(classes_w_t2, dtype=prediction_tensor.dtype)
    classes_weight += classes_w_tensor
    alpha = array_ops.where(tf.greater(target_tensor, zeros), classes_weight, zeros)

    # 3 # balanced focal loss
    balanced_fl = alpha * FT
    balanced_fl = tf.reduce_mean(balanced_fl)

    # 4 # add a label-smoothing term to prevent overfitting
    # reference: https://spaces.ac.cn/archives/4493
    nb_classes = len(classes_num)
    final_loss = (1 - e) * balanced_fl + e * K.categorical_crossentropy(
        K.ones_like(prediction_tensor) / nb_classes, prediction_tensor)
    return final_loss
def rpn_loss(y_true, y_pred):
    '''
    Arguments
        y_true: (batch_size, h, w, 6k)
        y_pred: (batch_size, h, w, 6k)
    Return
        loss
    '''
    # K here is the anchor count and shadows any Keras backend alias;
    # categorical_crossentropy, sum and mean below are therefore assumed to be
    # imported directly from the backend
    K = y_pred.shape[3] // 6
    true_clz_4dtensor = y_true[:, :, :, :2*K]  # (batch_size, h, w, 2k)
    true_bbe_4dtensor = y_true[:, :, :, 2*K:]  # (batch_size, h, w, 4k)
    pred_clz_4dtensor = y_pred[:, :, :, :2*K]  # (batch_size, h, w, 2k)
    pred_bbe_4dtensor = y_pred[:, :, :, 2*K:]  # (batch_size, h, w, 4k)

    true_clz_2dtensor = tf.reshape(tensor=true_clz_4dtensor, shape=[-1, 2])  # (h*w*k, 2)
    true_bbe_2dtensor = tf.reshape(tensor=true_bbe_4dtensor, shape=[-1, 4])  # (h*w*k, 4)
    pred_clz_2dtensor = tf.reshape(tensor=pred_clz_4dtensor, shape=[-1, 2])  # (h*w*k, 2)
    pred_bbe_2dtensor = tf.reshape(tensor=pred_bbe_4dtensor, shape=[-1, 4])  # (h*w*k, 4)

    # add a small value where the output is zero, to avoid log(0) = -inf
    pred_clz_2dtensor = tf.where(
        condition=tf.math.equal(x=pred_clz_2dtensor, y=0.0),
        x=0.00001, y=pred_clz_2dtensor)

    LAMBDA = 1.0
    L_clz = categorical_crossentropy(target=true_clz_2dtensor, output=pred_clz_2dtensor)  # (h*w*k,)
    L_bbe = balanced_l1(true_bbe_2dtensor, pred_bbe_2dtensor)  # (h*w*k, 4)
    L_bbe = sum(x=L_bbe, axis=-1)  # (h*w*k,)
    L = mean(L_clz) + LAMBDA * mean(true_clz_2dtensor[:, 0] * L_bbe)
    return L
def rpn_loss_regr_fixed_num(y_true, y_pred):
    shape = K.shape(y_true)
    true_reshaped = K.reshape(y_true, (C.BATCH_SIZE, 7, 7, 5, 25))
    pred_reshaped = K.reshape(y_pred, (C.BATCH_SIZE, 7, 7, 5, 25))
    mask = true_reshaped[:, :, :, :, 4]
    # class_mask = K.reshape(K.repeat_elements(mask, 20, 3), (C.BATCH_SIZE, 7, 7, 5, 20))
    # coord_mask = K.reshape(K.repeat_elements(mask, 4, 3), (C.BATCH_SIZE, 7, 7, 5, 4))
    # object_mask = mask
    # no_object_mask = 1 - mask

    class_loss = 10 * (1 - K.categorical_crossentropy(
        true_reshaped[:, :, :, :, 5:], K.softmax(pred_reshaped[:, :, :, :, 5:])))
    object_square = K.square(1 - K.sigmoid(pred_reshaped[:, :, :, :, 4]))
    object_loss = object_lambda * K.sum(object_square)
    no_object_square = K.square(0 - K.sigmoid(pred_reshaped[:, :, :, :, 4]))
    no_object_loss = object_lambda * K.sum(no_object_square)
    coord_square = K.square(true_reshaped[:, :, :, :, :4] - pred_reshaped[:, :, :, :, :4])
    coord_loss = coord_lambda * K.sum(coord_square)
    return (class_loss + object_loss + no_object_loss + coord_loss)
def sensitivtyNLL(self, data, labels, batchSize=128):
    """
    Returns the sensitivity of the categorical crossentropy with respect to the input data.

    :param data: Input data.
    :param labels: Respective labels.
    :param batchSize: The network iterates through the dataset with batches,
        whose batch size is given by this parameter.
    """
    # Gradient of the categorical crossentropy of the model output with
    # respect to the model input
    grads = K.gradients(
        K.categorical_crossentropy(target=labels,
                                   output=self.network.output,
                                   from_logits=False),
        self.network.input)[0]
    # Define a Keras function to calculate the gradient for a given input
    grads_func = K.function([self.network.input], [grads])
    # Run the calculation for the gradients
    sens = applyFunctionBatchwise(grads_func, data, batch_size=batchSize)
    # sens = grads_func([data])
    return sens
def mean_score_classification_loss(y_true, y_pred):
    """
    Use the mean score of an image against all the samples from the same class
    to get a score per class for each image.
    """
    y_pred_by_label = tf.linalg.normalize(
        tf.linalg.matmul(y_pred,
                         tf.math.divide_no_nan(y_true, tf.reduce_sum(y_true, axis=0))),
        ord=1, axis=1
    )[0]
    return K.categorical_crossentropy(y_true, y_pred_by_label)
def knowledge_distillation_loss(target, output, classical_loss_weight, temperature, num_classes):
    """
    loss used for knowledge distillation
    :param target: one-hot encoded ground truth classes and teacher logits
    :param output: output of the student
    :param classical_loss_weight: weight (float between 0 and 1) of the classical cross entropy loss of the student
    :param temperature: temperature that was used for the teacher predictions
    :param num_classes: the number of classes used in this classification problem
    :return: the loss to backprop on for the student
    """
    target_true, target_teacher = target[:, :num_classes], target[:, num_classes:]
    output_normal, output_soft = softmax(output), softmax(output / temperature)
    return classical_loss_weight * categorical_crossentropy(target_true, output_normal) + \
        (1 - classical_loss_weight) * (temperature ** 2) * categorical_crossentropy(target_teacher, output_soft)
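# The `target` above concatenates the one-hot ground truth with the teacher's
# soft targets. The docstring calls the second block "teacher logits", but the
# cross-entropy uses it directly as a target distribution, so softening the
# teacher logits with the same temperature is assumed here; this builder is a
# sketch, not part of the original code:
import numpy as np

def build_distillation_targets(y_onehot, teacher_logits, temperature):
    # temperature-softened teacher probabilities (row-wise stable softmax)
    scaled = teacher_logits / temperature
    exp = np.exp(scaled - scaled.max(axis=1, keepdims=True))
    teacher_soft = exp / exp.sum(axis=1, keepdims=True)
    return np.concatenate([y_onehot, teacher_soft], axis=1)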
def sgd(x, x0, target, pred, alpha, lmb):
    # compute loss
    loss1 = K.categorical_crossentropy(target, pred)
    loss2 = K.mean(K.square(x - x0), axis=[1, 2, 3])
    loss = loss1 + 0.5 * lmb * loss2
    deriv, = K.gradients(loss, x)
    next_x = K.clip(x - alpha * K.sign(deriv), 0, 1)
    return next_x, loss
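# sgd above builds one symbolic update step (TF1-style Keras). A hedged driver
# sketch that compiles the step once with K.function and iterates it in numpy;
# `model` and the placeholder shapes are assumptions:
from keras import backend as K

x = K.placeholder(shape=model.input_shape)        # current input
x0 = K.placeholder(shape=model.input_shape)       # anchor input
target = K.placeholder(shape=model.output_shape)  # desired one-hot target
next_x, loss = sgd(x, x0, target, model(x), alpha=0.01, lmb=0.1)
step_fn = K.function([x, x0, target], [next_x, loss])

# x_np = x0_np.copy()
# for _ in range(40):
#     x_np, loss_np = step_fn([x_np, x0_np, y_target_np])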
def sup_loss(y_true, y_pred):
    # rows whose label vector sums to zero are unlabeled
    m = K.sum(y_true, axis=-1)
    return K.switch(
        K.equal(K.sum(y_true), 0),
        0.,
        K.sum(
            K.categorical_crossentropy(
                # tf.boolean_mask expects a boolean mask, so cast the row sums
                tf.boolean_mask(y_true, tf.cast(m, tf.bool)),
                tf.boolean_mask(y_pred, tf.cast(m, tf.bool)),
                from_logits=True)) / K.sum(y_true))
def dice_crossentropy(y_truth, y_pred):
    # Obtain Soft DSC
    dice = dice_soft_loss(y_truth, y_pred)
    # Obtain Crossentropy
    crossentropy = K.categorical_crossentropy(y_truth, y_pred)
    crossentropy = K.mean(crossentropy)
    # Return sum
    return dice + crossentropy
def masked_categorical_crossentropy(y_true, y_pred):
    mask = K.all(K.equal(y_true, mask_value), axis=-1)
    mask = 1 - K.cast(mask, K.floatx())
    loss = K.categorical_crossentropy(y_true, y_pred) * mask
    return K.sum(loss) / K.sum(mask)
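# Minimal eager-mode check (TF 2.x) of the masking above, assuming padded rows
# are all-zero one-hot vectors and mask_value = 0; the values are illustrative:
import tensorflow as tf
from tensorflow.keras import backend as K

mask_value = 0.0
y_true = tf.constant([[0., 1., 0.],
                      [0., 0., 0.]])  # second row is padding and gets masked
y_pred = tf.constant([[0.1, 0.8, 0.1],
                      [0.3, 0.3, 0.4]])
print(masked_categorical_crossentropy(y_true, y_pred))
# equals the plain crossentropy of the first row alone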
def weighted_categorical_crossentropy(y_true, y_pred):
    # y_true and y_pred are tensors; y_true is assumed to encode a sample
    # weight as the magnitude of its hot entries, so rescale it back to
    # one-hot and apply the weight to the loss instead
    weight = K.max(y_true)
    y_true /= weight
    loss = K.categorical_crossentropy(y_true, y_pred) * weight
    return loss
def tversky_crossentropy(y_truth, y_pred):
    # Obtain Tversky Loss
    tversky = tversky_loss(y_truth, y_pred)
    # Obtain Crossentropy
    crossentropy = K.categorical_crossentropy(y_truth, y_pred)
    crossentropy = K.mean(crossentropy)
    # Return sum
    return tversky + crossentropy
def match_beliefs(X_test, student, teacher):
    # Total steps
    K = 30
    # Total classes
    C = 10
    # Other classes
    C_other = 9
    # Pair
    P = 2
    X_test = to_tensor(X_test)
    student_preds = tf.argmax(student(X_test), -1)
    teacher_preds = tf.argmax(teacher(X_test), -1)
    common_preds = tf.reshape(tf.where(student_preds == teacher_preds), [-1])
    num_common_preds = tf.size(common_preds)
    transition_result = np.empty((num_common_preds, C_other, K, P))
    for row, i in enumerate(tqdm(common_preds)):
        X = X_test[i:i + 1]
        student_pred = np.argmax(student.predict(X)[0])
        other_classes = set(digit for digit in range(C))
        other_classes.remove(student_pred)
        for col, other_class in enumerate(other_classes):
            X_step = X
            for step in range(K):
                with tf.GradientTape() as gradientTape:
                    gradientTape.watch(X_step)
                    student_pred = student(X_step)
                    teacher_pred = teacher(X_step)
                    loss = categorical_crossentropy(
                        tf.one_hot([other_class], C), student_pred)
                X_step -= gradientTape.gradient(loss, X_step)
                transition_step = [
                    student_pred[0, other_class],
                    teacher_pred[0, other_class]
                ]
                transition_result[row, col, step] = transition_step
    return transition_result
def masked_categorical_crossentropy(y_true, y_pred):
    # unlike the elementwise variant above, this one treats class index 0
    # as the mask/padding class
    mask_value = 0
    y_true_id = K.argmax(y_true)
    mask = K.cast(K.equal(y_true_id, mask_value), K.floatx())
    mask = 1.0 - mask
    loss = K.categorical_crossentropy(y_true, y_pred) * mask
    # take average w.r.t. the number of unmasked entries
    return K.sum(loss) / K.sum(mask)
def w_categorical_crossentropy(y_true, y_pred, weights):
    nb_cl = len(weights)
    final_mask = K.zeros_like(y_pred[:, 0])
    y_pred_max = K.max(y_pred, axis=1)
    y_pred_max = K.expand_dims(y_pred_max, 1)
    y_pred_max_mat = K.equal(y_pred, y_pred_max)
    for c_p, c_t in product(range(nb_cl), range(nb_cl)):
        final_mask += (K.cast(weights[c_t, c_p], K.floatx())
                       * K.cast(y_pred_max_mat[:, c_p], K.floatx())
                       * K.cast(y_true[:, c_t], K.floatx()))
    # Keras 2 expects the (target, output) argument order
    return K.categorical_crossentropy(y_true, y_pred) * final_mask
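# weights above is an (nb_cl, nb_cl) penalty matrix indexed as
# weights[true_class, predicted_class]. A hedged usage sketch: build the
# matrix, bind it with functools.partial, and compile; the 5x penalty on
# confusing classes 0 and 2 is illustrative:
import numpy as np
from functools import partial

nb_cl = 3
w_array = np.ones((nb_cl, nb_cl))
w_array[0, 2] = 5.0  # true class 0 predicted as class 2
w_array[2, 0] = 5.0  # true class 2 predicted as class 0

custom_loss = partial(w_categorical_crossentropy, weights=w_array)
custom_loss.__name__ = 'w_categorical_crossentropy'  # Keras wants a name
# model.compile(optimizer='adam', loss=custom_loss)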
def focal_loss_ce(y_true, y_pred):
    """Alternative CE focal loss (not used)"""
    # only missing in this FL is y_pred clipping
    weight = (1 - y_pred)
    weight *= weight
    # alpha = 0.25
    weight *= 0.25
    return K.categorical_crossentropy(weight * y_true, y_pred)
def loss_metric(y_true, y_pred):
    # We have two outputs... get the tuple
    policy_true, value_true = y_true
    policy_pred, value_pred = y_pred
    CE = K.categorical_crossentropy(policy_true, policy_pred)
    # the Keras backend has no mean_squared_error, so compute it directly
    MSE = K.mean(K.square(value_true - value_pred))
    return CE + MSE
def cce_iou_dice(y_true, y_pred, smooth=1, cat_weight=1, iou_weight=1, dice_weight=1):
    return cat_weight * K.categorical_crossentropy(y_true, y_pred) \
        + iou_weight * log_iou(y_true, y_pred, smooth) \
        + dice_weight * log_dice(y_true, y_pred, smooth)
def weighted_crossentropy(targets, inputs):
    # use a modulating factor which down-weights the loss assigned to
    # well-classified examples, to prevent numerous easy examples from
    # overwhelming the classifier
    y_class = K.argmax(inputs, axis=1)
    w = tf.gather(alpha_cb, y_class)
    bce = K.categorical_crossentropy(targets, inputs)
    bce_exp = K.exp(-bce)
    # focal loss
    fl = K.mean(w * K.pow((1 - bce_exp), gamma) * bce)
    return fl