def loss_function(target_subtoken, y_pred):
    # The prediction is a probability; work with log probabilities for speed and smoothness.
    print("Model objective: y_pred.shape: {}".format(y_pred.shape))

    # I_C = indicator of whether the target subtoken exists in the input token
    # TODO probably not ok, debug using TF eager
    I_C = K.expand_dims(
        K.cast(
            K.any(K.equal(input_code_subtoken, K.cast(target_subtoken, 'int32')), axis=-1),
            dtype='float32'),
        -1)
    print("Model objective: I_C.shape: {}".format(I_C.shape))
    # I_C shape = [batch_size, token, max_char_len, 1]

    # TODO should we add a penalty if no subtoken appears in the input? Yes.
    probability_correct_copy = K.log(copy_probability) + K.log(K.sum(I_C * copy_weights) + mu)
    print("Model objective: probability_correct_copy.shape: {}".format(
        probability_correct_copy.shape))

    # Penalise the model when the cnn-attention predicts unknown
    # but the value can be predicted from the copy mechanism.
    mask_unknown = K.cast(K.equal(target_subtoken, unknown_id), dtype='float32') * mu
    probability_target_token = K.sum(
        K.log(1 - copy_probability) + K.log(y_pred) + mask_unknown, -1, True)
    print("Model objective: probability_target_token.shape: {}".format(
        probability_target_token.shape))

    loss = K.logsumexp([probability_correct_copy, probability_target_token])
    return K.mean(loss)
def categorical_crossentropy(y_true, y_pred, class_weights=None, axis=None, from_logits=False):
    """Categorical crossentropy between an output tensor and a target tensor.

    Args:
        y_true: A tensor of the same shape as y_pred.
        y_pred: A tensor resulting from a softmax (unless from_logits is True,
            in which case y_pred is expected to be the logits).
        from_logits: Boolean, whether y_pred is the result of a softmax,
            or is a tensor of logits.

    Returns:
        tensor: Output tensor.
    """
    # Note: tf.nn.softmax_cross_entropy_with_logits expects logits,
    # Keras expects probabilities.
    if axis is None:
        axis = 1 if K.image_data_format() == 'channels_first' else K.ndim(y_pred) - 1
    if not from_logits:
        # scale preds so that the class probas of each sample sum to 1
        y_pred = y_pred / K.sum(y_pred, axis=axis, keepdims=True)
        # manual computation of crossentropy
        _epsilon = tf.convert_to_tensor(K.epsilon(), y_pred.dtype.base_dtype)
        y_pred = tf.clip_by_value(y_pred, _epsilon, 1. - _epsilon)
        if class_weights is None:
            return -K.sum(y_true * K.log(y_pred), axis=axis)
        return -K.sum(y_true * K.log(y_pred) * class_weights, axis=axis)
    return tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred)
def angular_loss_2(y_true, y_pred):
    y_pred = K.clip(y_pred, _EPSILON, 1.0 - _EPSILON)
    g = tf.constant(1.0, shape=[1], dtype=tf.float32)
    c = tf.constant(4.0, shape=[1], dtype=tf.float32)
    d = tf.constant(2.0, shape=[1], dtype=tf.float32)
    alpha = tf.constant(45.0, shape=[1], dtype=tf.float32)

    losses = []
    losses2 = []
    # The batch is laid out as consecutive (anchor, positive, negative) triplets.
    for i in range(0, batch_size, 3):
        try:
            xa = y_pred[i + 0]
            xp = y_pred[i + 1]
            xn = y_pred[i + 2]
            fapn = c * (tf.tan(alpha * K.transpose(xa + xp) * xn) ** 2) \
                - d * (g + tf.tan(alpha) ** 2) * K.transpose(xa) * xp
            losses.append(fapn)
            losses2.append(K.transpose(xa) * xn - K.transpose(xa) * xp)
        except Exception:
            # Skip incomplete triplets at the end of the batch.
            continue

    loss = K.sum(K.log(1 + 2 * K.sum([K.exp(v) for v in losses])))
    loss2 = K.sum(K.log(1 + 2 * K.sum([K.exp(v) for v in losses2])))
    loss = loss + 2 * loss2
    loss = loss / (batch_size / 3)
    zero = tf.constant(0.0, shape=[1], dtype=tf.float32)
    return tf.maximum(loss, zero)
def focal_loss(y_true, y_pred):
    gamma = 2.0
    alpha = 0.25
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
    return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) \
        - K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0))
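
# Hypothetical usage sketch (not part of the snippet above): assuming `tf` is TensorFlow,
# `K` is its Keras backend (as in the surrounding snippets), and `focal_loss` is in scope,
# the loss plugs into a Keras binary classifier like any other loss function.
import tensorflow as tf

model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation='relu', input_shape=(8,)),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss=focal_loss, metrics=['accuracy'])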
def neg_log_likelihood(y_true, y_pred):
    """Returns negative log likelihood of Gaussian"""
    y_true = y_true[:, 0]
    mean = y_pred[:, 0]
    variance = K.softplus(y_pred[:, 1]) + 1e-6
    log_variance = K.log(variance)
    return 0.5 * K.mean(log_variance, axis=-1) \
        + 0.5 * K.mean(K.square(y_true - mean) / variance, axis=-1) \
        + 0.5 * K.log(2 * np.pi)
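
# Hypothetical head sketch (an assumption, not from the original code): neg_log_likelihood
# above reads the predicted mean from y_pred[:, 0] and a pre-softplus variance from
# y_pred[:, 1], so the model would typically end in a linear Dense(2) output layer.
import tensorflow as tf

inputs = tf.keras.Input(shape=(10,))
hidden = tf.keras.layers.Dense(32, activation='relu')(inputs)
outputs = tf.keras.layers.Dense(2)(hidden)  # column 0: mean, column 1: raw variance
model = tf.keras.Model(inputs, outputs)
model.compile(optimizer='adam', loss=neg_log_likelihood)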
def _rbox_aabb_loss(ground_truth_aabb, predicted_aabb, EPS=K.epsilon()):
    ground_truth_area = _aabb_box_area(ground_truth_aabb)
    predicted_area = _aabb_box_area(predicted_aabb)
    intersected_area = _aabb_intersected_area(ground_truth_aabb, predicted_aabb)
    union_area = ground_truth_area + predicted_area - intersected_area
    # Equivalent to -log(intersected_area / union_area)
    return K.log(union_area + EPS) - K.log(intersected_area + EPS)
def weighted_bce_loss(y_true, y_pred, weight):
    # avoiding overflow
    epsilon = 1e-7
    y_pred = K.clip(y_pred, epsilon, 1. - epsilon)
    logit_y_pred = K.log(y_pred / (1. - y_pred))
    # https://www.tensorflow.org/api_docs/python/tf/nn/weighted_cross_entropy_with_logits
    loss = (1. - y_true) * logit_y_pred + (1. + (weight - 1.) * y_true) * \
        (K.log(1. + K.exp(-K.abs(logit_y_pred))) + K.maximum(-logit_y_pred, 0.))
    return K.sum(loss) / K.sum(weight)
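
# Illustrative check (not part of the original code): the expression above is the
# numerically stable form documented for tf.nn.weighted_cross_entropy_with_logits, so on
# the same logits and pos_weight the two computations should agree element-wise.
import tensorflow as tf

labels = tf.constant([[0.0, 1.0, 1.0]])
logits = tf.constant([[-1.2, 0.3, 2.0]])
pos_weight = 3.0
reference = tf.nn.weighted_cross_entropy_with_logits(labels=labels, logits=logits, pos_weight=pos_weight)
manual = (1. - labels) * logits + (1. + (pos_weight - 1.) * labels) * (
    tf.math.log(1. + tf.exp(-tf.abs(logits))) + tf.maximum(-logits, 0.))
print(reference.numpy(), manual.numpy())  # equal up to floating-point error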
def entropy_estimator_kl(x, var):
    # KL-based upper bound on the entropy of a mixture of Gaussians with covariance matrix var * I.
    # See Kolchinsky and Tracey, Estimating Mixture Entropy with Pairwise Distances, Entropy, 2017, Section 4,
    # and Kolchinsky and Tracey, Nonlinear Information Bottleneck, 2017, Eq. 10.
    dims, N = get_shape(x)
    dists = Kget_dists(x)
    dists2 = dists / (2 * var)
    normconst = (dims / 2.0) * K.log(2 * np.pi * var)
    lprobs = tf.reduce_logsumexp(-dists2, axis=1) - K.log(N) - normconst
    h = -K.mean(lprobs)
    return dims / 2 + h
def custom_loss(y_true, y_pred, loss_weights=loss_weights):  # Verified
    zero_index = K.zeros_like(y_true[:, 0])
    ones_index = K.ones_like(y_true[:, 0])

    # Classifier
    labels = y_true[:, 0]
    class_preds = y_pred[:, 0]
    bi_crossentropy_loss = -labels * K.log(class_preds) - (1 - labels) * K.log(1 - class_preds)

    classify_valid_index = tf.where(K.less(y_true[:, 0], 0), zero_index, ones_index)
    classify_keep_num = K.cast(
        tf.cast(tf.reduce_sum(classify_valid_index), tf.float32) * SAMPLE_KEEP_RATIO,
        dtype=tf.int32)
    # For the classification problem, only pick 70% of the valid samples.
    classify_loss_sum = bi_crossentropy_loss * tf.cast(classify_valid_index, bi_crossentropy_loss.dtype)
    classify_loss_sum_filtered, _ = tf.nn.top_k(classify_loss_sum, k=classify_keep_num)
    classify_loss = tf.where(K.equal(classify_keep_num, 0),
                             tf.constant(0, dtype=tf.float32),
                             K.mean(classify_loss_sum_filtered))

    # Bounding box regressor
    rois = y_true[:, 1:5]
    roi_preds = y_pred[:, 1:5]
    roi_raw_mean_square_error = K.sum(K.square(rois - roi_preds), axis=1)  # mse
    # roi_raw_smooth_l1_loss = K.mean(tf.where(K.abs(rois - roi_preds) < 1,
    #                                          0.5 * K.square(rois - roi_preds),
    #                                          K.abs(rois - roi_preds) - 0.5))  # L1 smooth loss
    roi_valid_index = tf.where(K.equal(K.abs(y_true[:, 0]), 1), ones_index, zero_index)
    roi_keep_num = K.cast(tf.reduce_sum(roi_valid_index), dtype=tf.int32)
    roi_valid_mean_square_error = roi_raw_mean_square_error * tf.cast(roi_valid_index, roi_raw_mean_square_error.dtype)
    roi_filtered_mean_square_error, _ = tf.nn.top_k(roi_valid_mean_square_error, k=roi_keep_num)
    roi_loss = tf.where(K.equal(roi_keep_num, 0),
                        tf.constant(0, dtype=tf.float32),
                        K.mean(roi_filtered_mean_square_error))
    # roi_valid_smooth_l1_loss = roi_raw_smooth_l1_loss * roi_valid_index
    # roi_filtered_smooth_l1_loss, _ = tf.nn.top_k(roi_valid_smooth_l1_loss, k=roi_keep_num)
    # roi_loss = K.mean(roi_filtered_smooth_l1_loss)

    # Landmark regressor
    pts = y_true[:, 5:17]
    pt_preds = y_pred[:, 5:17]
    pts_raw_mean_square_error = K.sum(K.square(pts - pt_preds), axis=1)  # mse
    # pts_raw_smooth_l1_loss = K.mean(tf.where(K.abs(pts - pt_preds) < 1,
    #                                          0.5 * K.square(pts - pt_preds),
    #                                          K.abs(pts - pt_preds) - 0.5))  # L1 smooth loss
    pts_valid_index = tf.where(K.equal(y_true[:, 0], -2), ones_index, zero_index)
    pts_keep_num = K.cast(tf.reduce_sum(pts_valid_index), dtype=tf.int32)
    pts_valid_mean_square_error = pts_raw_mean_square_error * tf.cast(pts_valid_index, tf.float32)
    pts_filtered_mean_square_error, _ = tf.nn.top_k(pts_valid_mean_square_error, k=pts_keep_num)
    pts_loss = tf.where(K.equal(pts_keep_num, 0),
                        tf.constant(0, dtype=tf.float32),
                        K.mean(pts_filtered_mean_square_error))
    # pts_valid_smooth_l1_loss = pts_raw_smooth_l1_loss * pts_valid_index
    # pts_filtered_smooth_l1_loss, _ = tf.nn.top_k(pts_valid_smooth_l1_loss, k=pts_keep_num)
    # pts_loss = K.mean(pts_filtered_smooth_l1_loss)

    loss = classify_loss * loss_weights[0] + roi_loss * loss_weights[1] + pts_loss * loss_weights[2]
    return loss
def east_loss(y_true, y_pred):
    sc_true = y_true[0]
    sc_pred = y_pred[0]
    # bb_true = y_true[1]
    # bb_pred = y_pred[1]
    # print(y_true)
    B = 1 - K.mean(sc_true)
    cl_loss = (-1) * B * sc_true * K.log(sc_pred + 1e-6) \
        - (1 - B) * (1 - sc_true) * K.log(1 - sc_pred + 1e-6)
    res = K.sum(cl_loss)
    return res
def build_predictor(self, predict_activation=None):
    """
    Construct the predictor network from the list of layers.

    After the last layer in self.predictorLayers_, a final Dense layer with
    self.predDim_ units is added (i.e. it outputs the prediction).

    Args:
        predict_activation: activation function for the final dense layer
    """
    if len(self.predictorLayers_) == 0:
        raise ValueError("Must add at least one predictor hidden layer")
    pred_in = self._build_decoder_inputs()
    h = self._edit_decoder_inputs(pred_in)
    for hid in self.predictorLayers_:
        h = hid(h)
    y_pred = Dense(units=self.predDim_, activation=predict_activation)(h)
    log_var_y = Dense(self.predDim_, name='log_var_y')(h)
    if not self.learnUncertainty_:
        # Fix the predictive variance to self.predVar_ instead of learning it.
        log_var_y = Lambda(
            lambda lv: 0 * lv + K.ones_like(lv) * K.log(K.variable(self.predVar_)))(log_var_y)
    self.predictor_ = Model(inputs=pred_in, outputs=[y_pred, log_var_y], name='predictor')
def _call_one_layer(self, inputs, flatten_memory, training, ws):
    dp_mask = self.get_dropout_mask_for_cell(inputs, training, count=1)
    rec_dp_mask = self.get_recurrent_dropout_mask_for_cell(flatten_memory, training, count=1)
    if 0 < self.dropout < 1:
        inputs = inputs * dp_mask[0]
    if 0 < self.recurrent_dropout < 1:
        flatten_memory = flatten_memory * rec_dp_mask[0]

    memory = array_ops.reshape(flatten_memory, shape=[-1, self.num_memory_slots, self.units])
    input_gate, forget_gate = self._input_and_forget_gates(inputs, memory, ws)
    hs, new_memory = self._attend_over_memory(inputs, memory, ws)
    next_memory = input_gate * new_memory + forget_gate * memory
    flatten_next_memory = array_ops.reshape(next_memory, shape=[-1, self.num_memory_slots * self.units])

    mus_and_log_sigmas = K.dot(hs, ws["random_kernel"])
    mus_and_log_sigmas = K.bias_add(mus_and_log_sigmas, ws["random_bias"])
    mus, log_sigmas = array_ops.split(mus_and_log_sigmas, 2, axis=-1)
    sigmas = K.log(1.0 + K.exp(log_sigmas + self.sigma_bias))  # softplus
    zs = K.random_normal(shape=K.shape(mus)) * sigmas + mus
    return zs, mus, sigmas, hs, flatten_next_memory
def loss(y_true, y_pred):
    # Multiply with the one hot encoded taken action
    prob = K.sum(y_true * y_pred, axis=-1)
    old_prob = K.sum(y_true * old_prediction, axis=-1)
    r = prob / (old_prob + 1e-10)
    return -K.mean(
        K.minimum(r * advantage,
                  K.clip(r, min_value=1 - loss_clipping, max_value=1 + loss_clipping) * advantage)
        + entropy_loss * -(prob * K.log(prob + 1e-10)))
def binary_focal_loss_fixed(y_true, y_pred):
    """
    :param y_true: A tensor of the same shape as `y_pred`
    :param y_pred: A tensor resulting from a sigmoid
    :return: Output tensor.
    """
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
    epsilon = K.epsilon()
    # clip to prevent NaN's and Inf's
    pt_1 = K.clip(pt_1, epsilon, 1. - epsilon)
    pt_0 = K.clip(pt_0, epsilon, 1. - epsilon)
    return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) \
        - K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0))
def line_loss(y_true, y_pred):
    r1 = y_true * y_pred
    r2 = K.sigmoid(r1)
    r3 = K.log(r2)
    result = -K.mean(r3)
    return result
def focal_loss(y_true, y_pred):
    # Define epsilon so that backpropagation will not result in NaN
    # for the 0-divisor case
    epsilon = K.epsilon()
    # Add the epsilon to prediction value
    # y_pred = y_pred + epsilon
    # Clip the prediction value
    y_pred = K.clip(y_pred, epsilon, 1.0 - epsilon)
    alpha_factor = K.ones_like(y_true) * alpha
    # Calculate p_t (the probability assigned to the true class)
    p_t = tf.where(K.equal(y_true, 1), y_pred, 1 - y_pred)
    # Calculate alpha_t
    alpha_t = tf.where(K.equal(y_true, 1), alpha_factor, 1 - alpha_factor)
    # Calculate cross entropy
    cross_entropy = -K.log(p_t)
    weight = alpha_t * K.pow((1 - p_t), gamma)
    # Calculate focal loss
    loss = weight * cross_entropy
    # Sum the losses in mini_batch
    loss = K.sum(loss, axis=1)
    return loss
def weighted_categorical_crossentropy(y_true, y_pred, n_classes=3, axis=None, from_logits=False):
    """Categorical crossentropy between an output tensor and a target tensor.

    Automatically computes the class weights from the target image and uses
    them to weight the cross entropy.

    Args:
        y_true: A tensor of the same shape as y_pred.
        y_pred: A tensor resulting from a softmax (unless from_logits is True,
            in which case y_pred is expected to be the logits).
        from_logits: Boolean, whether y_pred is the result of a softmax,
            or is a tensor of logits.

    Returns:
        tensor: Output tensor.
    """
    if from_logits:
        raise Exception('weighted_categorical_crossentropy cannot take logits')
    if axis is None:
        axis = 1 if K.image_data_format() == 'channels_first' else K.ndim(y_pred) - 1
    reduce_axis = [x for x in list(range(K.ndim(y_pred))) if x != axis]
    # scale preds so that the class probas of each sample sum to 1
    y_pred = y_pred / K.sum(y_pred, axis=axis, keepdims=True)
    # manual computation of crossentropy
    _epsilon = tf.convert_to_tensor(K.epsilon(), y_pred.dtype.base_dtype)
    y_pred = tf.clip_by_value(y_pred, _epsilon, 1. - _epsilon)
    y_true_cast = K.cast(y_true, K.floatx())
    total_sum = K.sum(y_true_cast)
    class_sum = K.sum(y_true_cast, axis=reduce_axis, keepdims=True)
    class_weights = 1.0 / K.cast_to_floatx(n_classes) * tf.divide(total_sum, class_sum + 1.)
    return -K.sum(y_true * K.log(y_pred) * class_weights, axis=axis)
def call(self, inputs, **kwargs):
    inputs = inputs if isinstance(inputs, list) else [inputs]
    if len(inputs) < 1 or len(inputs) > 2:
        raise ValueError("AttentionLayer expects one or two inputs.")
    actual_input = inputs[0]
    mask = inputs[1] if len(inputs) > 1 else None
    if mask is not None and not (
            ((len(mask.shape) == 3 and mask.shape[2] == 1) or len(mask.shape) == 2)
            and mask.shape[1] == self.input_length):
        raise ValueError(
            "`mask` should be of shape (batch, input_length) or (batch, input_length, 1) "
            "when calling an AttentionLayer.")
    assert actual_input.shape[-1] == self.attention_param.shape[0]

    # (batch, input_length, input_dim) * (input_dim, 1) ==> (batch, input_length, 1)
    attention_weights = K.dot(actual_input, self.attention_param)

    if mask is not None:
        if len(mask.shape) == 2:
            mask = K.expand_dims(mask, axis=2)  # (batch, input_length, 1)
        mask = K.log(mask)
        attention_weights += mask

    attention_weights = K.softmax(attention_weights, axis=1)  # (batch, input_length, 1)
    result = K.sum(actual_input * attention_weights, axis=1)  # (batch, input_dim) [multiplication uses broadcast]
    return result, attention_weights
def reinforce_loss(y_true, y_pred):
    # eps = 0.2
    # entropy_loss = 0.001 * K.mean(K.sum(y_pred * K.log(y_pred + 1e-10), axis=1, keepdims=True))
    # r = y_pred * y_true / (old_pred * y_true + 1e-10)
    # policy_loss = -K.mean(K.minimum(r * advantages, K.clip(r, 1 - eps, 1 + eps) * advantages))
    # return policy_loss + entropy_loss
    # return K.mean(-K.log(y_pred) * y_true)
    return 0.001 * K.mean(K.sum(y_pred * K.log(y_pred + 1e-10), axis=1, keepdims=True))
def loss(y_true, y_pred):
    prob = K.sum(y_true * y_pred)
    old_prob = K.sum(y_true * old_prediction)
    r = prob / (old_prob + 1e-10)
    return -K.log(prob + 1e-10) * K.mean(
        K.minimum(r * advantage, K.clip(r, min_value=0.8, max_value=1.2) * advantage))
def LossCCEPieceWise(self, y_true, y_pred):
    """Define a piece-wise categorical cross-entropy loss."""
    state1 = 0.5 * y_pred + 10e-3
    state2 = y_pred
    y_pred = tf.where(y_pred < 2 * 10e-3, x=state1, y=state2)
    loss = K.sum(-K.log(y_pred) * y_true)
    return loss
def ppo_loss(y_true, y_pred):
    eps = 0.2
    entropy_loss = 0.001 * K.mean(
        K.sum(y_pred * K.log(y_pred + 1e-10), axis=1, keepdims=True)
    )  # Danger: the mask of possible actions is not taken into account!
    r = y_pred * y_true / (old_pred * y_true + 1e-10)
    policy_loss = -K.mean(
        K.minimum(r * advantages, K.clip(r, 1 - eps, 1 + eps) * advantages)
    )
    return policy_loss + entropy_loss
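
# Hypothetical factory sketch (an assumption, not from the original code): ppo_loss above
# closes over `old_pred` and `advantages`, which are typically extra model inputs; a common
# pattern is to capture them in a closure like this when compiling the actor model.
from tensorflow.keras import backend as K

def make_ppo_loss(old_pred, advantages, eps=0.2, entropy_coeff=0.001):
    def ppo_loss(y_true, y_pred):
        entropy_loss = entropy_coeff * K.mean(
            K.sum(y_pred * K.log(y_pred + 1e-10), axis=1, keepdims=True))
        r = y_pred * y_true / (old_pred * y_true + 1e-10)
        policy_loss = -K.mean(
            K.minimum(r * advantages, K.clip(r, 1 - eps, 1 + eps) * advantages))
        return policy_loss + entropy_loss
    return ppo_loss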
def loss(y_true, y_pred):
    loss_val = -1 * K.sum(
        K.log(K.softmax(y_pred[:, :-1])) * y_true[:, :-1], axis=-1)
    return K.mean(
        K.switch(
            K.equal(task, 1005),
            loss_weights[task] * loss_val,
            K.switch(K.equal(y_true[:, -1], task), loss_val, loss_weights[task] * loss_val)))
def weighted_loss(y_true, y_pred):
    # return weighted_categorical_cross_entropy(y_true, y_pred, class_weights)
    # scale preds so that the class probas of each sample sum to 1
    y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
    # clip to prevent NaN's and Inf's
    y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())
    # calculate the weighted cross-entropy
    loss = y_true * class_weights * K.log(y_pred)
    loss = -K.sum(loss, -1)
    return loss
def line_loss(y_true, y_pred):
    try:
        import tensorflow as tf
    except ImportError:
        print("tensorflow not found, please install")
        raise
    from tensorflow.python.keras import backend as K

    y = K.sigmoid(y_true * y_pred)
    # Avoid NaN in the result of 'K.log'
    return -K.mean(K.log(tf.clip_by_value(y, 1e-8, tf.reduce_max(y))))
def custom_loss(self, y_true, y_pred):
    """
    GloVe's loss function; see Section 3.1 of the original paper for details.

    :param y_true: The actual values, y_true = X_ij.
    :param y_pred: The predicted occurrences from the model ( w_i^T*w_j ).
    :return: The loss associated with this batch.
    """
    x_max = self.x_max
    alpha = self.alpha
    fxij = k.pow(k.clip(y_true / x_max, 0.0, 1.0), alpha)
    return k.sum(fxij * k.square(y_pred - k.log(y_true)), axis=-1)
def line_loss(y_true, y_pred):
    '''
    y_true = np.vstack([k_weight, odw]).T
    '''
    r1 = layers.multiply([y_true[:, 1], y_pred])
    r2 = K.sigmoid(r1)
    r3 = K.log(r2)
    r4 = layers.multiply([y_true[:, 0], r3])
    result = -K.mean(r4)
    return result
def line_loss(y_true, y_pred):
    '''
    y_true[0]: -1 or +1 (indicating pos/neg samples)
    y_true[1]: lamb (lamb * NS_loss)
    '''
    r1 = y_true[0][0] * y_pred
    r2 = K.sigmoid(r1)
    r3 = K.log(r2)
    result = y_true[0][1] * -K.mean(r3)
    return result
def call(self, inputs, **kwargs):
    W = K.tanh(self.W_hat) * K.sigmoid(self.M_hat)
    a = K.dot(inputs, W)
    if self.nac_only:
        outputs = a
    else:
        m = K.exp(K.dot(K.log(K.abs(inputs) + self.epsilon), W))
        g = K.sigmoid(K.dot(inputs, self.G))
        outputs = g * a + (1. - g) * m
    return outputs
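
# Illustrative note (not from the original layer): the multiplicative path above works
# because a weighted sum in log space becomes a product/power in linear space,
# e.g. exp(1 * log(a) + 1 * log(b)) == a * b. A tiny NumPy check:
import numpy as np

x = np.array([3.0, 4.0])
W = np.array([1.0, 1.0])  # weights selecting both inputs
m = np.exp(np.dot(np.log(np.abs(x) + 1e-7), W))
print(m)  # ~12.0 == 3.0 * 4.0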
def __call__(self, y_true, y_pred):
    y_true_val = y_true[:, :, 0]
    mask = y_true[:, :, 1]

    # masked per-sample means of each loss
    num_items_masked = K.sum(mask, axis=-1) + 1e-6
    masked_cross_entropy = (
        K.sum(mask * K.sparse_categorical_crossentropy(y_true_val, y_pred), axis=-1)
        / num_items_masked)
    masked_entropy = (
        K.sum(mask * -K.sum(y_pred * K.log(y_pred), axis=-1), axis=-1)
        / num_items_masked)
    return masked_cross_entropy - self.penalty_weight * masked_entropy