def binary_PFA(y_true, y_pred, threshold=K.variable(value=0.5)):
    """Probability of false alarm (false-positive rate) at ``threshold``.

    Predictions are binarised with ``y_pred >= threshold``; the result is
    the number of alerts raised on negative labels divided by the number of
    negative labels.

    # Arguments
        y_true: tensor of ground-truth labels; assumed to contain only
            0/1 values -- TODO confirm against callers.
        y_pred: tensor of predicted scores, multiplied element-wise with
            ``y_true``.
        threshold: backend scalar used as the decision threshold.

    # Returns
        Scalar tensor ``FP / N``. The division is not guarded, so a batch
        without any negative label yields NaN/inf.
    """
    y_pred = K.cast(y_pred >= threshold, 'float32')
    # N = total number of negative labels
    N = K.sum(1 - y_true)
    # FP = total number of false alerts, alerts from the negative class labels
    FP = K.sum(y_pred * (1 - y_true))
    return FP / N
def dice_coeff(y_true, y_pred):
    """Smoothed Dice coefficient computed over every element of the inputs.

    Both tensors are flattened, then the score is
    ``(2 * sum(t * p) + s) / (sum(t) + sum(p) + s)`` with ``s = 0.001``.

    # Arguments
        y_true: ground-truth tensor.
        y_pred: prediction tensor, multiplied element-wise with ``y_true``.

    # Returns
        Scalar tensor holding the Dice score.
    """
    eps = 0.001
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    numerator = 2. * overlap + eps
    denominator = K.sum(truth) + K.sum(pred) + eps
    return numerator / denominator
def dice_coef(y_true, y_pred):
    """Dice coefficient with asymmetric smoothing terms.

    The denominator is smoothed with ``1e-7`` while the numerator only
    receives ``1e-7 * 0.01``, so two all-zero inputs score ``0.01`` rather
    than ``1``.

    # Arguments
        y_true: ground-truth tensor.
        y_pred: prediction tensor, multiplied element-wise with ``y_true``.

    # Returns
        Scalar tensor holding the Dice score.
    """
    eps = 1e-7
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    numerator = 2. * overlap + eps * 0.01
    denominator = K.sum(truth) + K.sum(pred) + eps
    return numerator / denominator
def IOU_calc(y_true, y_pred):
    """Overlap score between two flattened tensors.

    Evaluates ``2 * (sum(t * p) + s) / (sum(t) + sum(p) + s)`` where ``s``
    is the module-level ``iou_smooth``. NOTE(review): despite the name,
    this expression has the shape of a Dice score, not intersection over
    union -- confirm that this is intended.

    # Arguments
        y_true: ground-truth tensor.
        y_pred: prediction tensor, multiplied element-wise with ``y_true``.

    # Returns
        Scalar tensor holding the score.
    """
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    numerator = 2 * (overlap + iou_smooth)
    denominator = K.sum(truth) + K.sum(pred) + iou_smooth
    return numerator / denominator
def binary_PTA(y_true, y_pred, threshold=K.variable(value=0.5)):
    """Probability of true alert (true-positive rate) at ``threshold``.

    # Arguments
        y_true: tensor of ground-truth labels; assumed to contain only
            0/1 values -- TODO confirm against callers.
        y_pred: tensor of predicted scores, binarised with
            ``y_pred >= threshold``.
        threshold: backend scalar used as the decision threshold.

    # Returns
        Scalar tensor ``TP / P``. The division is not guarded, so a batch
        without any positive label yields NaN/inf.
    """
    alerts = K.cast(y_pred >= threshold, 'float32')
    # Alerts that coincide with a positive label.
    true_alerts = K.sum(alerts * y_true)
    # Total number of positive labels.
    positives = K.sum(y_true)
    return true_alerts / positives
def get_initial_state(self, x):
    """Build the list of all-zero initial states for the recurrent step.

    The states are derived from ``x`` through backend ops (instead of being
    allocated with a fixed size) so that their leading dimension follows
    the number of samples in ``x``.

    # Arguments
        x: input tensor of the layer.

    # Returns
        List of zero tensors: one per layer for each of the ``'r'``, ``'c'``
        and ``'e'`` states, plus -- only when ``self.extrap_start_time`` is
        set -- one ``'ahat'`` state and a trailing integer variable
        initialised to 0.
    """
    input_shape = self.input_spec[0].shape
    init_nb_row = input_shape[self.row_axis]
    init_nb_col = input_shape[self.column_axis]

    base_initial_state = K.zeros_like(x)  # (samples, timesteps) + image_shape
    non_channel_axis = -1 if self.data_format == 'channels_first' else -2
    # Sum away the two spatial axes, then the time axis.
    for _ in range(2):
        base_initial_state = K.sum(base_initial_state, axis=non_channel_axis)
    base_initial_state = K.sum(base_initial_state, axis=1)  # (samples, nb_channels)

    initial_states = []
    states_to_pass = ['r', 'c', 'e']
    nlayers_to_pass = {u: self.nb_layers for u in states_to_pass}
    if self.extrap_start_time is not None:
        # pass prediction in states so can use as actual for t+1 when extrapolating
        states_to_pass.append('ahat')
        nlayers_to_pass['ahat'] = 1
    for u in states_to_pass:
        for l in range(nlayers_to_pass[u]):
            # Spatial size is halved for every layer.
            ds_factor = 2**l
            nb_row = init_nb_row // ds_factor
            nb_col = init_nb_col // ds_factor
            if u in ['r', 'c']:
                stack_size = self.R_stack_sizes[l]
            elif u == 'e':
                stack_size = 2 * self.stack_sizes[l]
            elif u == 'ahat':
                stack_size = self.stack_sizes[l]
            output_size = stack_size * nb_row * nb_col  # flattened size

            # Multiplying by a zero matrix keeps the samples dimension
            # symbolic while producing the required flattened size.
            reducer = K.zeros((input_shape[self.channel_axis], output_size))  # (nb_channels, output_size)
            initial_state = K.dot(base_initial_state, reducer)  # (samples, output_size)
            if self.data_format == 'channels_first':
                output_shp = (-1, stack_size, nb_row, nb_col)
            else:
                output_shp = (-1, nb_row, nb_col, stack_size)
            initial_state = K.reshape(initial_state, output_shp)
            initial_states += [initial_state]

    # Use the public accessor, consistent with the dtype selection below.
    if K.backend() == 'theano':
        from theano import tensor as T
        # There is a known issue in the Theano scan op when dealing with
        # inputs whose shape is 1 along a dimension. In our case, this is a
        # problem when training on grayscale images, and the below line
        # fixes it.
        initial_states = [
            T.unbroadcast(init_state, 0, 1) for init_state in initial_states
        ]

    if self.extrap_start_time is not None:
        # the last state will correspond to the current timestep
        initial_states += [
            K.variable(0, int if K.backend() != 'tensorflow' else 'int32')
        ]
    return initial_states
def other_class_accuracy(y_true, y_pred):
    """Hit rate restricted to samples whose true class id exceeds a cut-off.

    A sample is selected when its true class id is strictly greater than
    the module-level ``INTERESTING_CLASS_ID``; a selected sample counts as
    a hit when the predicted class id is greater than or equal to the true
    one. NOTE(review): ``>=`` rather than ``==`` is what the code does --
    confirm that this is intended.

    # Arguments
        y_true: tensor whose last axis is arg-maxed to obtain the true id.
        y_pred: tensor whose last axis is arg-maxed to obtain the
            predicted id.

    # Returns
        Hits divided by ``max(number of selected samples, 1)``.
    """
    true_ids = K.argmax(y_true, axis=-1)
    pred_ids = K.argmax(y_pred, axis=-1)
    # Replace pred_ids with true_ids for recall here
    selected = K.cast(K.greater(true_ids, INTERESTING_CLASS_ID), 'int32')
    hits = K.cast(K.greater_equal(pred_ids, true_ids), 'int32') * selected
    n_hits = K.sum(hits)
    # Clamp to 1 so a batch with no selected sample does not divide by 0.
    n_selected = K.maximum(K.sum(selected), 1)
    return n_hits / n_selected
def fn(y_true, y_pred):
    """Precision-style accuracy for a single class.

    Only samples *predicted* as ``interesting_class_id`` (a name taken from
    the surrounding scope) are considered; among those, the fraction whose
    true class id equals the predicted one is returned.

    # Arguments
        y_true: tensor whose last axis is arg-maxed to obtain the true id.
        y_pred: tensor whose last axis is arg-maxed to obtain the
            predicted id.

    # Returns
        Matches divided by ``max(number of considered samples, 1)``.
    """
    true_ids = K.argmax(y_true, axis=-1)
    pred_ids = K.argmax(y_pred, axis=-1)
    # Replace pred_ids with true_ids for recall here
    considered = K.cast(K.equal(pred_ids, interesting_class_id), 'int32')
    matches = K.cast(K.equal(true_ids, pred_ids), 'int32') * considered
    n_matches = K.sum(matches)
    # Clamp to 1 so a batch with no considered sample does not divide by 0.
    n_considered = K.maximum(K.sum(considered), 1)
    return n_matches / n_considered
def get_f1(y_true, y_pred):
    """Batch-wise F1 score (adapted from old Keras source code).

    Values are clipped to [0, 1] and rounded before counting, and every
    division is stabilised with ``K.epsilon()``.

    # Arguments
        y_true: ground-truth tensor.
        y_pred: prediction tensor.

    # Returns
        Scalar tensor ``2 * p * r / (p + r + eps)``.
    """
    def _count(values):
        # Number of entries that round to 1 after clipping to [0, 1].
        return K.sum(K.round(K.clip(values, 0, 1)))

    hits = _count(y_true * y_pred)
    actual = _count(y_true)
    predicted = _count(y_pred)
    p = hits / (predicted + K.epsilon())
    r = hits / (actual + K.epsilon())
    return 2 * (p * r) / (p + r + K.epsilon())
def dice_coef(y_true, y_pred, smooth=1e-3):
    """Smoothed Dice coefficient over all elements of both tensors.

    # Arguments
        y_true: ground-truth tensor.
        y_pred: prediction tensor, multiplied element-wise with ``y_true``.
        smooth: constant added to numerator and denominator.

    # Returns
        ``K.mean`` of ``(2 * sum(t * p) + smooth) /
        (sum(t) + sum(p) + smooth)``.
    """
    truth = K.flatten(y_true)
    pred = K.flatten(y_pred)
    overlap = K.sum(truth * pred)
    score = (2.0 * overlap + smooth) / (K.sum(truth) + K.sum(pred) + smooth)
    return K.mean(score)
def loss(y_true, y_pred):
    """Weighted categorical cross-entropy.

    ``weights`` is taken from the surrounding scope and multiplied into the
    per-class log-likelihood terms.

    # Arguments
        y_true: target tensor; assumed one-hot along the last axis --
            TODO confirm.
        y_pred: predicted scores; renormalised along the last axis.

    # Returns
        Tensor with the last axis summed out.
    """
    # Rescale so the class scores of each sample sum to 1.
    probs = y_pred / K.sum(y_pred, axis=-1, keepdims=True)
    # Keep log() away from 0 and 1 to prevent NaN's and Inf's.
    probs = K.clip(probs, K.epsilon(), 1 - K.epsilon())
    weighted_ll = y_true * K.log(probs) * weights
    return -K.sum(weighted_ll, -1)
def dice_coef(y_true, y_pred):
    """Smoothed Dice coefficient over all elements of both tensors.

    The smoothing constant is read from the module-level name ``smooth``.

    # Arguments
        y_true: ground-truth tensor.
        y_pred: prediction tensor, multiplied element-wise with ``y_true``.

    # Returns
        Scalar tensor ``(2 * sum(t * p) + smooth) /
        (sum(t) + sum(p) + smooth)``.
    """
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)
def accuracy(self, y_true, y_pred):
    """
    Computes the overall accuracy.
    # Arguments
        y_true : target value
        y_pred : predicted class value
    # Returns
        acc : sum of the ``correct`` counts divided by the sum of the
              ``total`` counts returned by ``__metrics_base``
    """
    correct, total, _ = self.__metrics_base(y_true, y_pred)
    n_correct = K.sum(correct)
    n_total = K.sum(total)
    return n_correct / n_total
def weighted_BCE(y_true, y_pred):
    """Class-weighted cross-entropy over the last axis.

    The weight vector currently holds eight ones, so the weighting has no
    effect on the result.

    # Arguments
        y_true: target tensor; the last axis is assumed to have 8 entries
            to match the weight vector -- TODO confirm.
        y_pred: predicted scores; renormalised along the last axis.

    # Returns
        Tensor with the last axis summed out.
    """
    class_weights = tfb.variable(np.array([1, 1, 1, 1, 1, 1, 1, 1]))
    # Rescale so the class scores of each sample sum to 1.
    probs = y_pred / tfb.sum(y_pred, axis=-1, keepdims=True)
    # Keep log() away from 0 and 1 to prevent NaN's and Inf's.
    probs = tfb.clip(probs, tfb.epsilon(), 1 - tfb.epsilon())
    weighted_ll = y_true * tfb.log(probs) * class_weights
    return -tfb.sum(weighted_ll, -1)
def precision(y_true, y_pred):
    """Batch-wise precision.

    Counts are taken after clipping to [0, 1] and rounding; the result is
    true positives divided by predicted positives, stabilised with
    ``K.epsilon()``. It is computed on the tensors it is given only, so it
    is a per-batch figure.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    selected = K.sum(K.round(K.clip(y_pred, 0, 1)))
    return hits / (selected + K.epsilon())
def recall(y_true, y_pred):
    """Batch-wise recall.

    Counts are taken after clipping to [0, 1] and rounding; the result is
    true positives divided by actual positives, stabilised with
    ``K.epsilon()``. It is computed on the tensors it is given only, so it
    is a per-batch figure.
    """
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    relevant = K.sum(K.round(K.clip(y_true, 0, 1)))
    return hits / (relevant + K.epsilon())
def dice_axon(y_true, y_pred, smooth=1e-3):
    """
    Computes the smoothed Dice coefficient on index 2 of the last axis
    (presumably the axon class -- verify against the label layout).
    :param y_true: Tensor, the gold standard we work with.
    :param y_pred: Tensor, the prediction outputted by the network.
    :param smooth: constant added to numerator and denominator.
    :return: dice axon coefficient for the current batch.
    """
    truth = K.flatten(y_true[..., 2])
    pred = K.flatten(y_pred[..., 2])
    overlap = K.sum(truth * pred)
    score = (2. * overlap + smooth) / (K.sum(truth) + K.sum(pred) + smooth)
    return K.mean(score)
def f1_loss(y_true, y_pred):
    """Differentiable F1 loss: ``1 - mean(soft F1)``.

    True positives, false positives and false negatives are accumulated
    along axis 0 from the raw (unrounded) predictions, giving one F1 value
    per remaining position, which are then averaged.

    # Arguments
        y_true: target tensor; assumed to hold 0/1 values -- TODO confirm.
        y_pred: prediction tensor combined element-wise with ``y_true``.

    # Returns
        Scalar tensor ``1 - K.mean(f1)``.
    """
    tp = K.sum(K.cast(y_true * y_pred, 'float'), axis=0)
    fp = K.sum(K.cast((1 - y_true) * y_pred, 'float'), axis=0)
    fn = K.sum(K.cast(y_true * (1 - y_pred), 'float'), axis=0)

    p = tp / (tp + fp + K.epsilon())
    r = tp / (tp + fn + K.epsilon())

    f1 = 2 * p * r / (p + r + K.epsilon())
    # Replace any NaN entry with 0 so it cannot poison the mean.
    f1 = tf.where(tf.math.is_nan(f1), tf.zeros_like(f1), f1)
    return 1 - K.mean(f1)
def loss(self, y_true, y_pred):
    """
    Executes the weighted categorical cross-entropy.
    # Arguments
        y_true : true class values
        y_pred : predicted class values from the model
    # Returns
        ce : mean cross-entropy for the given batch, divided by 1000
    """
    probs = super().clipping(y_pred)
    class_w = super().c_weights(self.class_weights)
    # Class-weighted log-likelihood, summed over the last axis.
    per_pixel = -(K.sum(class_w * (y_true * K.log(probs)), axis=-1))
    pixel_w = super().p_weights(self.pixel_weights)
    # Pixel-weighted sum over axes 1 and 2, then mean over axis 0.
    per_sample = K.sum(pixel_w * per_pixel, axis=(1, 2))
    batch_mean = K.mean(per_sample, axis=0)
    # Scaling down the loss to prevent gradient explosion.
    return batch_mean / 1000
def loss(self, y_true, y_pred):
    """
    Executes the focal loss.
    # Arguments
        y_true : true class values
        y_pred : predicted class values from the model
    # Returns
        fl : mean focal loss for the given batch, divided by 1000
    """
    probs = self.clipping(y_pred)
    # Class weight times the focusing factor (1 - p) ** gamma.
    modulation = self.c_weights(self.class_weights) * K.pow(1. - probs, self.gamma)
    per_pixel = -(K.sum(modulation * (y_true * K.log(probs)), axis=-1))
    pixel_w = self.p_weights(self.pixel_weights)
    # Pixel-weighted sum over axes 1 and 2, then mean over axis 0.
    per_sample = K.sum(pixel_w * per_pixel, axis=(1, 2))
    batch_mean = K.mean(per_sample, axis=0)
    # Scaling down the loss to prevent gradient explosion.
    return batch_mean / 1000
def social_attention(self, x):
    """Attention-pool the embeddings of the users found in ``x``.

    The entry at index 0 of axis 1 is the query; every one of the
    ``self.walk_length`` entries is scored against it, the scores are
    soft-maxed and used to form a weighted sum of the projected embeddings.

    Side effect: stores the embedding layer on ``self.user_embedding``.

    # Arguments
        x: tensor of user ids; presumably (batch, walk_length) with 0 used
            as padding since ``mask_zero=True`` -- TODO confirm.

    # Returns
        Tensor produced by summing the weighted projections over axis 1.
    """
    self.user_embedding = Embedding(input_dim=self.max_user + 1,
                                    output_dim=self.H,
                                    trainable=True,
                                    mask_zero=True)
    all_u_i = self.user_embedding(x)
    w_v = Dense(1, use_bias=False)
    w_x = Dense(self.H, use_bias=False)

    u_i = Lambda(lambda xin: xin[:, 0, :])(all_u_i)
    e = []
    w_all_u_i = []
    u_i = w_x(u_i)
    for j in range(self.walk_length):
        # Bind j as a default argument: a plain closure would look j up
        # when the Lambda runs, so any later re-execution of the layer
        # would slice the last index for every step.
        u_j = Lambda(lambda xin, j=j: xin[:, j, :])(all_u_i)
        u_j = w_x(u_j)
        e_i_j = LeakyReLU(alpha=0.3)(w_v(concatenate([u_i, u_j])))
        e.append(e_i_j)
        w_all_u_i.append(u_j)

    e = concatenate(e, axis=-1)
    w_all_u_i = Reshape((self.walk_length, self.H))(
        concatenate(w_all_u_i, axis=-1))
    # Softmax over the scores, repeated H times along a new last axis so it
    # can be multiplied element-wise with the projections.
    alpha = Lambda(lambda xin: K.repeat_elements(
        K.expand_dims(K.softmax(xin), -1), rep=self.H, axis=-1))(e)
    u_f_i = Lambda(lambda xin: K.sum(xin, axis=1))(
        multiply([alpha, w_all_u_i]))
    return u_f_i
def test_gradient(self):
    """Theano and TensorFlow backends must agree on gradients.

    Also checks that adding a ``stop_gradient`` copy of the loss leaves
    the gradient unchanged on both backends.
    """
    val = np.random.random((4, 2))

    def _evaluate(backend):
        # Returns (gradient, gradient with stop_gradient term) as arrays.
        x = backend.variable(val)
        exp = x * backend.exp(x)
        loss = backend.sum(exp)
        frozen_loss = backend.stop_gradient(loss)
        grad = backend.gradients(loss, [exp])
        frozen_grad = backend.gradients(loss + frozen_loss, [exp])
        return backend.eval(grad[0]), backend.eval(frozen_grad[0])

    zth, zero_zth = _evaluate(KTH)
    ztf, zero_ztf = _evaluate(KTF)

    assert zth.shape == ztf.shape
    assert zero_zth.shape == zero_ztf.shape
    assert_allclose(zth, ztf, atol=1e-05)
    assert_allclose(zero_zth, zero_ztf, atol=1e-05)
    assert_allclose(zero_zth, zth, atol=1e-05)
    assert_allclose(zero_ztf, ztf, atol=1e-05)
def call(self, x, mask=None):
    """Apply attention weights to ``x`` without summing the result.

    Scores are ``exp(dot_product(tanh(dot_product(x, W) [+ b]), u))``,
    optionally masked, then normalised along axis 1.

    # Arguments
        x: input tensor; axis 1 is the axis the weights are normalised
            over (presumably timesteps -- TODO confirm).
        mask: optional mask multiplied into the un-normalised weights.

    # Returns
        ``x`` multiplied by its attention weights; the weighted input is
        returned as is and is not reduced over axis 1.
    """
    uit = dot_product(x, self.W)
    if self.bias:
        uit += self.b
    uit = K.tanh(uit)
    ait = dot_product(uit, self.u)

    a = K.exp(ait)
    # apply mask after the exp. will be re-normalized next
    if mask is not None:
        # Cast the mask to floatX to avoid float64 upcasting in theano
        a *= K.cast(mask, K.floatx())

    # in some cases especially in the early stages of training the sum may
    # be almost zero and this results in NaN's. A workaround is to add a
    # very small positive number epsilon to the sum.
    a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

    a = K.expand_dims(a)
    weighted_input = x * a
    return weighted_input
def focal_loss_fixed(y_true, y_pred):
    """Binary focal loss with ``alpha = 0.5``.

    ``gamma`` is taken from the surrounding scope. The positive term is
    reduced with ``K.mean`` over everything while the negative term is
    reduced with ``K.sum`` over the last axis only.

    # Arguments
        y_true: target tensor compared against 1 and 0.
        y_pred: predicted probabilities; clipped to [1e-6, 1 - 1e-6].

    # Returns
        Positive-term mean minus negative-term sum, both negated.
    """
    eps = 1e-6
    alpha = 0.5
    # Clipping improves the stability of the focal loss.
    probs = K.clip(y_pred, eps, 1. - eps)
    # Probabilities where the label is 1 (elsewhere 1, whose log is 0).
    pos_p = tf.where(tf.equal(y_true, 1), probs, tf.ones_like(probs))
    # Probabilities where the label is 0 (elsewhere 0, so log(1 - 0) is 0).
    neg_p = tf.where(tf.equal(y_true, 0), probs, tf.zeros_like(probs))
    pos_term = K.mean(alpha * K.pow(1. - pos_p, gamma) * K.log(pos_p))
    neg_term = K.sum((1 - alpha) * K.pow(neg_p, gamma) * K.log(1. - neg_p), axis=-1)
    return -pos_term - neg_term
def test_gradient(self):
    """Compare gradients between the Theano and TensorFlow backends.

    For each backend the gradient of ``sum(x * exp(x))`` is evaluated
    twice: once directly and once with a ``stop_gradient`` copy of the
    loss added. All four results must match within ``1e-05``.
    """
    val = np.random.random((4, 2))

    plain, stopped = [], []
    for backend in (KTH, KTF):
        x = backend.variable(val)
        exp = x * backend.exp(x)
        loss = backend.sum(exp)
        frozen_loss = backend.stop_gradient(loss)
        grad = backend.gradients(loss, [exp])
        frozen_grad = backend.gradients(loss + frozen_loss, [exp])
        plain.append(backend.eval(grad[0]))
        stopped.append(backend.eval(frozen_grad[0]))

    zth, ztf = plain
    zero_zth, zero_ztf = stopped

    assert zth.shape == ztf.shape
    assert zero_zth.shape == zero_ztf.shape
    assert_allclose(zth, ztf, atol=1e-05)
    assert_allclose(zero_zth, zero_ztf, atol=1e-05)
    assert_allclose(zero_zth, zth, atol=1e-05)
    assert_allclose(zero_ztf, ztf, atol=1e-05)
def gradient_penalty_loss(y_true, y_pred, averaged_samples, gradient_penalty_weight):
    """Gradient penalty term of an improved WGAN for "averaged" samples.

    The 1-Lipschitz constraint is enforced softly by penalising the
    discriminator whenever the norm of its gradient with respect to its
    *input* (not its weights) moves away from 1. The gradient is evaluated
    at ``averaged_samples``, points chosen on the lines between real and
    generated samples.

    Keras only passes ``y_true`` and ``y_pred`` to a loss function, so the
    two extra arguments are expected to be bound beforehand, e.g. with
    ``functools.partial``.

    # Arguments
        y_true: unused.
        y_pred: discriminator output for ``averaged_samples``.
        averaged_samples: input tensor the gradient is taken against.
        gradient_penalty_weight: multiplier (lambda) of the penalty.

    # Returns
        Scalar tensor: mean over samples of
        ``lambda * (1 - ||grad||_2) ** 2``.
    """
    grads = K.gradients(y_pred, averaged_samples)[0]
    # Euclidean norm per sample: square, sum over every axis except the
    # first one, then take the square root.
    squared = K.square(grads)
    per_sample_axes = np.arange(1, len(squared.shape))
    squared_sum = K.sum(squared, axis=per_sample_axes)
    l2_norm = K.sqrt(squared_sum)
    # lambda * (1 - ||grad||)^2, still one value per sample.
    penalty = gradient_penalty_weight * K.square(1 - l2_norm)
    return K.mean(penalty)
def dice_coeffiecient(self, y_true, y_pred, average='inter', weights=None):
    """
    Computes the dice score over each given class and returns the overall
    score.
    # Arguments
        y_true  : target value
        y_pred  : predicted class value
        average : 'inter'    --> computes the dice score overall
                  'intra'    --> computes the score for each class and
                                 averages them
                  'weighted' --> computes the weighted average, useful for
                                 imbalanced classes
        weights : only used when average is 'weighted'; weights for the
                  respective classes
    # Returns
        dice score, as produced by ``__avg_method``
    """
    probs = focal_loss.clipping(y_pred)
    reduce_axes = (0, 1, 2)
    overlap = 2 * K.sum((y_true * probs), axis=reduce_axes)
    # Squared-magnitude form of the denominator.
    magnitude = K.sum((y_true * y_true) + (probs * probs), axis=reduce_axes)
    return self.__avg_method(overlap, magnitude, average, weights)
def __metrics_base(self, y_true, y_pred):
    """
    Base for all the metrics defined below.
    # Arguments
        y_true : target value, arg-maxed over the last axis
        y_pred : predicted value, arg-maxed over the last axis
    # Returns
        correct : diagonal of the confusion matrix
        total   : confusion matrix summed over its last axis
        con_mat : the confusion matrix cast to ``K.floatx()``
    """
    true_ids = K.flatten(tf.math.argmax(y_true, axis=-1))
    pred_ids = K.flatten(tf.math.argmax(y_pred, axis=-1))
    con_mat = K.cast(tf.math.confusion_matrix(true_ids, pred_ids), K.floatx())
    correct = tf.linalg.diag_part(con_mat)
    total = K.sum(con_mat, axis=-1)
    return correct, total, con_mat
def personalized_attention(self, u_s, h_i):
    """Attention-pool ``h_i`` along axis 1 using ``u_s`` as the query.

    ``h_i`` is projected to ``self.H`` units with a tanh Dense layer, the
    projection is dotted with ``u_s`` to obtain one score per position,
    and the soft-maxed scores weight the original ``h_i``.

    # Arguments
        u_s: query tensor; contracted over its axis 1.
        h_i: tensor to pool; contracted over its axis 2 for scoring --
            presumably (batch, steps, features), TODO confirm.

    # Returns
        ``h_i`` weighted by the attention scores and summed over axis 1.
    """
    feature_dim = h_i.shape[-1]
    projected = Dense(self.H, activation="tanh", use_bias=True)(h_i)
    scores = dot([projected, u_s], axes=(2, 1))
    # Repeat the weights along a new last axis so they line up with h_i.
    spread = Lambda(lambda xin: K.repeat_elements(
        K.expand_dims(K.softmax(xin), -1), rep=feature_dim, axis=-1))
    attention = spread(scores)
    weighted = multiply([attention, h_i])
    return Lambda(lambda xin: K.sum(xin, axis=1))(weighted)
def loss(self, y_true, y_pred):
    """
    Executes the weighted dice loss.
    # Arguments
        y_true : true class values
        y_pred : predicted class values from the model
    # Returns
        dl : dice loss for the given batch
    """
    probs = super().clipping(y_pred)

    # Class-weighted overlap, then pixel-weighted sum over axes 1 and 2.
    overlap = K.sum(super().c_weights(self.class_weights) * y_true * probs, axis=-1)
    overlap = K.sum(super().p_weights(self.pixel_weights) * overlap, axis=(1, 2))

    # Same reduction applied to the squared magnitudes.
    magnitude = K.sum(
        super().c_weights(self.class_weights) * ((y_true * y_true) + (probs * probs)),
        axis=-1)
    magnitude = K.sum(super().p_weights(self.pixel_weights) * magnitude, axis=(1, 2))

    per_sample = 1. - ((2 * overlap) / magnitude)
    return K.mean(per_sample)
def euclidean_distance_angles_biwi(y_true, y_pred):
    """Weighted Euclidean distance over the last axis (Theano only).

    Each of the three components of the difference is scaled by
    ``3 * [0.2, 0.35, 0.45]`` before the norm is taken.

    # Arguments
        y_true: target tensor; the last axis is assumed to have 3 entries
            to match the weight vector -- TODO confirm.
        y_pred: prediction tensor, subtracted element-wise.

    # Returns
        Tensor with the last axis reduced to size 1 (``keepdims=True``).
    """
    component_scale = np.expand_dims(3 * np.array([0.2, 0.35, 0.45]), axis=0)
    shared_scale = theano.shared(component_scale)
    # Mark the leading axis as broadcastable so one row scales every row.
    shared_scale = T.patternbroadcast(shared_scale, (True, False))
    scaled_error = (y_pred - y_true) * shared_scale
    squared_sum = K.sum(K.square(scaled_error), axis=-1, keepdims=True)
    return K.sqrt(squared_sum)