# Shared imports assumed by the snippets below (Keras 2.x backend API;
# most snippets target TensorFlow 1.x, a few the Theano backend).
import math

import numpy as np
import tensorflow as tf
from keras import backend as K
from keras import backend as kb  # a few snippets alias the backend as `kb`


def log_normal(x, p=None, eps=0.0):
    if p is None:
        mu, var = 0, 1
    else:
        mu, var = p
    var += eps
    return -0.5 * K.sum(K.log(2 * np.pi) + K.log(var) + K.square(x - mu) / var,
                        axis=-1)

def call(self, inputs, **kwargs):
    assert isinstance(inputs, list) and len(inputs) == 3
    first, second, features = inputs[0], inputs[1], inputs[2]
    if not self.from_logits:
        first = kb.clip(first, 1e-10, 1.0)
        second = kb.clip(second, 1e-10, 1.0)
        first_, second_ = kb.log(first), kb.log(second)
    else:
        first_, second_ = first, second
    # embedded_features.shape = (M, T, 1)
    if self.use_intermediate_layer:
        features = kb.dot(features, self.first_kernel)
        features = kb.bias_add(features, self.first_bias,
                               data_format="channels_last")
        features = self.intermediate_activation(features)
    embedded_features = kb.dot(features, self.features_kernel)
    embedded_features = kb.bias_add(
        embedded_features, self.features_bias, data_format="channels_last")
    if self.use_dimension_bias:
        tiling_shape = [1] * (kb.ndim(first) - 1) + [kb.shape(first)[-1]]
        embedded_features = kb.tile(embedded_features, tiling_shape)
        embedded_features = kb.bias_add(
            embedded_features, self.dimensions_bias, data_format="channels_last")
    sigma = kb.sigmoid(embedded_features)
    result = weighted_sum(first_, second_, sigma,
                          self.first_threshold, self.second_threshold)
    probs = kb.softmax(result)
    if self.return_logits:
        return [probs, result]
    return probs

def kl_normal(q, p=None):
    qmu, qvar = q
    if p is None:
        pmu, pvar = 0, 1
    else:
        pmu, pvar = p
    return 0.5 * K.sum(K.log(pvar) - K.log(qvar) + qvar / pvar
                       + K.square(qmu - pmu) / pvar - 1, axis=-1)

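# Quick numerical check (added here as an illustration; not from the original
# source). The KL divergence of a Gaussian from itself is zero, so passing
# the same (mu, var) pair for q and p should evaluate to 0.
q = (K.constant([[0.0]]), K.constant([[1.0]]))
print(K.eval(kl_normal(q, q)))  # -> [0.]
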
def criterion_GAN(output, target, use_lsgan=True):
    if use_lsgan:
        diff = output - target
        dims = list(range(1, K.ndim(diff)))
        return K.expand_dims(K.mean(diff ** 2, dims), 0)
    else:
        # negated log-likelihood, so this is a loss to be minimized
        return -K.mean(K.log(output + 1e-12) * target
                       + K.log(1 - output + 1e-12) * (1 - target))

def mutual_info_loss(self, c, c_given_x):
    """The mutual information metric we aim to minimize"""
    eps = 1e-8
    conditional_entropy = K.mean(-K.sum(K.log(c_given_x + eps) * c, axis=1))
    entropy = K.mean(-K.sum(K.log(c + eps) * c, axis=1))
    return conditional_entropy + entropy

def eigen_loss(y_true, y_pred):
    y_true = tf.Print(y_true, [y_true], message='y_true', summarize=30)
    y_pred = tf.Print(y_pred, [y_pred], message='y_pred', summarize=30)

    y_true_clipped = K.clip(y_true, K.epsilon(), None)
    y_pred_clipped = K.clip(y_pred, K.epsilon(), None)

    first_log = K.log(y_pred_clipped + 1.)
    second_log = K.log(y_true_clipped + 1.)

    # horizontal gradient kernel
    w_x = K.variable(np.array([[-1., 0., 1.],
                               [-1., 0., 1.],
                               [-1., 0., 1.]]).reshape(3, 3, 1, 1))
    grad_x_pred = K.conv2d(first_log, w_x, padding='same')
    grad_x_true = K.conv2d(second_log, w_x, padding='same')

    # vertical gradient kernel
    w_y = K.variable(np.array([[-1., -1., -1.],
                               [0., 0., 0.],
                               [1., 1., 1.]]).reshape(3, 3, 1, 1))
    grad_y_pred = K.conv2d(first_log, w_y, padding='same')
    grad_y_true = K.conv2d(second_log, w_y, padding='same')

    diff_x = grad_x_pred - grad_x_true
    diff_y = grad_y_pred - grad_y_true

    log_term = K.mean(K.square(first_log - second_log), axis=-1)
    sc_inv_term = K.square(K.mean(first_log - second_log, axis=-1))
    grad_loss = K.mean(K.square(diff_x) + K.square(diff_y), axis=-1)

    return log_term - (0.5 * sc_inv_term) + grad_loss

def root_mean_squared_logarithmic_loss(y_true, y_pred):
    y_true = tf.Print(y_true, [y_true], message='y_true', summarize=30)
    y_pred = tf.Print(y_pred, [y_pred], message='y_pred', summarize=30)
    first_log = K.log(K.clip(y_pred, K.epsilon(), None) + 1.)
    second_log = K.log(K.clip(y_true, K.epsilon(), None) + 1.)
    return K.sqrt(K.mean(K.square(first_log - second_log), axis=-1) + 0.00001)

def yolo_loss(args, anchors, num_classes, ignore_thresh=.5):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(T, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    yolo_outputs = args[:3]
    y_true = args[3:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
                   for l in range(3)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]

    for l in range(3):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        pred_xy, pred_wh, pred_confidence, pred_class_probs = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet box loss.
        xy_delta = (y_true[l][..., :2] - pred_xy) * grid_shapes[l][::-1]
        wh_delta = K.log(y_true[l][..., 2:4]) - K.log(pred_wh)
        # Avoid log(0)=-inf.
        wh_delta = K.switch(object_mask, wh_delta, K.zeros_like(wh_delta))
        box_delta = K.concatenate([xy_delta, wh_delta], axis=-1)
        box_delta_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(
            lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        box_loss = object_mask * K.square(box_delta * box_delta_scale)
        confidence_loss = object_mask * K.square(1 - pred_confidence) + \
            (1 - object_mask) * K.square(0 - pred_confidence) * ignore_mask
        class_loss = object_mask * K.square(true_class_probs - pred_class_probs)
        loss += K.sum(box_loss) + K.sum(confidence_loss) + K.sum(class_loss)
    return loss / K.cast(m, K.dtype(loss))

def loglik_continuous_conditional_correction(y, u, a, b, epsilon=1e-35):
    """Integrated conditional excess loss. Explanation TODO"""
    ya = (y + epsilon) / a
    loglikelihoods = y * (u * (K.log(b) + b * K.log(ya))
                          - (b / (b + 1.)) * K.pow(ya, b))
    return loglikelihoods

def focal_loss_fixed(y_true, y_pred):
    if K.backend() == "tensorflow":
        import tensorflow as tf
        pt = tf.where(tf.equal(y_true, 1), y_pred, 1 - y_pred)
        return -K.mean(alpha * K.pow(1. - pt, gamma) * K.log(pt))
    if K.backend() == "theano":
        import theano.tensor as T
        pt = T.where(T.eq(y_true, 1), y_pred, 1 - y_pred)
        return -K.mean(alpha * K.pow(1. - pt, gamma) * K.log(pt))

def calc_loss(pred, target, loss='l2'):
    """ Calculate Loss from Shoanlu GAN """
    if loss.lower() == "l2":
        return K.mean(K.square(pred - target))
    if loss.lower() == "l1":
        return K.mean(K.abs(pred - target))
    if loss.lower() == "cross_entropy":
        return -K.mean(K.log(pred + K.epsilon()) * target
                       + K.log(1 - pred + K.epsilon()) * (1 - target))
    raise ValueError('Received an unknown loss type: {}.'.format(loss))

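# Illustrative smoke test for calc_loss (an added sketch, not from the
# original source): a constant absolute error of 0.5 gives an L1 loss of 0.5.
pred = K.constant([[0.5, 1.0]])
target = K.constant([[0.0, 0.5]])
print(K.eval(calc_loss(pred, target, loss='l1')))  # -> 0.5
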
def weighted_bce_loss(y_true, y_pred, weight):
    # avoiding overflow
    epsilon = 1e-7
    y_pred = K.clip(y_pred, epsilon, 1. - epsilon)
    logit_y_pred = K.log(y_pred / (1. - y_pred))

    # https://www.tensorflow.org/api_docs/python/tf/nn/weighted_cross_entropy_with_logits
    loss = (1. - y_true) * logit_y_pred + (1. + (weight - 1.) * y_true) * \
        (K.log(1. + K.exp(-K.abs(logit_y_pred))) + K.maximum(-logit_y_pred, 0.))
    return K.sum(loss) / K.sum(weight)

def get_output(self, train=False):
    print("LogAnyBoundOcc", self.output_shape)
    X = self.get_input(train)
    log_none_bnd = K.sum(
        K.log(1 - K.clip(K.exp(X), 1e-6, 1 - 1e-6)), axis=3, keepdims=True)
    at_least_1_bnd = 1 - K.exp(log_none_bnd)
    max_occ = K.max(K.exp(X), axis=3, keepdims=True)
    # we take the weighted sum because the max is easier to fit, and
    # thus this helps to regularize the optimization procedure
    rv = K.log(0.05 * max_occ + 0.95 * at_least_1_bnd)
    return rv

def __call__(self, x):
    xshape = K.int_shape(x)
    # compare strings with ==, not identity
    if self.axis == 'last':
        x = K.reshape(x, (-1, xshape[-1]))
        x /= K.sqrt(K.sum(K.square(x), axis=0, keepdims=True))
        xx = K.dot(K.transpose(x), x)
        return self.gamma * K.sum(K.log(1.0 + K.exp(self.lam * (xx - 1.0)))
                                  * (1.0 - K.eye(xshape[-1])))
    elif self.axis == 'first':
        x = K.reshape(x, (xshape[0], -1))
        x /= K.sqrt(K.sum(K.square(x), axis=1, keepdims=True))
        xx = K.dot(x, K.transpose(x))
        return self.gamma * K.sum(K.log(1.0 + K.exp(self.lam * (xx - 1.0)))
                                  * (1.0 - K.eye(xshape[0])))

def neg_log_normal_mixture(self, y_true, parameters):
    components = K.reshape(parameters, [-1, self.d + 2, self.m])
    mu = components[:, :self.d, :]
    sigma = components[:, self.d, :]
    alpha = components[:, self.d + 1, :]
    alpha = K.softmax(K.clip(alpha, 1e-8, 1.))
    exponent = K.log(alpha) - .5 * float(self.d) * K.log(2 * np.pi) \
        - float(self.d) * K.log(sigma) \
        - K.sum((K.expand_dims(y_true, 2) - mu) ** 2, axis=1) / (2 * sigma ** 2)
    log_gauss = log_sum_exp(exponent, axis=1)
    res = -K.mean(log_gauss)
    return res

def binary_crossentropy_with_ranking(y_true, y_pred):
    """ Trying to combine ranking loss with numeric precision"""
    # first get the log loss like normal
    logloss = K.mean(K.binary_crossentropy(y_pred, y_true), axis=-1)

    # next, build a rank loss

    # clip the probabilities to keep stability
    y_pred_clipped = K.clip(y_pred, K.epsilon(), 1 - K.epsilon())

    # translate into the raw scores before the logit
    y_pred_score = K.log(y_pred_clipped / (1 - y_pred_clipped))

    # determine what the maximum score for a zero outcome is
    y_pred_score_zerooutcome_max = K.max(y_pred_score * (y_true < 1))

    # determine how much each score is above or below it
    rankloss = y_pred_score - y_pred_score_zerooutcome_max

    # only keep losses for positive outcomes
    rankloss = rankloss * y_true

    # only keep losses where the score is below the max
    rankloss = K.square(K.clip(rankloss, -100, 0))

    # average the loss for just the positive outcomes
    rankloss = K.sum(rankloss, axis=-1) / (K.sum(y_true > 0) + 1)

    # return (rankloss + 1) * logloss - an alternative to try
    return rankloss + logloss

def recall_loss(y_true, y_pred):
    '''
    input: y_true (theano Tensor), y_pred (theano Tensor)
    output: recall_loss (float)
    '''
    # print(K.ndim(y_true), K.ndim(y_pred))
    # cast the boolean match mask to float so K.mean works on both backends
    matches = K.cast(K.equal(K.argmax(y_true, axis=-1),
                             K.argmax(y_pred, axis=-1)), K.floatx())
    return -K.log(K.mean(matches))

def loglik_discrete(y, u, a, b, epsilon=1e-35):
    hazard0 = K.pow((y + epsilon) / a, b)
    hazard1 = K.pow((y + 1.0) / a, b)
    loglikelihoods = u * K.log(K.exp(hazard1 - hazard0) - 1.0) - hazard1
    return loglikelihoods

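# Worked example for loglik_discrete (added; the values are assumptions for
# illustration). With a = b = 1, the discrete Weibull log-likelihood of an
# uncensored (u = 1) observation at y = 0 is log(e - 1) - 1 ~= -0.459.
y, u = K.constant(0.0), K.constant(1.0)
a, b = K.constant(1.0), K.constant(1.0)
print(K.eval(loglik_discrete(y, u, a, b)))  # -> ~-0.459
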
def func(y_true, y_pred):
    Gxx = _get_kernel(y_true, y_true)
    Gzz = _get_kernel(y_pred, y_pred)
    Gxz = _get_kernel(y_true, y_pred)
    cost = K.log(K.sqrt(K.mean(Gxx) * K.mean(Gzz)) / K.mean(Gxz))
    return cost

def my_loss(self, y_true, y_pred):
    # dimension of the features
    d = y_pred.shape[1]

    # Put all three in y_true
    # 1. selected probability
    sel_prob = y_true[:, :d]
    # 2. discriminator output
    dis_prob = y_true[:, d:(d + 2)]
    # 3. value function output
    val_prob = y_true[:, (d + 2):(d + 4)]
    # 4. ground truth
    y_final = y_true[:, (d + 4):]

    # A1. Compute the rewards of the actor network
    Reward1 = tf.reduce_sum(y_final * tf.log(dis_prob + 1e-8), axis=1)
    # A2. Compute the rewards of the actor network
    Reward2 = tf.reduce_sum(y_final * tf.log(val_prob + 1e-8), axis=1)
    # Difference is the rewards
    Reward = Reward1 - Reward2

    # B. Policy gradient loss computation.
    loss1 = Reward * tf.reduce_sum(
        sel_prob * K.log(y_pred + 1e-8)
        + (1 - sel_prob) * K.log(1 - y_pred + 1e-8),
        axis=1) - self.lamda * tf.reduce_mean(y_pred, axis=1)

    # C. Maximize the loss1
    loss = tf.reduce_mean(-loss1)
    return loss

def sigmoid_cross_entropy(y_true, y_pred):
    z = K.flatten(y_true)
    x = K.flatten(y_pred)
    q = 10
    l = (1 + (q - 1) * z)
    # numerically stable weighted sigmoid cross-entropy;
    # K.maximum (elementwise max with 0), not K.max (an axis reduction)
    loss = (K.sum((1 - z) * x)
            + K.sum(l * (K.log(1 + K.exp(-K.abs(x))) + K.maximum(-x, 0.)))) / 500
    return loss

def log_normal(x, p=None):
    # use None as the sentinel; testing `p is 0` is unreliable
    if p is None:
        mu, lv = 0, 0
    else:
        dim = p.shape[1] // 2
        mu, lv = p[:, :dim], p[:, dim:]
    return -0.5 * K.sum(K.log(2 * np.pi) + lv + K.square(x - mu) * K.exp(-lv),
                        axis=-1)

def get_output(self, train=False):
    # this layer targets the Theano backend; TT is theano.tensor
    print("LogNormalizedOccupancy", self.output_shape)
    X = self.get_input(train)
    # calculate the log occupancies
    log_occs = theano_calc_log_occs(-X, self.chem_affinity)
    # reshape the output so that the forward and reverse complement
    # occupancies are viewed as different tracks
    log_occs = K.reshape(log_occs, (X.shape[0], 1, 2 * X.shape[1], X.shape[3]))
    if self.steric_hindrance_win_len == 0:
        log_norm_factor = 0
    else:
        # correct occupancies for overlapping binding sites
        occs = K.exp(log_occs)
        kernel = K.ones((1, 1, 1, 2 * self.steric_hindrance_win_len - 1),
                        dtype='float32')
        win_occ_sum = K.conv2d(occs, kernel, border_mode='same').sum(
            axis=2, keepdims=True)
        win_prb_all_unbnd = TT.exp(
            K.conv2d(K.log(1 - occs), kernel, border_mode='same')).sum(
            axis=2, keepdims=True)
        log_norm_factor = TT.log(win_occ_sum + win_prb_all_unbnd)
    #start = max(0, self.steric_hindrance_win_len-1)
    #stop = min(self.output_shape[3],
    #           self.output_shape[3]-(self.steric_hindrance_win_len-1))
    #rv = log_occs[:,:,:,start:stop] - log_norm_factor
    rv = log_occs - log_norm_factor
    return K.reshape(rv, (X.shape[0], 2 * X.shape[1], 1, X.shape[3]))

def logtanh(x, a=1):
    """
    log * tanh

    See Also: arcsinh
    """
    return K.tanh(x) * K.log(2 + a * abs(x))

def loss(self, y_true, y_pred):
    """ categorical crossentropy loss """

    if self.crop_indices is not None:
        y_true = utils.batch_gather(y_true, self.crop_indices)
        y_pred = utils.batch_gather(y_pred, self.crop_indices)

    if self.use_float16:
        y_true = K.cast(y_true, 'float16')
        y_pred = K.cast(y_pred, 'float16')

    # scale and clip probabilities
    # this should not be necessary for softmax output.
    y_pred /= K.sum(y_pred, axis=-1, keepdims=True)
    y_pred = K.clip(y_pred, K.epsilon(), 1)

    # compute log probability
    log_post = K.log(y_pred)  # likelihood

    # loss
    loss = -y_true * log_post

    # weighted loss
    if self.weights is not None:
        loss *= self.weights

    if self.vox_weights is not None:
        loss *= self.vox_weights

    # take the total loss
    # loss = K.batch_flatten(loss)
    mloss = K.mean(K.sum(K.cast(loss, 'float32'), -1))
    # keep the finiteness check attached to the returned tensor
    mloss = tf.verify_tensor_all_finite(mloss, 'Loss not finite')
    return mloss

def vae_loss(y_true, y_pred):
    # closes over x, x_decoded, z_mean, z_var from the enclosing scope
    generation_loss = img_rows * img_cols \
        * metrics.binary_crossentropy(x, x_decoded)
    kl_loss = 0.5 * tf.reduce_sum(
        K.square(z_mean) + K.square(z_var) - K.log(K.square(z_var + 1e-8)) - 1,
        axis=1)
    return tf.reduce_mean(generation_loss + kl_loss)

def setup_graphs(self):
    # Create shared network
    s, policy_network, value_network = self.model_factory(
        self, self.env[0], **self.kwargs)
    policy_network_params = policy_network.trainable_weights
    value_network_params = value_network.trainable_weights
    pi_values = policy_network(s)
    V_values = value_network(s)

    # Create shared target network
    st, target_policy_network, target_value_network = self.model_factory(
        self, self.env[0])
    target_policy_network_params = target_policy_network.trainable_weights
    target_value_network_params = target_value_network.trainable_weights
    target_pi_values = target_policy_network(st)
    target_V_values = target_value_network(st)

    # Op for periodically updating target network with online network weights
    reset_local_policy_network_params = [
        policy_network_params[i].assign(target_policy_network_params[i])
        for i in range(len(policy_network_params))]
    reset_local_value_network_params = [
        value_network_params[i].assign(target_value_network_params[i])
        for i in range(len(value_network_params))]
    reset_target_policy_network_params = [
        target_policy_network_params[i].assign(policy_network_params[i])
        for i in range(len(target_policy_network_params))]
    reset_target_value_network_params = [
        target_value_network_params[i].assign(value_network_params[i])
        for i in range(len(target_value_network_params))]

    # Define A3C cost and gradient update equations
    a = tf.placeholder("float", [None, self.env[0].action_space.n])
    R = tf.placeholder("float", [None, 1])
    action_pi_values = tf.reduce_sum(tf.multiply(pi_values, a),
                                     reduction_indices=1)

    # policy network update
    cost_pi = -K.log(tf.reduce_sum(action_pi_values)) * (R - V_values)
    #optimizer_pi = keras.optimizers.Adam(self.learning_rate, clipvalue=1e3)
    optimizer_pi = tf.train.RMSPropOptimizer(self.learning_rate)
    grad_update_pi = optimizer_pi.minimize(cost_pi,
                                           var_list=policy_network_params)
    grad_pi = K.gradients(cost_pi, policy_network_params)

    # value network update
    cost_V = tf.reduce_mean(tf.square(R - V_values))
    #optimizer_V = keras.optimizers.Adam(self.learning_rate, clipvalue=1e3)
    optimizer_V = tf.train.RMSPropOptimizer(self.learning_rate)
    grad_update_V = optimizer_V.minimize(cost_V, var_list=value_network_params)
    grad_V = K.gradients(cost_V, value_network_params)

    # store variables and update functions for access
    self.graph_ops = {
        "R": R,
        "s": s,
        "pi_values": pi_values,
        "V_values": V_values,
        "st": st,
        "reset_target_policy_network_params": reset_target_policy_network_params,
        "reset_target_value_network_params": reset_target_value_network_params,
        "reset_local_policy_network_params": reset_local_policy_network_params,
        "reset_local_value_network_params": reset_local_value_network_params,
        "a": a,
        "grad_update_pi": grad_update_pi,
        "cost_pi": cost_pi,
        "grad_pi": grad_pi,
        "grad_update_V": grad_update_V,
        "cost_V": cost_V,
        "grad_V": grad_V,
    }

def compute_sigma_reg(self, y_true, y_pred):
    if self.logvar_map is not None:
        logvar_map = self.logvar_map
    elif self.var_map is not None:
        logvar_map = K.log(self.var_map + 1e-8)

    # we will scale later to K.sum
    return 0.5 * K.clip(logvar_map, -100, 100)

def label_reg_loss(y_true, y_pred):
    # KL-div
    y_true = K.clip(y_true, K.epsilon(), 1)
    y_pred = K.clip(y_pred, K.epsilon(), 1)
    y_true_mean = K.mean(y_true, axis=0)
    y_pred_mean = K.mean(y_pred, axis=0)
    return K.sum(y_true_mean * K.log(y_true_mean / y_pred_mean), axis=-1)

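# Sanity check (added, not part of the source): identical predicted and true
# batch-mean label distributions give zero KL divergence.
p = K.constant([[0.2, 0.8], [0.6, 0.4]])
print(K.eval(label_reg_loss(p, p)))  # -> 0.0
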
def __call__(self, x):
    xshape = K.int_shape(x)
    if self.division_idx is None:
        # integer division: division_idx is used as a slice index
        self.division_idx = xshape[-1] // 2
    x = K.reshape(x, (-1, xshape[-1]))
    x /= K.sqrt(K.sum(K.square(x), axis=0, keepdims=True))
    # xx = K.dot(K.transpose(x), x)
    xx = K.sum(x[:, :self.division_idx] * x[:, self.division_idx:], axis=0)
    return self.gamma * K.sum(K.log(1.0 + K.exp(self.lam * (xx - 1.0))))

def soft_max_loss(y_true, y_pred):
    y_cal = y_true * y_pred
    y_cal_max = K.sum(y_cal, axis=2)
    return -K.sum(K.log(K.clip(y_cal_max, K.epsilon(), 1 - K.epsilon()))) \
        / y_true.size

def boundary_loss(self, y_true, y_pred):
    """
    Boundary seeking loss.
    Reference: https://wiseodd.github.io/techblog/2017/03/07/boundary-seeking-gan/
    """
    return 0.5 * K.mean((K.log(y_pred) - K.log(1 - y_pred)) ** 2)

def call(self, inputs):
    return (1 / self.alpha) * K.log(
        (1 + K.exp(self.alpha * inputs)) / (1 + K.exp(self.alpha * (inputs - 1))))

def fn(y, yhat):
    rho_hat = fnr_approx(y, yhat)
    return K.relu(-K.log(1 - rho_hat + rho))

def bce_logdice_loss(y_true, y_pred):
    return binary_crossentropy(y_true, y_pred) \
        - K.log(1. - dice_loss(y_true, y_pred))

def crossentropy_pos(y, yhat):
    # used by ping-pong
    yhat = K.clip(yhat, K.epsilon(), 1 - K.epsilon())
    return -K.sum(y * K.log(yhat)) / (K.sum(y) + K.epsilon())

def mean_squared_logarithmic_error(y_true, y_pred):
    first_log = K.log(K.clip(y_pred, K.epsilon(), None) + 1.)
    second_log = K.log(K.clip(y_true, K.epsilon(), None) + 1.)
    return K.mean(K.square(first_log - second_log), axis=-1)

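# Added check (assumption for illustration): when y_pred equals y_true the
# squared log difference vanishes, so the loss should evaluate to 0.
y = K.constant([[1.0, 2.0, 3.0]])
print(K.eval(mean_squared_logarithmic_error(y, y)))  # -> [0.]
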
def logloss(y_true, y_pred):  # policy loss
    return -K.sum(K.log(y_true * y_pred + (1 - y_true) * (1 - y_pred) + const),
                  axis=-1) \
        + BETA * K.sum(y_pred * K.log(y_pred + const)
                       + (1 - y_pred) * K.log(1 - y_pred + const))  # regularisation term

def iou_loss(y_true, y_pred):
    # loss for the iou value
    return -K.log(iou_metric_mean(y_true, y_pred))

def custom_loss(y_true, y_pred):
    loss1 = binary_crossentropy(y_true, y_pred)
    loss2 = mean_iou(y_true, y_pred)
    a1 = 1
    a2 = 0
    return a1 * loss1 + a2 * K.log(loss2)

def loss(self, y_true, y_pred):
    # handle for config
    mc = self.config

    # slice y_true
    input_mask = y_true[:, :, 0]
    input_mask = K.expand_dims(input_mask, axis=-1)
    box_input = y_true[:, :, 1:5]
    box_delta_input = y_true[:, :, 5:9]
    labels = y_true[:, :, 9:]

    # number of objects. Used to normalize bbox and classification loss
    num_objects = K.sum(input_mask)

    # before computing the losses we need to slice the network outputs
    pred_class_probs, pred_conf, pred_box_delta = ut.slice_predictions(
        y_pred, mc)
    tf.print(pred_class_probs)
    tf.print(pred_conf)

    # compute boxes
    det_boxes = ut.boxes_from_deltas(pred_box_delta, mc)

    # again, unstack is not available in the pure keras backend
    unstacked_boxes_pred = []
    unstacked_boxes_input = []
    for i in range(4):
        unstacked_boxes_pred.append(det_boxes[:, :, i])
        unstacked_boxes_input.append(box_input[:, :, i])

    # compute the ious
    ious = ut.tensor_iou(ut.bbox_transform(unstacked_boxes_pred),
                         ut.bbox_transform(unstacked_boxes_input),
                         input_mask, mc)

    # compute class loss, add a small value into log to prevent blowing up
    class_loss = K.sum(
        labels * (-K.log(pred_class_probs + mc.EPSILON))
        + (1 - labels) * (-K.log(1 - pred_class_probs + mc.EPSILON))
        * input_mask * mc.LOSS_COEF_CLASS) / num_objects

    # bounding box loss
    bbox_loss = (K.sum(mc.LOSS_COEF_BBOX * K.square(
        input_mask * (pred_box_delta - box_delta_input))) / num_objects)

    # reshape input for correct broadcasting
    input_mask = K.reshape(input_mask, [mc.BATCH_SIZE, mc.ANCHORS_NO])

    # confidence score loss
    conf_loss = K.mean(
        K.sum(K.square(ious - pred_conf)
              * (input_mask * mc.LOSS_COEF_CONF_POS / num_objects
                 + (1 - input_mask) * mc.LOSS_COEF_CONF_NEG
                 / (mc.ANCHORS_NO - num_objects)),
              axis=[1]),
    )

    # add above losses
    total_loss = class_loss + conf_loss + bbox_loss
    return total_loss

def log_prob(self, x):
    # Determinant of the diagonal covariance matrix is the product of variances.
    log_det = K.sum(K.log(self.var))
    return -K.sum(K.square(x - self.mean) / (2 * self.var), axis=-1) - log_det / 2

def PSNR(y_true, y_pred):
    max_pixel = 1.0
    return 10.0 * (1.0 / math.log(10)) * K.log(
        (max_pixel ** 2) / (K.mean(K.square(y_pred - y_true))))

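# Usage sketch (added; not from the original source). A uniform error of 0.1
# on images in [0, 1] gives MSE = 0.01, i.e. PSNR = 10 * log10(1 / 0.01) = 20 dB.
y_true = K.constant(np.zeros((1, 4, 4, 1), dtype='float32'))
y_pred = y_true + 0.1
print(K.eval(PSNR(y_true, y_pred)))  # -> ~20.0
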
def yolo4_loss(args, anchors, num_classes, ignore_thresh=.5, label_smoothing=0,
               use_focal_loss=False, use_focal_obj_loss=False,
               use_softmax_loss=False, use_giou_loss=False, use_diou_loss=False):
    '''Return yolo4_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 \
        else [[3, 4, 5], [0, 1, 2]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
                   for l in range(num_layers)]
    loss = 0
    total_location_loss = 0
    total_confidence_loss = 0
    total_class_loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]
        if label_smoothing:
            true_class_probs = _smooth_labels(true_class_probs, label_smoothing)

        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes,
            input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]]
                            * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = tf.while_loop(lambda b, *args: b < m, loop_body,
                                       [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        if use_focal_obj_loss:
            # Focal loss for objectness confidence
            confidence_loss = sigmoid_focal_loss(object_mask, raw_pred[..., 4:5])
        else:
            confidence_loss = object_mask * K.binary_crossentropy(
                object_mask, raw_pred[..., 4:5], from_logits=True) \
                + (1 - object_mask) * K.binary_crossentropy(
                    object_mask, raw_pred[..., 4:5],
                    from_logits=True) * ignore_mask

        if use_focal_loss:
            # Focal loss for classification score
            if use_softmax_loss:
                class_loss = softmax_focal_loss(true_class_probs,
                                                raw_pred[..., 5:])
            else:
                class_loss = sigmoid_focal_loss(true_class_probs,
                                                raw_pred[..., 5:])
        else:
            if use_softmax_loss:
                # use softmax style classification output
                class_loss = object_mask * K.expand_dims(
                    K.categorical_crossentropy(
                        true_class_probs, raw_pred[..., 5:], from_logits=True),
                    axis=-1)
            else:
                # use sigmoid style classification output
                class_loss = object_mask * K.binary_crossentropy(
                    true_class_probs, raw_pred[..., 5:], from_logits=True)

        if use_giou_loss:
            # Calculate GIoU loss as location loss
            raw_true_box = y_true[l][..., 0:4]
            giou = box_giou(pred_box, raw_true_box)
            giou_loss = object_mask * box_loss_scale * (1 - giou)
            giou_loss = K.sum(giou_loss) / mf
            location_loss = giou_loss
        elif use_diou_loss:
            # Calculate DIoU loss as location loss
            raw_true_box = y_true[l][..., 0:4]
            diou = box_diou(pred_box, raw_true_box)
            diou_loss = object_mask * box_loss_scale * (1 - diou)
            diou_loss = K.sum(diou_loss) / mf
            location_loss = diou_loss
        else:
            # Standard YOLO location loss
            # K.binary_crossentropy is helpful to avoid exp overflow.
            xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
                raw_true_xy, raw_pred[..., 0:2], from_logits=True)
            wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
                raw_true_wh - raw_pred[..., 2:4])
            xy_loss = K.sum(xy_loss) / mf
            wh_loss = K.sum(wh_loss) / mf
            location_loss = xy_loss + wh_loss

        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += location_loss + confidence_loss + class_loss
        total_location_loss += location_loss
        total_confidence_loss += confidence_loss
        total_class_loss += class_loss

    # Fit for tf 2.0.0 loss shape
    loss = K.expand_dims(loss, axis=-1)
    return loss  # , total_location_loss, total_confidence_loss, total_class_loss

def log(x):
    return K.log(K.clip(x, min_value=1e-7, max_value=10000))

def call(self, inputs):
    return inputs * K.tanh(K.log(K.pow(1 + K.exp(inputs), self.beta)))

def fn(y, yhat):
    yhat = K.clip(yhat, K.epsilon(), 1 - K.epsilon())
    return -K.mean((1 - rho) * y * K.log(yhat) + rho * (1 - y) * K.log(1 - yhat))

def call(self, inputs):
    return K.log(K.softmax(inputs))

def crossentropy(y, yhat):
    yhat = K.clip(yhat, K.epsilon(), 1 - K.epsilon())
    return -K.mean(y * K.log(yhat) + (1 - y) * K.log(1 - yhat))

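# Added example (illustrative only): a perfect prediction scores ~0, while a
# maximally wrong one is capped near -log(K.epsilon()) by the clipping.
y = K.constant([0.0, 1.0])
print(K.eval(crossentropy(y, y)))        # -> ~1e-7
print(K.eval(crossentropy(y, 1.0 - y)))  # -> ~16.1
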
def abs_KL_div(y_true, y_pred):
    y_true = K.clip(y_true, K.epsilon(), None)
    y_pred = K.clip(y_pred, K.epsilon(), None)
    # return K.sum(K.abs((y_true - y_pred) * (K.log(y_true / y_pred))), axis=-1)
    return K.sum((y_true - y_pred) * (K.log(y_true / y_pred)), axis=-1)

def run(self):
    self.make_directory()
    self.make_dataset()
    train_batch_generator = self.train_batch_generator()

    os.environ['CUDA_VISIBLE_DEVICES'] = self.gpu_id
    K.set_image_data_format('channels_last')
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    tf.Session(config=config)

    self.build_network()
    real_A = self.network_G.input
    fake_B = self.network_G.output
    real_B = self.network_D.inputs[1]
    output_D_real = self.network_D([real_A, real_B])
    output_D_fake = self.network_D([real_A, fake_B])

    loss_FN = lambda output, target: -K.mean(
        K.log(output + 1e-12) * target
        + K.log(1 - output + 1e-12) * (1 - target))
    loss_D_real = loss_FN(output_D_real, K.ones_like(output_D_real))
    loss_D_fake = loss_FN(output_D_fake, K.zeros_like(output_D_fake))
    loss_G_fake = loss_FN(output_D_fake, K.ones_like(output_D_fake))
    loss_L = K.mean(K.abs(fake_B - real_B))

    loss_D = loss_D_real + loss_D_fake
    training_updates_D = Adam(lr=2e-4, beta_1=0.5).get_updates(
        self.network_D.trainable_weights, [], loss_D)
    network_D_train = K.function([real_A, real_B], [loss_D / 2.0],
                                 training_updates_D)

    loss_G = loss_G_fake + 100 * loss_L
    training_updates_G = Adam(lr=2e-4, beta_1=0.5).get_updates(
        self.network_G.trainable_weights, [], loss_G)
    network_G_train = K.function([real_A, real_B], [loss_G_fake, loss_L],
                                 training_updates_G)

    t0 = time.time()
    t1 = time.time()
    self.iter_gen, epoch = 0, 0
    err_L, err_G, err_D = 0, 0, 0
    err_L_sum, err_G_sum, err_D_sum = 0, 0, 0

    print('\n--------------------------------\n')
    print('\nNow start below session!\n')
    print('Mode: %s' % self.mode)
    print('Checkpoint save path: %s' % (self.root_ckpt))
    print('Validation snap save path: %s' % (self.root_snap))
    print('Test result save path: %s' % (self.root_test))
    print('# of train, validation, and test datasets : %d, %d, %d'
          % (self.nb_train, self.nb_validation, self.nb_test))
    print('\n--------------------------------\n')

    while self.iter_gen <= self.iter_max:
        epoch, train_A, train_B = next(train_batch_generator)
        err_G, err_L = network_G_train([train_A, train_B])
        err_D, = network_D_train([train_A, train_B])

        err_D_sum += err_D
        err_G_sum += err_G
        err_L_sum += err_L
        self.iter_gen += self.bsize

        if self.iter_gen % self.iter_display == 0:
            err_D_mean = err_D_sum / self.iter_display
            err_G_mean = err_G_sum / self.iter_display
            err_L_mean = err_L_sum / self.iter_display
            print('[%d][%d/%d] LOSS_D: %5.3f LOSS_G: %5.3f LOSS_L: %5.3f '
                  'T: %dsec/%dits, Total T: %d'
                  % (epoch, self.iter_gen, self.iter_max, err_D_mean,
                     err_G_mean, err_L_mean, time.time() - t1,
                     self.iter_display, time.time() - t0))
            err_L_sum, err_G_sum, err_D_sum = 0, 0, 0
            t1 = time.time()

        if self.iter_gen % self.iter_save == 0:
            dst_model_G = '%s/%s.iter.%07d.G.h5' % (self.root_ckpt, self.mode,
                                                    self.iter_gen)
            dst_model_D = '%s/%s.iter.%07d.D.h5' % (self.root_ckpt, self.mode,
                                                    self.iter_gen)
            self.network_G.save(dst_model_G)
            self.network_D.save(dst_model_D)
            print('network_G and network_D are saved under %s'
                  % (self.root_ckpt))
            self.run_validation()
            self.run_test()
            t1 = time.time()

def binary_crossentropy(x, y):
    return -(y * K.log(x) + (1 - y) * K.log(1 - x))

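# Note (added): unlike the clipped variants above, this returns the raw
# elementwise loss with no reduction and no clipping, so the caller must
# average it and keep x strictly inside (0, 1).
print(K.eval(binary_crossentropy(K.constant([0.9]), K.constant([1.0]))))
# -> [~0.105], i.e. -log(0.9)
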
def call(self, inputs):
    return K.log(K.sigmoid(inputs))

def my_loss(y_true, y_pred):
    # return K.log(K.mean(1 + mean_squared_error(y_true, y_pred)))
    # return (K.sum(y_true, axis=[1, 2]) - K.sum(y_pred, axis=[1, 2])) ** 2
    return (K.log(1 + K.sum(y_true, axis=[1, 2]))
            - K.log(1 + K.sum(y_pred, axis=[1, 2]))) ** 2

def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
    '''Return yolo_loss tensor

    Parameters
    ----------
    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
    y_true: list of array, the output of preprocess_true_boxes
    anchors: array, shape=(N, 2), wh
    num_classes: integer
    ignore_thresh: float, the iou threshold whether to ignore object confidence loss

    Returns
    -------
    loss: tensor, shape=(1,)

    '''
    num_layers = len(anchors) // 3  # default setting
    yolo_outputs = args[:num_layers]
    y_true = args[num_layers:]
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 \
        else [[3, 4, 5], [1, 2, 3]]
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))
    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3], K.dtype(y_true[0]))
                   for l in range(num_layers)]
    loss = 0
    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    for l in range(num_layers):
        object_mask = y_true[l][..., 4:5]
        true_class_probs = y_true[l][..., 5:]

        grid, raw_pred, pred_xy, pred_wh = yolo_head(
            yolo_outputs[l], anchors[anchor_mask[l]], num_classes,
            input_shape, calc_loss=True)
        pred_box = K.concatenate([pred_xy, pred_wh])

        # Darknet raw box to calculate loss.
        raw_true_xy = y_true[l][..., :2] * grid_shapes[l][::-1] - grid
        raw_true_wh = K.log(y_true[l][..., 2:4] / anchors[anchor_mask[l]]
                            * input_shape[::-1])
        raw_true_wh = K.switch(object_mask, raw_true_wh,
                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
        box_loss_scale = 2 - y_true[l][..., 2:3] * y_true[l][..., 3:4]

        # Find ignore mask, iterate over each of batch.
        ignore_mask = tf.TensorArray(K.dtype(y_true[0]), size=1,
                                     dynamic_size=True)
        object_mask_bool = K.cast(object_mask, 'bool')

        def loop_body(b, ignore_mask):
            true_box = tf.boolean_mask(y_true[l][b, ..., 0:4],
                                       object_mask_bool[b, ..., 0])
            iou = box_iou(pred_box[b], true_box)
            best_iou = K.max(iou, axis=-1)
            ignore_mask = ignore_mask.write(
                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
            return b + 1, ignore_mask

        _, ignore_mask = K.control_flow_ops.while_loop(
            lambda b, *args: b < m, loop_body, [0, ignore_mask])
        ignore_mask = ignore_mask.stack()
        ignore_mask = K.expand_dims(ignore_mask, -1)

        # K.binary_crossentropy is helpful to avoid exp overflow.
        xy_loss = object_mask * box_loss_scale * K.binary_crossentropy(
            raw_true_xy, raw_pred[..., 0:2], from_logits=True)
        wh_loss = object_mask * box_loss_scale * 0.5 * K.square(
            raw_true_wh - raw_pred[..., 2:4])
        confidence_loss = object_mask * K.binary_crossentropy(
            object_mask, raw_pred[..., 4:5], from_logits=True) \
            + (1 - object_mask) * K.binary_crossentropy(
                object_mask, raw_pred[..., 4:5], from_logits=True) * ignore_mask
        class_loss = object_mask * K.binary_crossentropy(
            true_class_probs, raw_pred[..., 5:], from_logits=True)

        xy_loss = K.sum(xy_loss) / mf
        wh_loss = K.sum(wh_loss) / mf
        confidence_loss = K.sum(confidence_loss) / mf
        class_loss = K.sum(class_loss) / mf
        loss += xy_loss + wh_loss + confidence_loss + class_loss
        if print_loss:
            loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss,
                                   class_loss, K.sum(ignore_mask)],
                            message='loss: ')
    return loss

def custom_loss(y_true, y_pred):
    return 4 * weighted_binary_crossentropy(y_true, y_pred) \
        - K.log(brian_f1(y_true, y_pred))

def call(self, inputs):
    output = K.cast(K.greater(self.alpha, 0), 'float32') \
        * (K.exp(self.alpha * inputs) - 1) / self.alpha + self.alpha \
        + K.cast(K.less(self.alpha, 0), 'float32') \
        * (-(K.log(1 - self.alpha * (inputs + self.alpha))) / self.alpha) \
        + K.cast(K.equal(self.alpha, 0), 'float32') * inputs
    return output

def _loss_generator(y_true, y_pred):
    y_pred = K.clip(y_pred, _EPSILON, 1.0 - _EPSILON)
    out = -K.log(y_pred)
    return K.mean(out, axis=-1)

def focal_loss_fixed(y_true, y_pred):
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
    return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(K.epsilon() + pt_1)) \
        - K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0 + K.epsilon()))

def modified_categorical_crossentropy(y_mat, prob_fcst):
    prob_obs_cat = K.sum(y_mat * prob_fcst, axis=1)
    return -K.mean(K.log(prob_obs_cat))

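# Minimal check (added; not from the source): a one-hot target whose true
# class receives probability 0.5 yields -log(0.5) ~= 0.693.
y_mat = K.constant([[0., 1., 0.]])
prob_fcst = K.constant([[0.25, 0.5, 0.25]])
print(K.eval(modified_categorical_crossentropy(y_mat, prob_fcst)))  # -> ~0.693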