def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))

    t = K.cast(self.iterations, K.floatx()) + 1
    lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                 (1. - K.pow(self.beta_1, t)))

    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    if self.amsgrad:
        vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    else:
        vhats = [K.zeros(1) for _ in params]
    self.weights = [self.iterations] + ms + vs + vhats

    for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
        # Learning rate multipliers
        if self.multipliers:
            multiplier = [mult for mult in self.multipliers if mult in p.name]
        else:
            multiplier = None
        if multiplier:
            new_lr_t = lr_t * self.multipliers[multiplier[0]]
            if self.debug_verbose:
                print('Setting {} to learning rate {}'.format(
                    multiplier[0], new_lr_t))
                print(K.get_value(new_lr_t))
        else:
            new_lr_t = lr_t
            if self.debug_verbose:
                print('No change in learning rate {}'.format(p.name))
                print(K.get_value(new_lr_t))

        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
        if self.amsgrad:
            vhat_t = K.maximum(vhat, v_t)
            p_t = p - new_lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
            self.updates.append(K.update(vhat, vhat_t))
        else:
            p_t = p - new_lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        new_p = p_t

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(K.update(p, new_p))
    return self.updates
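# Context sketch (an assumption, not taken from the snippets themselves):
# get_updates bodies like the ones in this collection typically live on a
# keras.optimizers.Optimizer subclass whose __init__ creates the hyperparameter
# variables referenced as self.lr, self.beta_1, self.iterations, etc.
# The class name and the extra attributes (multipliers, debug_verbose) below
# are illustrative only.
from keras import backend as K
from keras.optimizers import Optimizer


class AdamWithMultipliers(Optimizer):  # hypothetical name
    def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8,
                 decay=0., amsgrad=False, multipliers=None,
                 debug_verbose=False, **kwargs):
        super(AdamWithMultipliers, self).__init__(**kwargs)
        with K.name_scope(self.__class__.__name__):
            # Backend variables so the values can be updated at run time.
            self.iterations = K.variable(0, dtype='int64', name='iterations')
            self.lr = K.variable(lr, name='lr')
            self.beta_1 = K.variable(beta_1, name='beta_1')
            self.beta_2 = K.variable(beta_2, name='beta_2')
            self.decay = K.variable(decay, name='decay')
        self.epsilon = epsilon
        self.initial_decay = decay
        self.amsgrad = amsgrad
        self.multipliers = multipliers or {}
        self.debug_verbose = debug_verbose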
def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))

    t = K.cast(self.iterations, K.floatx()) + 1

    # Applies bounds on actual learning rate
    step_size = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                      (1. - K.pow(self.beta_1, t)))
    final_lr = self.final_lr * lr / self.base_lr
    lower_bound = final_lr * (1. - 1. / (self.gamma * t + 1.))
    upper_bound = final_lr * (1. + 1. / (self.gamma * t))

    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    if self.amsbound:
        vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    else:
        vhats = [K.zeros(1) for _ in params]
    self.weights = [self.iterations] + ms + vs + vhats

    for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
        # Apply weight decay
        if self.weight_decay != 0.:
            g += self.weight_decay * K.stop_gradient(p)

        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

        if self.amsbound:
            vhat_t = K.maximum(vhat, v_t)
            denom = (K.sqrt(vhat_t) + self.epsilon)
            self.updates.append(K.update(vhat, vhat_t))
        else:
            denom = (K.sqrt(v_t) + self.epsilon)

        # Compute the bounds
        step_size_p = step_size * K.ones_like(denom)
        step_size_p_bound = step_size_p / denom
        bounded_lr_t = m_t * K.minimum(
            K.maximum(step_size_p_bound, lower_bound), upper_bound)

        p_t = p - bounded_lr_t

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        new_p = p_t

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(K.update(p, new_p))
    return self.updates
def focal_loss(y_true, y_pred):
    gamma = 2.0
    alpha = 0.25
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
    return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) - K.sum(
        (1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0))
def build_loss(self):
    # Infinity norm
    if np.isinf(self.p):
        value = K.max(self.img)
    else:
        value = K.pow(K.sum(K.pow(K.abs(self.img), self.p)), 1. / self.p)
    return normalize(self.img, value)
def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))

    t = K.cast(self.iterations, K.floatx()) + 1
    # Bias corrections according to the Adam paper.
    lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                 (1. - K.pow(self.beta_1, t)))

    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    self.weights = [self.iterations] + ms + vs

    for p, g, m, v in zip(params, grads, ms, vs):
        # Add a lr multiplier for vars outside excluded_vars.
        if p.name in self.excluded_vars:
            multiplied_lr_t = lr_t
        else:
            multiplied_lr_t = lr_t * self.lr_mult

        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)

        # Schedule multiplier eta_t = 1 for simple AdamW. According to the
        # AdamW paper, eta_t can be fixed, decay, or also be used for
        # warm restarts (AdamWR to come).
        eta_t = 1.
        p_t = p - eta_t * (multiplied_lr_t * m_t /
                           (K.sqrt(v_t) + self.epsilon))

        if self.weight_decay != 0:
            # Normalized weight decay according to the AdamW paper.
            w_d = self.weight_decay * K.sqrt(
                self.batch_size / (self.samples_per_epoch * self.epochs))
            p_t = p_t - eta_t * (w_d * p)

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        new_p = p_t

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(K.update(p, new_p))
    return self.updates
def get_updates_Padam(self, loss, params):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    base_lr = self._optimizer.learning_rate
    if self.initial_decay > 0:
        base_lr = base_lr * (1. / (1. + self.decay * K.cast(
            self.iterations, K.dtype(self.decay))))

    t = K.cast(self.iterations, K.floatx()) + 1
    lr_t = base_lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                      (1. - K.pow(self.beta_1, t)))

    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    if self.amsgrad:
        vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    else:
        vhats = [K.zeros(1) for _ in params]
    self.weights = [self.iterations] + ms + vs + vhats

    for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
        if self._get_multiplier(p) is None:
            multiplier = 1.0
        else:
            multiplier = self._get_multiplier(p)

        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
        if self.amsgrad:
            vhat_t = K.maximum(vhat, v_t)
            denom = (K.sqrt(vhat_t) + self.epsilon)
            self.updates.append(K.update(vhat, vhat_t))
        else:
            denom = (K.sqrt(v_t) + self.epsilon)

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))

        # Partial momentum adaption.
        new_p = p - (lr_t * multiplier * (m_t / (denom**(self.partial * 2))))

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(K.update(p, new_p))
    return self.updates
def gelu(x):
    """GELU activation, described in the paper
    "Gaussian Error Linear Units (GELUs)"
    https://arxiv.org/pdf/1606.08415.pdf
    """
    c = math.sqrt(2 / math.pi)
    return 0.5 * x * (1 + K.tanh(c * (x + 0.044715 * K.pow(x, 3))))
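# Usage sketch (an assumption, not from the snippet): gelu above expects
# `import math` and `from keras import backend as K` at module level, and can
# then be passed directly as a custom activation, for example to a Dense layer.
import math

from keras import backend as K
from keras.layers import Dense

hidden = Dense(256, activation=gelu)  # gelu defined above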
def binary_focal_loss_fixed(y_true, y_pred):
    """
    :param y_true: A tensor of the same shape as `y_pred`
    :param y_pred: A tensor resulting from a sigmoid
    :return: Output tensor.
    """
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))

    epsilon = K.epsilon()
    # Clip to prevent NaN's and Inf's
    pt_1 = K.clip(pt_1, epsilon, 1. - epsilon)
    pt_0 = K.clip(pt_0, epsilon, 1. - epsilon)

    return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) \
        - K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0))
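# Note (an assumption): `alpha` and `gamma` above are free variables, which
# suggests the function is returned from a factory that closes over them.
# A minimal, self-contained sketch of that pattern:
import tensorflow as tf
from keras import backend as K


def binary_focal_loss(gamma=2., alpha=.25):
    def binary_focal_loss_fixed(y_true, y_pred):
        # Same computation as the snippet above, closing over gamma and alpha.
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
        epsilon = K.epsilon()
        pt_1 = K.clip(pt_1, epsilon, 1. - epsilon)
        pt_0 = K.clip(pt_0, epsilon, 1. - epsilon)
        return -K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1)) \
            - K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0))
    return binary_focal_loss_fixed

# Illustrative usage: model.compile(optimizer='adam',
#                                   loss=binary_focal_loss(gamma=2., alpha=.25))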
def build_loss(self):
    r"""Implements the N-dim version of function
    $$TV^{\beta}(x) = \sum_{whc} \left ( \left ( x(h, w+1, c) - x(h, w, c) \right )^{2}
    + \left ( x(h+1, w, c) - x(h, w, c) \right )^{2} \right )^{\frac{\beta}{2}}$$
    to return total variation for all images in the batch.
    """
    image_dims = K.ndim(self.img) - 2

    # Constructing slice [1:] + [:-1] * (image_dims - 1) and [:-1] * (image_dims)
    start_slice = [slice(1, None, None)] + [slice(None, -1, None)
                                            for _ in range(image_dims - 1)]
    end_slice = [slice(None, -1, None) for _ in range(image_dims)]
    samples_channels_slice = [slice(None, None, None), slice(None, None, None)]

    # Compute pixel diffs by rolling slices to the right per image dim.
    tv = None
    for i in range(image_dims):
        ss = tuple(samples_channels_slice + start_slice)
        es = tuple(samples_channels_slice + end_slice)
        diff_square = K.square(self.img[utils.slicer[ss]] -
                               self.img[utils.slicer[es]])
        tv = diff_square if tv is None else tv + diff_square

        # Roll over to next image dim
        start_slice = np.roll(start_slice, 1).tolist()
        end_slice = np.roll(end_slice, 1).tolist()

    tv = K.sum(K.pow(tv, self.beta / 2.))
    return normalize(self.img, tv)
def focal_loss(y_true, y_pred):
    # Define epsilon so that backpropagation will not result in NaN
    # for the zero-divisor case
    epsilon = K.epsilon()
    # Clip the prediction value
    y_pred = K.clip(y_pred, epsilon, 1.0 - epsilon)
    alpha_factor = K.ones_like(y_true) * alpha
    # Calculate p_t from the predictions
    p_t = tf.where(K.equal(y_true, 1), y_pred, 1 - y_pred)
    # Calculate alpha_t
    alpha_t = tf.where(K.equal(y_true, 1), alpha_factor, 1 - alpha_factor)
    # Calculate cross entropy
    cross_entropy = -K.log(p_t)
    weight = alpha_t * K.pow((1 - p_t), gamma)
    # Calculate focal loss
    loss = weight * cross_entropy
    # Sum the losses in mini_batch
    loss = K.sum(loss, axis=1)
    return loss
def smooth_l1(y_true, y_pred, sigma=3.0, axis=None):
    """Compute the smooth L1 loss of y_pred w.r.t. y_true.

    Args:
        y_true: Tensor from the generator of shape (B, N, 5).
            The last value for each box is the state of the anchor
            (ignore, negative, positive).
        y_pred: Tensor from the network of shape (B, N, 4).
        sigma: The point where the loss changes from L2 to L1.
        axis: Axis to sum over; inferred from the data format if None.

    Returns:
        The smooth L1 loss of y_pred w.r.t. y_true.
    """
    if axis is None:
        axis = 1 if K.image_data_format() == 'channels_first' else K.ndim(y_pred) - 1

    sigma_squared = sigma ** 2

    # Compute smooth L1 loss:
    # f(x) = 0.5 * (sigma * x)^2    if |x| < 1 / sigma^2
    #        |x| - 0.5 / sigma^2    otherwise
    regression_diff = K.abs(y_true - y_pred)  # |y - f(x)|
    regression_loss = tf.where(
        K.less(regression_diff, 1.0 / sigma_squared),
        0.5 * sigma_squared * K.pow(regression_diff, 2),
        regression_diff - 0.5 / sigma_squared)
    return K.sum(regression_loss, axis=axis)
def continuity_loss(x, im_height, im_width):
    assert K.ndim(x) == 4
    a = K.square(x[:, :im_height - 1, :im_width - 1, :] -
                 x[:, 1:, :im_width - 1, :])
    b = K.square(x[:, :im_height - 1, :im_width - 1, :] -
                 x[:, :im_height - 1, 1:, :])
    return K.sum(K.pow(a + b, 1.25))
def weighted_focal_loss(y_true, y_pred, n_classes=3, gamma=2.,
                        axis=None, from_logits=False):
    """Focal loss between an output tensor and a target tensor.
    Automatically computes the class weights from the target image
    and uses them to weight the cross entropy.

    Args:
        y_true: A tensor of the same shape as y_pred.
        y_pred: A tensor resulting from a softmax
            (unless from_logits is True, in which case y_pred
            is expected to be the logits).
        from_logits: Boolean, whether y_pred is the result of a softmax,
            or is a tensor of logits.

    Returns:
        tensor: Output tensor.
    """
    if from_logits:
        raise Exception('weighted_focal_loss cannot take logits')
    if axis is None:
        axis = 1 if K.image_data_format() == 'channels_first' else K.ndim(y_pred) - 1
    reduce_axis = [x for x in list(range(K.ndim(y_pred))) if x != axis]

    # Scale preds so that the class probas of each sample sum to 1
    y_pred = y_pred / K.sum(y_pred, axis=axis, keepdims=True)

    # Manual computation of crossentropy
    _epsilon = tf.convert_to_tensor(K.epsilon(), y_pred.dtype.base_dtype)
    y_pred = tf.clip_by_value(y_pred, _epsilon, 1. - _epsilon)

    y_true_cast = K.cast(y_true, K.floatx())
    total_sum = K.sum(y_true_cast)
    class_sum = K.sum(y_true_cast, axis=reduce_axis, keepdims=True)
    class_weights = 1.0 / K.cast_to_floatx(n_classes) * tf.divide(
        total_sum, class_sum + 1.)

    temp_loss = (K.pow(1. - y_pred, gamma) * K.log(y_pred) * class_weights)
    focal_loss = -K.sum(y_true * temp_loss, axis=axis)
    return focal_loss
def total_variation_loss(x):
    assert K.ndim(x) == 4
    # Channels-first total variation: difference along each spatial axis
    # against the same unshifted crop so both terms have matching shapes.
    a = K.square(x[:, :, :img_width - 1, :img_height - 1] -
                 x[:, :, 1:, :img_height - 1])
    b = K.square(x[:, :, :img_width - 1, :img_height - 1] -
                 x[:, :, :img_width - 1, 1:])
    return K.sum(K.pow(a + b, 1.25))
def total_variation_loss(x):
    assert K.ndim(x) == 4
    if K.image_data_format() == "channels_first":
        a = K.square(x[:, :, :img_width - 1, :img_height - 1] -
                     x[:, :, 1:, :img_height - 1])
        b = K.square(x[:, :, :img_width - 1, :img_height - 1] -
                     x[:, :, :img_width - 1, 1:])
    else:
        a = K.square(x[:, :img_width - 1, :img_height - 1, :] -
                     x[:, 1:, :img_height - 1, :])
        b = K.square(x[:, :img_width - 1, :img_height - 1, :] -
                     x[:, :img_width - 1, 1:, :])
    return K.sum(K.pow(a + b, 1.25))
def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))

    t = K.cast(self.iterations, K.floatx()) + 1
    beta_1_t = K.pow(self.beta_1, t)
    beta_2_t = K.pow(self.beta_2, t)
    rho = 2 / (1 - self.beta_2) - 1
    rho_t = rho - 2 * t * beta_2_t / (1 - beta_2_t)
    r_t = K.sqrt(K.relu(rho_t - 4) * K.relu(rho_t - 2) * rho /
                 ((rho - 4) * (rho - 2) * rho_t))
    flag = K.cast(rho_t > 4, K.floatx())

    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    self.weights = [self.iterations] + ms + vs

    for p, g, m, v in zip(params, grads, ms, vs):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
        mhat_t = m_t / (1 - beta_1_t)
        vhat_t = K.sqrt(v_t / (1 - beta_2_t))
        p_t = p - lr * mhat_t * (flag * r_t / (vhat_t + self.epsilon) +
                                 (1 - flag))

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        new_p = p_t

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(K.update(p, new_p))
    return self.updates
def custom_loss(self, y_true, y_pred):
    """
    GloVe's loss function; see section 3.1 of the original paper for details.
    :param y_true: The actual values, y_true = X_ij.
    :param y_pred: The predicted occurrences from the model ( w_i^T*w_j ).
    :return: The loss associated with this batch.
    """
    x_max = self.x_max
    alpha = self.alpha
    fxij = k.pow(k.clip(y_true / x_max, 0.0, 1.0), alpha)
    return k.sum(fxij * k.square(y_pred - k.log(y_true)), axis=-1)
def total_variation_loss(x):
    assert 4 == K.ndim(x)
    if K.image_dim_ordering() == 'th':
        a = K.square(x[:, :, :img_nrows - 1, :img_ncols - 1] -
                     x[:, :, 1:, :img_ncols - 1])
        b = K.square(x[:, :, :img_nrows - 1, :img_ncols - 1] -
                     x[:, :, :img_nrows - 1, 1:])
    else:
        a = K.square(x[:, :img_nrows - 1, :img_ncols - 1, :] -
                     x[:, 1:, :img_ncols - 1, :])
        b = K.square(x[:, :img_nrows - 1, :img_ncols - 1, :] -
                     x[:, :img_nrows - 1, 1:, :])
    return K.sum(K.pow(a + b, 1.25))
def total_variation_loss(x, img_nrows, img_ncols):
    assert 4 == K.ndim(x)
    if K.image_data_format() == 'channels_first':
        a = K.square(x[:, :, :img_nrows - 1, :img_ncols - 1] -
                     x[:, :, 1:, :img_ncols - 1])
        b = K.square(x[:, :, :img_nrows - 1, :img_ncols - 1] -
                     x[:, :, :img_nrows - 1, 1:])
    else:
        a = K.square(x[:, :img_nrows - 1, :img_ncols - 1, :] -
                     x[:, 1:, :img_ncols - 1, :])
        b = K.square(x[:, :img_nrows - 1, :img_ncols - 1, :] -
                     x[:, :img_nrows - 1, 1:, :])
    return K.sum(K.pow(a + b, 1.25))
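# Usage sketch (an assumption): in the classic neural style transfer setup,
# total variation losses like the one above are added to the objective built
# on the generated image tensor. All names below are illustrative.
from keras import backend as K

img_nrows, img_ncols = 256, 256
total_variation_weight = 1e-4

# Placeholder for the image being optimized (channels_last assumed here).
combination_image = K.placeholder((1, img_nrows, img_ncols, 3))

loss = K.variable(0.0)  # content and style terms would be added here too
loss = loss + total_variation_weight * total_variation_loss(
    combination_image, img_nrows, img_ncols)

# Gradients of the combined loss w.r.t. the generated image.
grads = K.gradients(loss, combination_image)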
def focal_crossentropy(y_true, y_pred):
    bce = K.binary_crossentropy(y_true, y_pred)

    y_pred = K.clip(y_pred, K.epsilon(), 1. - K.epsilon())
    p_t = (y_true * y_pred) + ((1 - y_true) * (1 - y_pred))

    alpha_factor = y_true * alpha + ((1 - alpha) * (1 - y_true))
    modulating_factor = K.pow((1 - p_t), gamma)

    # Compute the final loss and return
    return K.mean(alpha_factor * modulating_factor * bce, axis=-1)
def call(self, inputs, **kwargs):
    length = K.shape(inputs[0])[1] + K.shape(inputs[1])[1]
    inputs = K.tile(
        K.expand_dims(K.arange(length - 1, -1, -1, dtype=K.floatx()), axis=0),
        [K.shape(inputs[0])[0], 1],
    )
    if self.clamp_len is not None:
        inputs = K.clip(inputs, min_value=0, max_value=self.clamp_len)
    inputs = K.expand_dims(inputs, axis=-1)
    output_dim = K.cast(self.output_dim, K.floatx())
    ranges = K.expand_dims(K.arange(0.0, self.output_dim, 2.0), axis=0) / output_dim
    inverse = 1.0 / K.pow(10000.0, ranges)
    positions = inputs * inverse
    return K.concatenate([K.sin(positions), K.cos(positions)], axis=-1)
def get_updates(self, loss, params):
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    # Decoupled weight decay (4/6)
    wd = self.wd

    lr = self.lr
    if self.initial_decay > 0:
        lr *= (1. / (1. + self.decay * K.cast(self.iterations,
                                              K.dtype(self.decay))))
    # Decoupled weight decay (5/6)
    eta_t = lr / self.init_lr

    t = K.cast(self.iterations, K.floatx()) + 1
    lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                 (1. - K.pow(self.beta_1, t)))

    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    self.weights = [self.iterations] + ms + vs

    for p, g, m, v in zip(params, grads, ms, vs):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
        # Decoupled weight decay (6/6)
        p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon) - eta_t * wd * p

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        new_p = p_t

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        self.updates.append(K.update(p, new_p))
    return self.updates
def __call__(self, x):
    assert K.ndim(x.output) == 4
    x_out = x.output

    shape = K.shape(x_out)
    img_width, img_height, channel = (shape[1], shape[2], shape[3])
    size = img_width * img_height * channel

    if image_dim_ordering() == 'th':
        a = K.square(x_out[:, :, :img_width - 1, :img_height - 1] -
                     x_out[:, :, 1:, :img_height - 1])
        b = K.square(x_out[:, :, :img_width - 1, :img_height - 1] -
                     x_out[:, :, :img_width - 1, 1:])
    else:
        a = K.square(x_out[:, :img_width - 1, :img_height - 1, :] -
                     x_out[:, 1:, :img_height - 1, :])
        b = K.square(x_out[:, :img_width - 1, :img_height - 1, :] -
                     x_out[:, :img_width - 1, 1:, :])

    loss = self.weight * K.sum(K.pow(a + b, 1.25))
    return loss
def total_variation_loss(x):
    img_nrows = self.img_shape[1]
    img_ncols = self.img_shape[2]
    assert K.ndim(x) == 4
    if K.image_data_format() == 'channels_first':
        a = K.square(x[:, :, :img_nrows - 1, :img_ncols - 1] -
                     x[:, :, 1:, :img_ncols - 1])
        b = K.square(x[:, :, :img_nrows - 1, :img_ncols - 1] -
                     x[:, :, :img_nrows - 1, 1:])
    else:
        a = K.square(x[:, :img_nrows - 1, :img_ncols - 1, :] -
                     x[:, 1:, :img_ncols - 1, :])
        b = K.square(x[:, :img_nrows - 1, :img_ncols - 1, :] -
                     x[:, :img_nrows - 1, 1:, :])
    return K.sum(K.pow(a + b, 1.25))
def call(self, x, **kwargs):
    if (self.size is None) or (self.mode == 'sum'):
        self.size = int(x.shape[-1])
    batch_size, seq_len = K.shape(x)[0], K.shape(x)[1]
    position_j = 1. / K.pow(
        10000., 2 * K.arange(self.size / 2, dtype='float32') / self.size)
    position_j = K.expand_dims(position_j, 0)
    # K.arange does not support variable length, so build the position
    # indices with a cumulative sum instead.
    position_i = K.cumsum(K.ones_like(x[:, :, 0]), 1) - 1
    position_i = K.expand_dims(position_i, 2)
    position_ij = K.dot(position_i, position_j)
    position_ij = K.concatenate([K.cos(position_ij), K.sin(position_ij)], 2)
    if self.mode == 'sum':
        return position_ij + x
    elif self.mode == 'concat':
        return K.concatenate([position_ij, x], 2)
def ssd_loss(y_true, y_pred):
    # SSD loss
    # num_classes is hard-coded instead of tf.shape(y_true)[2] - 4 because
    # OpenCV does not support dynamic shapes.
    num_classes = 11
    y_true = tf.reshape(y_true, [-1, num_classes + 4])
    y_pred = tf.reshape(y_pred, [-1, num_classes - 1 + 4])
    eps = K.epsilon()

    # Split classification and localization output
    y_true_clf, y_true_loc = tf.split(y_true, [num_classes, 4], axis=-1)
    y_pred_clf, y_pred_loc = tf.split(y_pred, [num_classes - 1, 4], axis=-1)

    # Split foreground & background
    mask = y_true_clf[:, -1]
    ignore_mask = tf.where(tf.equal(mask, -1.), tf.zeros_like(mask),
                           tf.ones_like(mask))
    neg_mask = tf.where(tf.equal(mask, 1.), tf.ones_like(mask),
                        tf.zeros_like(mask))
    pos_mask = tf.where(tf.equal(mask, 0.), tf.ones_like(mask),
                        tf.zeros_like(mask))
    y_true_clf = tf.where(tf.not_equal(y_true_clf, 0),
                          tf.ones_like(y_true_clf),
                          tf.zeros_like(y_true_clf))
    num_pos = tf.reduce_sum(pos_mask)
    num_neg = tf.reduce_sum(neg_mask)

    # Focal loss
    y_pred_clf = K.clip(y_pred_clf, eps, 1. - eps)
    pt = tf.where(tf.equal(y_true_clf[:, :num_classes - 1], 1.),
                  y_pred_clf, 1. - y_pred_clf)
    loss = -K.pow(1. - pt, gamma) * tf.log(pt)
    clf_loss = tf.reduce_sum(alpha * loss, axis=-1)
    clf_loss = tf.reduce_sum(ignore_mask * clf_loss) / (num_pos + num_neg + eps)

    # Smooth L1 loss
    l1_loss = tf.abs(y_true_loc - y_pred_loc)
    l2_loss = 0.5 * (y_true_loc - y_pred_loc) ** 2
    loc_loss = tf.where(tf.less(l1_loss, 1.0), l2_loss, l1_loss - 0.5)
    loc_loss = tf.reduce_mean(loc_loss, axis=-1)
    loc_loss = tf.reduce_sum(loc_loss * pos_mask) / (num_pos + eps)

    # Total loss
    return alpha * clf_loss + loc_loss
def focal_loss(y_true, y_pred):
    # Define epsilon so that backpropagation will not result in NaN
    # for the zero-divisor case
    epsilon = K.epsilon()
    # Clip the prediction value
    y_pred = K.clip(y_pred, epsilon, 1.0 - epsilon)
    # Calculate cross entropy
    cross_entropy = -y_true * K.log(y_pred)
    # Calculate weight that consists of modulating factor and weighting factor
    weight = alpha * y_true * K.pow((1 - y_pred), gamma)
    # Calculate focal loss
    loss = weight * cross_entropy
    # Sum the losses in mini_batch
    loss = K.sum(loss, axis=1)
    return loss
def call(self, x, mask=None):
    if (self.size is None) or (self.mode == 'sum'):
        self.size = int(x.shape[-1])
    position_j = 1. / K.pow(
        10000., 2 * K.arange(self.size / 2, dtype='float32') / self.size)
    position_j = K.expand_dims(position_j, 0)
    position_i = tf.cumsum(K.ones_like(x[:, :, 0]), 1) - 1
    position_i = K.expand_dims(position_i, 2)
    position_ij = K.dot(position_i, position_j)
    outputs = K.concatenate([K.cos(position_ij), K.sin(position_ij)], 2)
    if self.mode == 'sum':
        if self.scale:
            outputs = outputs * self.size ** 0.5
        return x + outputs
    elif self.mode == 'concat':
        return K.concatenate([outputs, x], 2)
def categorical_focal_loss_fixed(y_true, y_pred):
    """
    :param y_true: A tensor of the same shape as `y_pred`
    :param y_pred: A tensor resulting from a softmax
    :return: Output tensor.
    """
    # Scale predictions so that the class probas of each sample sum to 1
    y_pred /= K.sum(y_pred, axis=-1, keepdims=True)

    # Clip the prediction value to prevent NaN's and Inf's
    epsilon = K.epsilon()
    y_pred = K.clip(y_pred, epsilon, 1. - epsilon)

    # Calculate Cross Entropy
    cross_entropy = -y_true * K.log(y_pred)

    # Calculate Focal Loss
    loss = alpha * K.pow(1 - y_pred, gamma) * cross_entropy

    # Sum the losses in mini_batch
    return K.sum(loss, axis=1)
def focal_loss(y_true, y_pred, gamma=2., alpha=.25):
    pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
    pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))
    return K.mean(-alpha * K.pow(1 - pt_1, gamma) * K.log(pt_1) -
                  (1 - alpha) * K.pow(pt_0, gamma) * K.log(1 - pt_0),
                  axis=-1)
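# Usage sketch (an assumption): because gamma and alpha default in the
# signature, the focal_loss above can be passed to compile directly like any
# (y_true, y_pred) loss. The tiny model below is illustrative only.
from keras.layers import Dense
from keras.models import Sequential

model = Sequential([Dense(1, activation='sigmoid', input_shape=(16,))])
model.compile(optimizer='adam', loss=focal_loss, metrics=['accuracy'])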