def smape(true, predicted):
    """Element-wise symmetric absolute percentage error with a floored denominator.

    :param true: true values
    :type true: np.array
    :param predicted: predicted values
    :type predicted: np.array
    :return: ``2 * |predicted - true| / max(|true| + |predicted| + 0.1, 0.6)``,
        evaluated element-wise; no mean/sum reduction is applied here
    :rtype: same kind of object the backend ops return for the given inputs
    """
    eps = 0.1
    # The denominator is floored at 0.5 + eps so that pairs of near-zero
    # values cannot make the ratio explode.
    magnitude = K.abs(true) + K.abs(predicted) + eps
    denominator = K.maximum(magnitude, 0.5 + eps)
    abs_error = K.abs(predicted - true)
    return abs_error / denominator * 2.0
def box_iou(b1, b2):
    """Broadcasted IoU between two collections of centre/size boxes.

    The last axis of each input is read as ``(x, y, w, h, ...)``: entries 0-1
    are used as the box centre and entries 2-3 as width and height.  ``b1``
    receives a new axis at position -2 and ``b2`` a new leading axis so the
    two sets broadcast against each other.

    Returns ``intersection / (area1 + area2 - intersection)``.  There is no
    guard against a zero denominator.
    """
    first = K.expand_dims(b1, axis=-2)
    first_centre, first_size = first[..., :2], first[..., 2:4]
    first_lo = first_centre - (first_size / 2.)
    first_hi = first_centre + (first_size / 2.)
    first_area = first_size[..., 0] * first_size[..., 1]

    second = K.expand_dims(b2, axis=0)
    second_centre, second_size = second[..., :2], second[..., 2:4]
    second_lo = second_centre - (second_size / 2.)
    second_hi = second_centre + (second_size / 2.)
    second_area = second_size[..., 0] * second_size[..., 1]

    # Overlap rectangle: largest lower corner to smallest upper corner,
    # clamped at zero extent when the boxes are disjoint.
    overlap_lo = K.maximum(first_lo, second_lo)
    overlap_hi = K.minimum(first_hi, second_hi)
    overlap_size = K.maximum(overlap_hi - overlap_lo, 0.0)
    overlap_area = overlap_size[..., 0] * overlap_size[..., 1]

    return overlap_area / (first_area + second_area - overlap_area)
def correlation(y_true, y_pred):
    """Pearson correlation between ``y_true`` and ``y_pred``, clamped to [-1, 1].

    Means and sums are taken over every element of each input (no axis is
    given), so the result is a single scalar.  The denominator is not
    protected against zero variance.
    """
    true_centered = y_true - K.mean(y_true)
    pred_centered = y_pred - K.mean(y_pred)

    covariance = K.sum(tf.multiply(true_centered, pred_centered))
    true_sq_sum = K.sum(K.square(true_centered))
    pred_sq_sum = K.sum(K.square(pred_centered))
    scale = K.sqrt(tf.multiply(true_sq_sum, pred_sq_sum))

    # Clamp to the valid correlation range to absorb rounding overshoot.
    return K.maximum(K.minimum(covariance / scale, 1.0), -1.0)
def saliency_maps(temp_model, grid_train_temp, range_top, range_ex, batch_size, layer_id=(-1, -2)):
    """Rank neurons by output weight and collect input gradients for their top samples.

    Parameters
    ----------
    temp_model : model object exposing ``layers`` and ``input``
    grid_train_temp : numpy array of samples; axis 0 indexes samples
    range_top : pair ``(start, stop)`` slicing the neurons sorted by
        descending weight
    range_ex : pair ``(start, stop)`` slicing the samples sorted by
        descending activation
    batch_size : number of samples fed per activation call
    layer_id : pair of layer indices; ``layer_id[0]`` is the layer whose
        first weight array is ranked, ``layer_id[1]`` the layer whose output
        column is taken as the neuron activation.  The default is an
        immutable tuple (it was a list, i.e. a shared mutable default).

    Returns
    -------
    top_neurons : indices of the selected neurons, descending by weight
    top_examples : int array, ``(n_neurons, n_examples)`` sample indices
    top_gradients : array, ``(n_neurons, n_examples) + grid_train_temp.shape[1:]``,
        the negated, std-normalised gradients of ``(activation - 4)**2``
        with respect to the model input

    Notes
    -----
    The output arrays are allocated with ``np.empty``; rows beyond
    ``len(top_neurons)`` (if the weight vector is shorter than the requested
    range) are left uninitialised.
    """
    n_neurons = range_top[1] - range_top[0]
    n_examples = range_ex[1] - range_ex[0]
    n_samples = grid_train_temp.shape[0]

    # allocation: indices of (sorted neurons, sorted samples)
    top_examples = np.empty((n_neurons, n_examples), dtype=int)
    # gradients, size: top neuron, top example, shape of one input sample
    top_gradients = np.empty((n_neurons, n_examples) + grid_train_temp.shape[1:])

    # batch boundaries, closed with the total sample count
    batch_edges = list(range(0, n_samples, batch_size)) + [n_samples]

    # weights on the output end, sorted descending
    weights = temp_model.layers[layer_id[0]].get_weights()[0].ravel()
    top_neurons = weights.argsort()[::-1][range_top[0]:range_top[1]]

    # loop-invariant pieces of the backend functions
    neuron_layer_output = temp_model.layers[layer_id[1]].output
    function_inputs = [temp_model.input, K.learning_phase()]

    print('Sorted order | neuron index | neuron weights')
    for n, neuron in enumerate(top_neurons):
        print(' {} | {} | {}'.format(n, neuron, weights[neuron]))

        neuron_output = neuron_layer_output[:, neuron]
        # activation of this neuron as a backend function (used to rank samples)
        act_func = K.function(function_inputs, [neuron_output])

        # loss = a monotonic function of the neuron's output
        loss = (neuron_output - 4)**2
        # gradients from the loss back to the model input, standardised by
        # their std (floored at the backend epsilon)
        grads = K.gradients(loss, temp_model.input)[0]
        grads = grads / K.maximum(K.std(grads), K.epsilon())
        grad_func = K.function(function_inputs, [grads])

        # activations of every sample, computed batch by batch; the second
        # input (learning phase) is fed 0
        act_values = np.zeros(n_samples)
        for start, stop in zip(batch_edges[:-1], batch_edges[1:]):
            act_values[start:stop] = act_func([grid_train_temp[start:stop], 0])[0]

        # most activating samples and the (negated) gradients at those samples
        top_examples[n] = act_values.argsort()[::-1][range_ex[0]:range_ex[1]]
        top_gradients[n, ...] = -grad_func([grid_train_temp[top_examples[n]], 0])[0]

    return top_neurons, top_examples, top_gradients
def compute_nms(args):
    """Per-class non-max suppression followed by a global top-k, padded to a fixed size.

    ``args`` unpacks into ``(boxes, classification)``.  For every class column
    of ``classification`` the scores above ``config.score_threshold`` are run
    through ``tf.image.non_max_suppression``; the survivors of all classes
    are merged, the best ``config.max_boxes`` are kept, and the three outputs
    are padded with -1 up to ``config.max_boxes`` entries.

    Returns ``[boxes, scores, labels]`` with static shapes
    ``(max_boxes, 4)``, ``(max_boxes,)`` and ``(max_boxes,)``; labels are int32.
    """
    boxes, classification = args

    def _suppress_class(class_scores, class_labels):
        # Rows passing the score threshold for this class.
        candidates = tf.where(backend.greater(class_scores, config.score_threshold))
        candidate_boxes = tf.gather_nd(boxes, candidates)
        candidate_scores = backend.gather(class_scores, candidates)[:, 0]
        keep = tf.image.non_max_suppression(candidate_boxes, candidate_scores,
                                            config.max_boxes)
        kept_rows = backend.gather(candidates, keep)
        kept_labels = tf.gather_nd(class_labels, kept_rows)
        # Pair each surviving row index with its class label.
        return backend.stack([kept_rows[:, 0], kept_labels], axis=1)

    n_classes = int(classification.shape[1])
    per_class_indices = [
        _suppress_class(
            classification[:, c],
            c * tf.ones((backend.shape(classification[:, c])[0], ), dtype='int64'))
        for c in range(n_classes)
    ]
    row_label_pairs = backend.concatenate(per_class_indices, axis=0)

    merged_scores = tf.gather_nd(classification, row_label_pairs)
    merged_labels = row_label_pairs[:, 1]

    # Keep at most max_boxes detections, best score first.
    top_scores, top_positions = tf.nn.top_k(
        merged_scores,
        k=backend.minimum(config.max_boxes, backend.shape(merged_scores)[0]))
    top_rows = backend.gather(row_label_pairs[:, 0], top_positions)
    top_boxes = backend.gather(boxes, top_rows)
    top_labels = backend.gather(merged_labels, top_positions)

    # Pad every output with -1 so downstream code sees a fixed length.
    missing = backend.maximum(0, config.max_boxes - backend.shape(top_scores)[0])
    top_boxes = tf.pad(top_boxes, [[0, missing], [0, 0]], constant_values=-1)
    top_scores = tf.pad(top_scores, [[0, missing]], constant_values=-1)
    top_labels = tf.pad(top_labels, [[0, missing]], constant_values=-1)
    top_labels = backend.cast(top_labels, 'int32')

    top_boxes.set_shape([config.max_boxes, 4])
    top_scores.set_shape([config.max_boxes])
    top_labels.set_shape([config.max_boxes])
    return [top_boxes, top_scores, top_labels]
def contrastive_loss(y_true, y_pred, margin=0.7):
    '''Contrastive loss from Hadsell-et-al.'06
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf

    Where ``y_true`` is 1 the squared prediction is penalised; where it is 0
    the squared shortfall of the prediction below ``margin`` is penalised.
    The mean over all elements is returned.
    '''
    same_label_term = y_true * K.square(y_pred)
    shortfall = K.maximum(margin - y_pred, 0.)
    different_label_term = (1. - y_true) * K.square(shortfall)
    return K.mean(same_label_term + different_label_term)
def asymmetric_outlier_mse(y_true, y_pred):
    """Log-cosh loss plus MSE-scaled penalties that grow for large targets.

    Three extra terms are added to ``logcosh(y_true, y_pred)``, each scaled by
    ``mean_squared_error(y_true, y_pred)``:

    * weight 40, active where ``y_true > 2`` and ``y_pred < y_true``
    * weight 20, active where ``y_true > 1`` and ``y_pred < y_true``
    * weight 5,  active where ``y_true > 1`` and ``y_pred > y_true``
    """
    mse = mean_squared_error(y_true, y_pred)
    pred_below_true = K.maximum(y_true - y_pred, 0.0)
    pred_above_true = K.maximum(y_pred - y_true, 0.0)
    target_above_one = K.maximum(y_true - 1.0, 0.0)
    target_above_two = K.maximum(y_true - 2.0, 0.0)

    top_over = 40.0 * target_above_two * pred_below_true * mse
    top_over = top_over + 20.0 * target_above_one * pred_below_true * mse
    top_under = 5.0 * target_above_one * pred_above_true * mse
    return top_over + top_under + logcosh(y_true, y_pred)
def get_updates(self, params, loss):
    """Build the list of backend update ops for a sign-based, per-weight step-size rule.

    For every parameter the method keeps a step size (``alpha``), the previous
    gradient and the previous weight delta.  The step size is scaled up when
    the gradient keeps its sign, scaled down when it flips, and the weight is
    moved against the gradient sign by that step size; on a sign flip the
    previous delta is reverted instead.

    Reads ``self.init_alpha``, ``self.scale_up``, ``self.scale_down``,
    ``self.max_alpha`` and ``self.min_alpha``; sets ``self.weights`` and
    ``self.updates``.

    :param params: parameters to optimise
    :param loss: loss whose gradients drive the updates
    :return: ``self.updates``
    """
    grads = self.get_gradients(loss, params)
    shapes = [K.shape(p) for p in params]
    # Per-parameter state: step sizes, previous gradients, previous deltas.
    alphas = [
        K.variable(K.ones(shape) * self.init_alpha) for shape in shapes
    ]
    old_grads = [K.zeros(shape) for shape in shapes]
    prev_weight_deltas = [K.zeros(shape) for shape in shapes]
    # NOTE(review): prev_weight_deltas is optimizer state too but is not
    # listed in self.weights — confirm whether that is intentional.
    self.weights = alphas + old_grads
    self.updates = []
    for param, grad, old_grad, prev_weight_delta, alpha in zip(
            params, grads, old_grads, prev_weight_deltas, alphas):
        # equation 4: grow the step when the sign is kept, shrink when it
        # flips, leave unchanged when the product is zero
        new_alpha = K.switch(
            K.greater(grad * old_grad, 0),
            K.minimum(alpha * self.scale_up, self.max_alpha),
            K.switch(K.less(grad * old_grad, 0),
                     K.maximum(alpha * self.scale_down, self.min_alpha),
                     alpha))
        # equation 5: step against the sign of the gradient
        new_delta = K.switch(
            K.greater(grad, 0), -new_alpha,
            K.switch(K.less(grad, 0), new_alpha, K.zeros_like(new_alpha)))
        # equation 7: on a sign flip, undo the previous step instead
        weight_delta = K.switch(K.less(grad * old_grad, 0),
                                -prev_weight_delta, new_delta)
        # equation 6
        new_param = param + weight_delta
        # reset gradient_{t-1} to 0 if gradient sign changed (so that we do
        # not "double punish", see paragraph after equation 7)
        grad = K.switch(K.less(grad * old_grad, 0), K.zeros_like(grad), grad)
        # Apply constraints
        #if param in constraints:
        #    c = constraints[param]
        #    new_param = c(new_param)
        self.updates.append(K.update(param, new_param))
        self.updates.append(K.update(alpha, new_alpha))
        self.updates.append(K.update(old_grad, grad))
        self.updates.append(K.update(prev_weight_delta, weight_delta))
    return self.updates
def giou(box_1, box_2, dtype=tf.float32):
    """Generalised-IoU style score for centre/size boxes, with NaN/Inf mapped to 0.

    The last axis of both inputs is read as ``(x, y, w, h, ...)``.  ``C`` is
    the area of the smallest axis-aligned rectangle covering both boxes.  The
    result is ``IoU - (C - (area1 + area2) / (IoU + 1)) / C``, where the IoU
    comes from ``box_iou`` and both divisions use ``tf.math.divide_no_nan``.

    ``dtype`` is accepted for interface compatibility and is not used.
    """
    def _bounds_and_area(box):
        centre = box[..., :2]
        size = box[..., 2:4]
        return centre - size / 2., centre + size / 2., size[..., 0] * size[..., 1]

    mins_1, maxes_1, area_1 = _bounds_and_area(box_1)
    mins_2, maxes_2, area_2 = _bounds_and_area(box_2)

    # Smallest lower corner to largest upper corner: the enclosing rectangle.
    enclosing_mins = K.minimum(mins_1, mins_2)
    enclosing_maxes = K.maximum(maxes_1, maxes_2)
    enclosing_wh = K.maximum(enclosing_maxes - enclosing_mins, 0.)
    enclosing_area = enclosing_wh[..., 0] * enclosing_wh[..., 1]

    iou_value = tf.convert_to_tensor(box_iou(box_1, box_2))
    # (area1 + area2) / (1 + IoU); equals the union area when IoU is
    # intersection / union.
    union_like = tf.math.divide_no_nan((area_1 + area_2), (iou_value + 1))
    score = iou_value - tf.math.divide_no_nan(enclosing_area - union_like,
                                              enclosing_area)
    score = tf.where(tf.math.is_nan(score), 0.0, score)
    score = tf.where(tf.math.is_inf(score), 0.0, score)
    return score
def calc_iou(input1, input2):
    """Broadcasted IoU between two sets of boxes given as centre and size.

    The last axis of each input is read as ``(x, y, w, h, ...)``.  ``input1``
    gets an extra axis at position -2 and ``input2`` an extra leading axis so
    that the two sets broadcast against each other.

    Returns ``intersection / (area1 + area2 - intersection)``.
    """
    # Add one broadcasting axis to each input.
    boxes_a = K.expand_dims(input1, -2)
    boxes_b = K.expand_dims(input2, 0)

    # Centre coordinates and width/height of every box.
    centre_a, size_a = boxes_a[..., :2], boxes_a[..., 2:4]
    centre_b, size_b = boxes_b[..., :2], boxes_b[..., 2:4]

    # Half extents give the top-left and bottom-right corners.
    half_a = size_a / 2.
    half_b = size_b / 2.
    top_left_a, bottom_right_a = centre_a - half_a, centre_a + half_a
    top_left_b, bottom_right_b = centre_b - half_b, centre_b + half_b

    # Intersection rectangle: max of the top-left corners, min of the
    # bottom-right corners, with negative extents clamped to zero.
    overlap_min = K.maximum(top_left_a, top_left_b)
    overlap_max = K.minimum(bottom_right_a, bottom_right_b)
    overlap_wh = K.maximum(overlap_max - overlap_min, 0.)
    overlap_area = overlap_wh[..., 0] * overlap_wh[..., 1]

    area_a = size_a[..., 0] * size_a[..., 1]
    area_b = size_b[..., 0] * size_b[..., 1]
    return overlap_area / (area_a + area_b - overlap_area)
def iou(box1, box2):
    """Mean intersection over union (IoU) between paired rows of box1 and box2.

    Arguments:
    box1 -- first boxes, indexed as ``box1[:, k]`` with columns (x1, x2, y1, y2)
    box2 -- second boxes, same layout as box1

    Returns the mean of the per-row IoU values.  A zero union area is not
    guarded against.
    """
    # (x1, x2, y1, y2) of the intersection rectangle.
    left = K.maximum(box1[:, 0], box2[:, 0])
    right = K.minimum(box1[:, 1], box2[:, 1])
    top = K.maximum(box1[:, 2], box2[:, 2])
    bottom = K.minimum(box1[:, 3], box2[:, 3])
    overlap = K.maximum(right - left, 0) * K.maximum(bottom - top, 0)

    def _area(box):
        return (box[:, 3] - box[:, 2]) * (box[:, 1] - box[:, 0])

    # Union(A, B) = A + B - Inter(A, B)
    union = (_area(box1) + _area(box2) - overlap)
    return K.mean(overlap / union)
def get_quat_magnitude(q, K, axis=-1, keepdims=False, force_positive=True):
    """
    Get quaternion magnitude.
    `q` must be of shape (..., 4, ...)
    The axis of the quaternion is `axis`.
    `K` could be `numpy` or `keras.backend`

    The squared components are summed along `axis` (optionally keeping the
    reduced dimension) and the square root of that sum is returned.  When
    `force_positive` is true the sum is first floored at a small positive
    epsilon: `K.epsilon()` when `K` provides it, otherwise the machine
    epsilon of the sum's floating dtype.

    In the numpy fallback, sums with a non-floating dtype (e.g. integer
    quaternion arrays) are promoted to float64 before clipping, because
    `np.finfo` rejects integer dtypes.
    """
    sum_or_eps = K.sum(q ** 2, axis=axis, keepdims=keepdims)
    # keep the sqrt argument strictly positive
    if force_positive:
        try:
            sum_or_eps = K.maximum(sum_or_eps, K.epsilon())
        except AttributeError:
            # assume that K is numpy (it has no `epsilon` attribute)
            if not np.issubdtype(sum_or_eps.dtype, np.inexact):
                sum_or_eps = sum_or_eps.astype(np.float64)
            sum_or_eps = np.clip(sum_or_eps, np.finfo(sum_or_eps.dtype).eps, None)
    return K.sqrt(sum_or_eps)
def triplet_loss(y_true, y_pred, cosine=True, alpha=0.2):
    """Per-sample triplet hinge loss on concatenated (anchor, positive, negative) vectors.

    The last axis of ``y_pred`` is split into three consecutive parts of
    ``K.int_shape(y_pred)[-1] // 3`` columns each (the third part takes any
    remainder).  ``y_true`` is not used.

    With ``cosine=True`` the distance is ``1 - sum(a * b)`` along the last
    axis; otherwise it is the Euclidean distance.  Returns
    ``max(0, d(anchor, positive) - d(anchor, negative) + alpha)`` per row.
    """
    width = K.int_shape(y_pred)[-1] // 3
    second_cut = int(width * 2)
    anchor = y_pred[:, :width]
    positive = y_pred[:, width:second_cut]
    negative = y_pred[:, second_cut:]

    if cosine:
        def _distance(u, v):
            return 1 - K.sum((u * v), axis=-1)
    else:
        def _distance(u, v):
            return K.sqrt(K.sum(K.square(u - v), axis=-1))

    to_positive = _distance(anchor, positive)
    to_negative = _distance(anchor, negative)
    return K.maximum(0.0, to_positive - to_negative + alpha)
def quantile_loss(quantile, y_true, y_pred):
    """Compute the mean quantile (pinball) loss for one quantile fraction.

    Parameters
    ----------
    quantile : float in (0, 1)
        Quantile fraction to compute the loss.
    y_true : Keras tensor
        Keras tensor including the ground truth
    y_pred : Keras tensor
        Keras tensor including the predictions of a quantile model.

    Returns
    -------
    Mean over all elements of ``max(q * e, (q - 1) * e)`` with
    ``e = y_true - y_pred``.
    """
    residual = (y_true - y_pred)
    weighted_positive = quantile * residual
    weighted_negative = (quantile - 1) * residual
    return K.mean(K.maximum(weighted_positive, weighted_negative))
def call(self, inputs):
    """Register the policy loss on the layer and return ``[kl, entropy]``.

    ``inputs`` unpacks into observations, actions, advantages, the old policy
    means and log-variances, and the old log-probabilities.  The loss added
    via ``self.add_loss`` is the sum of:

    * ``-mean(adv * exp(new_logp - old_logp))``
    * ``mean(self.beta * kl)``
    * ``self.eta * max(0, mean(kl) - 2 * self.kl_targ) ** 2``
    """
    obs, act, adv, old_means, old_logvars, old_logp = inputs

    new_means, new_logvars = self.policy(obs)
    new_logp = self.logprob([act, new_means, new_logvars])
    kl, entropy = self.kl_entropy(
        [old_means, old_logvars, new_means, new_logvars])

    probability_ratio = K.exp(new_logp - old_logp)
    surrogate_loss = -K.mean(adv * probability_ratio)
    kl_penalty = K.mean(self.beta * kl)
    # TODO - Take mean before or after hinge loss?
    kl_overshoot = K.maximum(0.0, K.mean(kl) - 2.0 * self.kl_targ)
    hinge_penalty = self.eta * K.square(kl_overshoot)

    self.add_loss(surrogate_loss + kl_penalty + hinge_penalty)
    return [kl, entropy]
def ciou(box_1, box_2):
    """Work-in-progress box score; currently an unguarded generalised-IoU formula.

    ### NOT COMPLETED

    The last axis of both inputs is read as ``(x, y, w, h, ...)``.  ``C`` is
    the area of the smallest axis-aligned rectangle covering both boxes and
    the result is ``IoU - (C - (area1 + area2) / (IoU + 1)) / C`` using plain
    division, so NaN/Inf values are passed through unchanged.
    """
    centre_1, size_1 = box_1[..., :2], box_1[..., 2:4]
    centre_2, size_2 = box_2[..., :2], box_2[..., 2:4]

    lower_1, upper_1 = centre_1 - size_1 / 2., centre_1 + size_1 / 2.
    lower_2, upper_2 = centre_2 - size_2 / 2., centre_2 + size_2 / 2.

    # Enclosing rectangle: smallest lower corner to largest upper corner.
    cover_lower = K.minimum(lower_1, lower_2)
    cover_upper = K.maximum(upper_1, upper_2)
    cover_wh = K.maximum(cover_upper - cover_lower, 0.)
    cover_area = cover_wh[..., 0] * cover_wh[..., 1]

    area_1 = size_1[..., 0] * size_1[..., 1]
    area_2 = size_2[..., 0] * size_2[..., 1]

    iou_value = tf.convert_to_tensor(box_iou(box_1, box_2))
    score = iou_value - (cover_area - (area_1 + area_2) / (iou_value + 1)) / cover_area
    # NaN/Inf clean-up is deliberately left disabled here:
    #score = tf.where(tf.math.is_nan(score), 0.0, score)
    #score = tf.where(tf.math.is_inf(score), 0.0, score)
    return score
def correlation_coefficient_loss(y_true, y_pred):
    """Return ``1 - r**2`` where ``r`` is the clamped Pearson correlation.

    ``y_true`` is converted to a float32 tensor first.  Means and sums run
    over every element, so ``r`` is a scalar; it is clamped to [-1, 1] and
    routed through ``K.print_tensor`` (a debugging side effect) before being
    squared.
    """
    truth = tf.convert_to_tensor(y_true, dtype=tf.float32)
    truth_centered = truth - K.mean(truth)
    pred_centered = y_pred - K.mean(y_pred)

    numerator = K.sum(tf.multiply(truth_centered, pred_centered))
    denominator = K.sqrt(
        tf.multiply(K.sum(K.square(truth_centered)),
                    K.sum(K.square(pred_centered))))

    r = K.maximum(K.minimum(numerator / denominator, 1.0), -1.0)
    r = K.print_tensor(r, message='suhyun --- corr rank r = ')
    return 1 - K.square(r)
def call(self, X, training=None):
    """Project ``X`` through a selection matrix derived from ``self.logits``.

    In the training phase the selection matrix is a softmax over
    ``(logits + Gumbel noise) / temperature``; otherwise it is the one-hot
    encoding of the arg-max logit.  Each call also updates ``self.temp`` to
    ``max(self.min_temp, self.temp * self.alpha)`` and stores the chosen
    matrix in ``self.selections``.

    Returns ``K.dot(X, transpose(self.selections))``.
    """
    # Gumbel noise from uniform samples in [K.epsilon(), 1.0).
    noise_source = K.random_uniform(self.logits.shape, K.epsilon(), 1.0)
    gumbel_noise = -K.log(-K.log(noise_source))

    # Decay the temperature, never going below min_temp.
    cooled = K.maximum(self.min_temp, self.temp * self.alpha)
    temperature = K.update(self.temp, cooled)

    soft_selection = K.softmax((self.logits + gumbel_noise) / temperature)
    hard_selection = K.one_hot(K.argmax(self.logits), self.logits.shape[1])

    self.selections = K.in_train_phase(soft_selection, hard_selection, training)
    return K.dot(X, K.transpose(self.selections))
def __call__(self, y_true, y_pred):
    """Mean triplet hinge loss against the reduced negative distance.

    ``y_pred`` is unstacked into (anchor, positive, negative); ``y_true`` is
    not used.  The loss is
    ``mean(max(d(a, p) - min_dist + self.margin, 0))`` where ``min_dist`` is
    ``K.min`` over the pair ``[d(a, n), d(p, n)]``.

    NOTE(review): the reduction uses ``axis=-1`` on the two-element list —
    confirm this picks between the two distances as intended for the shapes
    ``euclidean_distance`` returns.
    """
    anchor, positive, negative = tf.unstack(y_pred)

    dist_ap = euclidean_distance(anchor, positive)
    dist_an = euclidean_distance(anchor, negative)
    dist_pn = euclidean_distance(positive, negative)

    negative_floor = K.min([dist_an, dist_pn], axis=-1, keepdims=True)
    hinge = K.maximum(dist_ap - negative_floor + self.margin, 0)
    return K.mean(hinge)
def gumbel_softmax(x, tau, from_logits=False, straight_through=False):
    """Sample from a Gumbel-softmax relaxation of ``x``.

    ref: https://arxiv.org/abs/1611.01144

    ``x`` is treated as logits when ``from_logits`` is true; otherwise its
    log is taken after flooring at 1e-20.  Gumbel noise is added and:

    * ``tau > 0`` is false -> ``hardmax`` of the noisy logits is returned;
    * ``straight_through`` -> ``combine_value_gradient(hardmax, softmax)``;
    * otherwise -> softmax of the noisy logits divided by ``tau``.
    """
    eps = 1e-20
    uniform = K.random_uniform(K.shape(x), eps, 1 - eps)
    logits = x if from_logits else K.log(K.maximum(eps, x))
    noisy = logits - K.log(-K.log(uniform))

    if not tau > 0:
        return hardmax(noisy)
    if straight_through:
        hard = hardmax(noisy)
        soft = K.softmax(noisy / tau, axis=-1)
        return combine_value_gradient(hard, soft)
    return K.softmax(noisy / tau, axis=-1)
def box_iou(self, b1, b2):
    """Broadcasted IoU between two sets of centre/size boxes.

    The last axis of each input is read as ``(x, y, w, h, ...)``.  ``b1``
    gets a new axis at position -2 and ``b2`` a new leading axis so the two
    sets broadcast against each other.  ``self`` is not used.

    Returns ``intersection / (area1 + area2 - intersection)``.
    """
    first = K.expand_dims(b1, -2)
    second = K.expand_dims(b2, 0)

    first_xy, first_wh = first[..., :2], first[..., 2:4]
    second_xy, second_wh = second[..., :2], second[..., 2:4]
    first_half = first_wh / 2.
    second_half = second_wh / 2.

    # Overlap rectangle, clamped to zero extent for disjoint boxes.
    lower = K.maximum(first_xy - first_half, second_xy - second_half)
    upper = K.minimum(first_xy + first_half, second_xy + second_half)
    extent = K.maximum(upper - lower, 0.)
    shared_area = extent[..., 0] * extent[..., 1]

    first_area = first_wh[..., 0] * first_wh[..., 1]
    second_area = second_wh[..., 0] * second_wh[..., 1]
    return shared_area / (first_area + second_area - shared_area)
def custom(y_true, y_pred):
    """Custom loss: two squared-norm penalties, a scaled hinge on ``r - y_pred``, minus ``r``.

    ``w``, ``V`` and ``self`` (for ``self.nu`` and ``self.r``) come from the
    enclosing scope; ``y_true`` is not used.  The value returned is
    ``0.5*sum(w**2) + 0.5*sum(V**2) + (1/nu)*mean(max(0, r - y_pred)) - r``.
    """
    w_penalty = 0.5*K.sum(K.square(w))
    v_penalty = 0.5*K.sum(K.square(V))
    hinge_mean = K.mean(K.maximum(0.0, self.r-(y_pred)))
    slack_term = 1/self.nu*hinge_mean
    offset_term = -1*self.r
    # An in-loss update of r (percentile of the per-row max of y_pred) was
    # tried here and is left disabled:
    # a = K.max(y_pred, axis=1)
    # self.r = tfp.stats.percentile(a, self.nu*100)
    # a = K.print_tensor(a,[a])
    return (w_penalty + v_penalty + slack_term + offset_term)
def box_iou(b1, b2):
    """Return iou tensor

    Parameters
    ----------
    b1: tensor, shape=(i1,...,iN, 4), xywh
    b2: tensor, shape=(j, 4), xywh

    Returns
    -------
    iou: tensor, shape=(i1,...,iN, j)

    """
    def _corners(boxes):
        # Convert centre/size boxes to (mins, maxes, width-height).
        centre = boxes[..., :2]
        width_height = boxes[..., 2:4]
        half = width_height / 2.
        return centre - half, centre + half, width_height

    # Insert singleton axes so every b1 box is compared with every b2 box.
    mins_1, maxes_1, wh_1 = _corners(K.expand_dims(b1, -2))
    mins_2, maxes_2, wh_2 = _corners(K.expand_dims(b2, 0))

    overlap_mins = K.maximum(mins_1, mins_2)
    overlap_maxes = K.minimum(maxes_1, maxes_2)
    overlap_wh = K.maximum(overlap_maxes - overlap_mins, 0.)
    overlap_area = overlap_wh[..., 0] * overlap_wh[..., 1]

    area_1 = wh_1[..., 0] * wh_1[..., 1]
    area_2 = wh_2[..., 0] * wh_2[..., 1]
    return overlap_area / (area_1 + area_2 - overlap_area)
def compute_fd_loss(boxes, scores, annotations, iou_threshold=0.75):
    """Focal loss on box scores, with targets derived from overlap with annotations.

    A box is a positive target when its largest overlap with any annotation
    (``overlap(boxes, annotations)`` reduced over axis 1) reaches
    ``iou_threshold``.  The summed ``focal`` loss is divided by the number of
    annotations, floored at 1.
    """
    overlaps = overlap(boxes, annotations)
    best_overlap = K.max(overlaps, axis=1, keepdims=True)
    is_positive = K.greater_equal(best_overlap, iou_threshold)
    targets = K.cast(is_positive, K.floatx())

    per_box_loss = focal(targets, scores)

    # Normalise by the annotation count, never by less than one.
    annotation_count = K.cast(K.shape(annotations)[0], K.floatx())
    normalizer = K.maximum(K.cast_to_floatx(1.0), annotation_count)
    return K.sum(per_box_loss) / normalizer
def lm_acc(y_true, y_pred):
    """
    Accuracy over non-padding positions.
    :param y_true: labels (bs, n_seq)
    :param y_pred: predictions (bs, n_seq, n_vocab)
    :return: number of correct non-pad positions divided by the number of
        non-pad positions (the divisor is floored at 1)
    """
    # Predicted class per position, compared against the labels.
    predicted_ids = tf.cast(K.argmax(y_pred, axis=-1), tf.float32)
    hits = tf.cast(K.equal(y_true, predicted_ids), tf.float32)
    # Positions whose label is 0 (pad) are masked out.
    not_pad = tf.cast(tf.math.not_equal(y_true, 0), dtype=hits.dtype)
    hits = hits * not_pad
    return K.sum(hits) / K.maximum(K.sum(not_pad), 1)
def margin_loss(y_true, y_pred):
    """
    Implements the margin loss used by Sabour et al. in
    "Dynamic Routing Between Capsules" (https://arxiv.org/abs/1710.09829)

    Arguments
    ----
    y_true: Keras Tensor
        Contains the label informations
        y_true.shape = [batchsize, n_classes]
    y_pred: Keras Tensor
        Prediction of the model
        y_pred.shape = [batchsize, n_classes]

    Return
    ----
    A scalar loss value: the per-class terms are summed over axis 1 and the
    result is averaged.
    """
    # Labelled classes are pushed above 0.9 ...
    present_term = y_true * K.square(K.maximum(0., 0.9 - y_pred))
    # ... other classes below 0.1, down-weighted by 0.5.
    absent_term = 0.5 * (1 - y_true) * K.square(K.maximum(0., y_pred - 0.1))
    per_class = present_term + absent_term
    return K.mean(K.sum(per_class, 1))
def __call__(self, y_true, y_pred):
    """Mean triplet hinge loss against both negative distances at once.

    ``y_pred`` is unstacked into (anchor, positive, negative); ``y_true`` is
    not used.  The anchor-positive distance has the two-element list
    ``[d(a, n), d(p, n)]`` subtracted from it, ``self.margin`` is added, the
    result is clamped at zero and averaged over every element.
    """
    anchor, positive, negative = tf.unstack(y_pred)

    dist_ap = euclidean_distance(anchor, positive)
    negative_distances = [
        euclidean_distance(anchor, negative),
        euclidean_distance(positive, negative),
    ]
    hinge = K.maximum(dist_ap - negative_distances + self.margin, 0)
    return K.mean(hinge)
def get_updates(self, loss, params):
    """Build Adam-style update ops with optional AMSGrad and per-variable clipping.

    Besides the usual first/second moment updates, every parameter whose name
    matches a key of ``self.clips`` (as decided by ``set_pattern_find``) is
    clipped to the interval stored under that key, provided
    ``self.clips_val`` is truthy.

    Reads ``self.lr``, ``self.initial_decay``, ``self.decay``,
    ``self.beta_1``, ``self.beta_2``, ``self.epsilon``, ``self.amsgrad``,
    ``self.clips``, ``self.clips_val`` and ``self.verbose``; sets
    ``self.updates`` and ``self.weights``.

    :param loss: loss whose gradients drive the updates
    :param params: parameters to optimise
    :return: ``self.updates``
    """
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    if self.initial_decay > 0:
        # Inverse-time decay of the base learning rate.
        lr = lr * (1. / (1. + self.decay * K.cast(self.iterations,
                                                  K.dtype(self.decay))))

    t = K.cast(self.iterations, K.floatx()) + 1
    # Bias-corrected step size.
    lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) /
                 (1. - K.pow(self.beta_1, t)))

    # First (ms) and second (vs) moment accumulators, one per parameter.
    ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    if self.amsgrad:
        vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params]
    else:
        # Placeholders so the zip below has the same length either way.
        vhats = [K.zeros(1) for _ in params]
    self.weights = [self.iterations] + ms + vs + vhats

    for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
        if self.amsgrad:
            # AMSGrad: use the running maximum of the second moment.
            vhat_t = K.maximum(vhat, v_t)
            p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon)
            self.updates.append(K.update(vhat, vhat_t))
        else:
            p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon)

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))
        new_p = p_t

        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)

        # Clip the new value when the variable name matches a clip pattern.
        clptrkey = set_pattern_find(p.name,self.clips.keys())
        if self.clips_val and clptrkey:
            # The bounds are evaluated eagerly, while the updates are built.
            c = K.eval(self.clips[clptrkey])
            if self.verbose>0:
                print("Clipping variable",p.name," to ", c )
            new_p = K.clip(new_p, c[0], c[1])

        self.updates.append(K.update(p, new_p))
    return self.updates
def get_updates(self, params, loss):
    """Build Adam-style update ops whose step is additionally divided by a loss-feedback coefficient.

    Two extra scalar variables are tracked: ``f`` (a clipped running value of
    the loss) and ``d`` (a smoothed relative change of ``f``).  The parameter
    step divides by ``d_t * sqrt(v_t) + epsilon``.

    Reads ``self.lr``, ``self.inital_decay``, ``self.decay``, ``self.beta_1``,
    ``self.beta_2``, ``self.beta_3``, ``self.small_k``, ``self.big_K`` and
    ``self.epsilon``; sets ``self.updates`` and ``self.weights``.

    :param params: parameters to optimise
    :param loss: loss tensor; used both for gradients and for the feedback term
    :return: ``self.updates``
    """
    grads = self.get_gradients(loss, params)
    self.updates = [K.update_add(self.iterations, 1)]

    lr = self.lr
    # NOTE(review): the attribute is spelled `inital_decay`; it must match
    # the name assigned elsewhere in the class — confirm before renaming.
    if self.inital_decay > 0:
        lr *= (1. / (1. + self.decay * self.iterations))

    t = self.iterations + 1
    # Bias-corrected step size.
    lr_t = lr * K.sqrt(1. - K.pow(self.beta_2, t)) / (
        1. - K.pow(self.beta_1, t))

    shapes = [K.get_variable_shape(p) for p in params]
    ms = [K.zeros(shape) for shape in shapes]
    vs = [K.zeros(shape) for shape in shapes]

    # Feedback state: f tracks the (clipped) loss, d its smoothed relative change.
    f = K.variable(0)
    d = K.variable(1)
    self.weights = [self.iterations] + ms + vs + [f, d]

    # True once t exceeds 1; otherwise f and d are (re)initialised below.
    cond = K.greater(t, K.variable(1))
    # Lower/upper bounds for the ratio loss / f, chosen by whether the loss
    # is above or below f.
    small_delta_t = K.switch(K.greater(loss, f), self.small_k + 1,
                             1. / (self.big_K + 1))
    big_delta_t = K.switch(K.greater(loss, f), self.big_K + 1,
                           1. / (self.small_k + 1))

    # Clip the ratio into [small_delta_t, big_delta_t].
    c_t = K.minimum(K.maximum(small_delta_t, loss / (f + self.epsilon)),
                    big_delta_t)
    f_t = c_t * f
    # NOTE(review): this divides by min(f_t, f), which is 0 while f still
    # holds its initial value; that branch is replaced by the switch on
    # `cond` below — confirm no NaN leaks through on the backend in use.
    r_t = K.abs(f_t - f) / (K.minimum(f_t, f))
    d_t = self.beta_3 * d + (1 - self.beta_3) * r_t

    f_t = K.switch(cond, f_t, loss)
    d_t = K.switch(cond, d_t, K.variable(1.))

    self.updates.append(K.update(f, f_t))
    self.updates.append(K.update(d, d_t))

    for p, g, m, v in zip(params, grads, ms, vs):
        m_t = (self.beta_1 * m) + (1. - self.beta_1) * g
        v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
        # Adam step, with the second-moment root scaled by d_t.
        p_t = p - lr_t * m_t / (d_t * K.sqrt(v_t) + self.epsilon)

        self.updates.append(K.update(m, m_t))
        self.updates.append(K.update(v, v_t))

        new_p = p_t
        self.updates.append(K.update(p, new_p))
    return self.updates
def f(_, y_pred):
    """Per-sample triplet hinge loss on L2-normalised embeddings.

    The last axis of ``y_pred`` is split into three consecutive parts of
    ``y_pred.shape[-1] // 3`` columns (anchor, positive, negative; the last
    part takes any remainder).  The first argument is ignored and ``margin``
    is taken from the enclosing scope.

    Each embedding is normalised along its own last axis.  Previously
    ``l2_normalize`` was called without ``axis``, which normalises the whole
    (batch, dim) slice as a single vector, so the scale of every distance
    depended on the batch size.

    Returns ``max(d(anchor, positive) - d(anchor, negative) + margin, 0)``
    per row, with ``d`` the Euclidean distance.
    """
    vector_size = y_pred.shape[-1] // 3
    anchor = y_pred[:, :vector_size]
    positive = y_pred[:, vector_size:2*vector_size]
    negative = y_pred[:, 2*vector_size:]

    # Normalize each embedding row to unit length #
    anchor = tf.math.l2_normalize(anchor, axis=-1)
    positive = tf.math.l2_normalize(positive, axis=-1)
    negative = tf.math.l2_normalize(negative, axis=-1)

    # Compute distances #
    pos_dist = K.sqrt(K.sum(K.square(anchor - positive), axis=-1))
    neg_dist = K.sqrt(K.sum(K.square(anchor - negative), axis=-1))

    return K.maximum(pos_dist - neg_dist + margin, 0)