def policy_loss_with_metrics(self, Adv, A=None):
    """
    Construct the policy loss as a scalar-valued Tensor, together with a
    dictionary of metrics (also scalars).

    This method may be overridden to construct a custom policy loss and/or
    to change the accompanying metrics.

    Parameters
    ----------
    Adv : 1d Tensor, shape: [batch_size]

        A batch of advantages. A 2d input of shape ``[batch_size, 1]`` is
        also accepted and squeezed to 1d.

    A : nd Tensor, shape: [batch_size, ...]

        A batch of actions taken under the behavior policy. For some
        choices of policy loss, e.g. ``update_strategy='sac'`` this input
        is ignored.

    Returns
    -------
    loss, metrics : (Tensor, dict of Tensors)

        The policy loss along with some metrics, which is a dict of type
        ``{name <str>: metric <Tensor>}``. The loss and each of the metrics
        (dict values) are scalar Tensors, i.e. Tensors with ``ndim=0``.

        The ``loss`` is passed to a keras Model using
        ``train_model.add_loss(loss)``. Similarly, each metric in the
        metric dict is passed to the model using
        ``train_model.add_metric(metric, name=name, aggregation='mean')``.

    Raises
    ------
    NotImplementedError
        If ``update_strategy == 'cross_entropy'``.
    ValueError
        If ``update_strategy`` is not one of the known strategies.

    """
    if K.ndim(Adv) == 2:
        check_tensor(Adv, axis_size=1, axis=1)
        Adv = K.squeeze(Adv, axis=1)

    check_tensor(Adv, ndim=1)

    if self.update_strategy == 'vanilla':
        assert A is not None

        log_pi = self.dist.log_proba(A)
        check_tensor(log_pi, same_as=Adv)

        entropy = K.mean(self.dist.entropy())

        # The objective is E[Adv * log_pi] + beta * entropy. Flip the sign
        # of the *whole* objective to get the loss, so that the entropy
        # term acts as a bonus (same convention as the 'ppo' branch).
        loss = -(K.mean(Adv * log_pi) + self.entropy_beta * entropy)

        # no metrics related to behavior_dist since its not used in loss
        metrics = {'policy/entropy': entropy}

    elif self.update_strategy == 'ppo':
        assert A is not None

        log_pi = self.dist.log_proba(A)
        log_pi_old = K.stop_gradient(self.target_dist.log_proba(A))
        check_tensor(log_pi, same_as=Adv)
        check_tensor(log_pi_old, same_as=Adv)

        eps = self.ppo_clip_eps
        ratio = K.exp(log_pi - log_pi_old)
        ratio_clip = K.clip(ratio, 1 - eps, 1 + eps)
        check_tensor(ratio, same_as=Adv)
        check_tensor(ratio_clip, same_as=Adv)

        clip_objective = K.mean(K.minimum(Adv * ratio, Adv * ratio_clip))
        entropy = K.mean(self.dist.entropy())
        kl_div = K.mean(self.target_dist.kl_divergence(self.dist))

        # flip sign to get loss from objective
        loss = -(clip_objective + self.entropy_beta * entropy)
        metrics = {'policy/entropy': entropy, 'policy/kl_div': kl_div}

    elif self.update_strategy == 'sac':
        self.logger.debug("using update_strategy 'sac'")
        loss = -K.mean(Adv)
        metrics = {'policy/entropy': K.mean(self.dist.entropy())}

    elif self.update_strategy == 'cross_entropy':
        raise NotImplementedError('cross_entropy')

    else:
        raise ValueError(
            "unknown update_strategy '{}'".format(self.update_strategy))

    # rename
    check_tensor(loss, ndim=0)
    loss = tf.identity(loss, name='policy/loss')

    return loss, metrics
def yolo_loss(args, anchors, num_classes, rescore_confidence=False,
              print_loss=False):
    """YOLO localization loss function.

    Parameters
    ----------
    args : tuple
        ``(yolo_output, true_boxes, detectors_mask, matching_true_boxes)``:

        yolo_output : tensor
            Final convolutional layer features.
        true_boxes : tensor
            Ground truth boxes tensor with shape
            [batch, num_true_boxes, 5] containing box x_center, y_center,
            width, height, and class.
        detectors_mask : array
            0/1 mask for detector positions where there is a matching
            ground truth.
        matching_true_boxes : array
            Corresponding ground truth boxes for positive detector
            positions. Already adjusted for conv height and width.
    anchors : tensor
        Anchor boxes for model.
    num_classes : int
        Number of object classes.
    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box
        with the closest matching ground truth box.
    print_loss : bool, default=False
        If True then use a tf.Print() to print the loss components.

    Returns
    -------
    total_loss : tensor
        Half of the summed confidence, classification and coordinate
        squared-error terms.
    """
    yolo_output, true_boxes, detectors_mask, matching_true_boxes = args
    n_anchors = len(anchors)

    # Relative weights of the individual loss terms.
    w_object, w_no_object, w_class, w_coord = 5, 1, 1, 1

    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
        yolo_output, anchors, num_classes)

    # Unadjusted box predictions (sigmoid xy, raw wh) for the coordinate
    # loss.
    # TODO: Remove extra computation shared with yolo_head.
    out_shape = K.shape(yolo_output)
    feats = K.reshape(
        yolo_output,
        [-1, out_shape[1], out_shape[2], n_anchors, num_classes + 5])
    raw_xy = K.sigmoid(feats[..., 0:2])
    raw_wh = feats[..., 2:4]
    pred_boxes = K.concatenate((raw_xy, raw_wh), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Insert a "true box" axis so every prediction can be compared with
    # every ground truth box:
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    box_xy = K.expand_dims(pred_xy, 4)
    box_wh = K.expand_dims(pred_wh, 4)
    box_half = box_wh / 2.
    box_lo = box_xy - box_half
    box_hi = box_xy + box_half

    gt_shape = K.shape(true_boxes)
    gt = K.reshape(
        true_boxes, [gt_shape[0], 1, 1, 1, gt_shape[1], gt_shape[2]])
    gt_xy = gt[..., 0:2]
    gt_wh = gt[..., 2:4]
    gt_half = gt_wh / 2.
    gt_lo = gt_xy - gt_half
    gt_hi = gt_xy + gt_half

    # IOU of each predicted box with each ground truth box.
    overlap_lo = K.maximum(box_lo, gt_lo)
    overlap_hi = K.minimum(box_hi, gt_hi)
    overlap_wh = K.maximum(overlap_hi - overlap_lo, 0.)
    overlap_area = overlap_wh[..., 0] * overlap_wh[..., 1]
    box_area = box_wh[..., 0] * box_wh[..., 1]
    gt_area = gt_wh[..., 0] * gt_wh[..., 1]
    iou_scores = overlap_area / (box_area + gt_area - overlap_area)

    # Best IOU over the true boxes for each detector, with a trailing axis.
    best_ious = K.expand_dims(K.max(iou_scores, axis=4))

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Confidence loss.
    # NOTE: YOLO does not use binary cross-entropy here.
    no_object_weights = (
        w_no_object * (1 - object_detections) * (1 - detectors_mask))
    no_objects_loss = no_object_weights * K.square(-pred_confidence)
    confidence_target = best_ious if rescore_confidence else 1
    objects_loss = (w_object * detectors_mask *
                    K.square(confidence_target - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLO does not use categorical cross-entropy loss here.
    class_ids = K.cast(matching_true_boxes[..., 4], 'int32')
    class_targets = K.one_hot(class_ids, num_classes)
    classification_loss = (w_class * detectors_mask *
                           K.square(class_targets - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    box_targets = matching_true_boxes[..., 0:4]
    coordinates_loss = (w_coord * detectors_mask *
                        K.square(box_targets - pred_boxes))

    confidence_loss_sum = K.sum(confidence_loss)
    classification_loss_sum = K.sum(classification_loss)
    coordinates_loss_sum = K.sum(coordinates_loss)
    total_loss = 0.5 * (
        confidence_loss_sum + classification_loss_sum + coordinates_loss_sum)

    if print_loss:
        components = [
            total_loss, confidence_loss_sum, classification_loss_sum,
            coordinates_loss_sum
        ]
        total_loss = tf.Print(
            total_loss,
            components,
            message='yolo_loss, conf_loss, class_loss, box_coord_loss:')

    return total_loss
def box_diou(b_true, b_pred, use_ciou=False):
    """
    Calculate the DIoU (or CIoU) score between ground truth and predicted
    boxes.

    Note that this returns the DIoU/CIoU *score*; the caller is expected to
    turn it into a loss (e.g. ``1 - diou``).

    Reference Paper:
        "Distance-IoU Loss: Faster and Better Learning for Bounding Box
        Regression"
        https://arxiv.org/abs/1911.08287

    Parameters
    ----------
    b_true: GT boxes tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
    b_pred: predict boxes tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
    use_ciou: bool flag to indicate whether to use CIoU loss type

    Returns
    -------
    diou: tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)
    """
    b_true_xy = b_true[..., :2]
    b_true_wh = b_true[..., 2:4]
    b_true_wh_half = b_true_wh / 2.
    b_true_mins = b_true_xy - b_true_wh_half
    b_true_maxes = b_true_xy + b_true_wh_half

    b_pred_xy = b_pred[..., :2]
    b_pred_wh = b_pred[..., 2:4]
    b_pred_wh_half = b_pred_wh / 2.
    b_pred_mins = b_pred_xy - b_pred_wh_half
    b_pred_maxes = b_pred_xy + b_pred_wh_half

    intersect_mins = K.maximum(b_true_mins, b_pred_mins)
    intersect_maxes = K.minimum(b_true_maxes, b_pred_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
    b_true_area = b_true_wh[..., 0] * b_true_wh[..., 1]
    b_pred_area = b_pred_wh[..., 0] * b_pred_wh[..., 1]
    union_area = b_true_area + b_pred_area - intersect_area
    # calculate IoU, add epsilon in denominator to avoid dividing by 0
    iou = intersect_area / (union_area + K.epsilon())

    # box center distance (squared)
    center_distance = K.sum(K.square(b_true_xy - b_pred_xy), axis=-1)
    # get enclosed area
    enclose_mins = K.minimum(b_true_mins, b_pred_mins)
    enclose_maxes = K.maximum(b_true_maxes, b_pred_maxes)
    enclose_wh = K.maximum(enclose_maxes - enclose_mins, 0.0)
    # get enclosed diagonal distance (squared)
    enclose_diagonal = K.sum(K.square(enclose_wh), axis=-1)
    # calculate DIoU, add epsilon in denominator to avoid dividing by 0
    diou = iou - 1.0 * (center_distance) / (enclose_diagonal + K.epsilon())

    if use_ciou:
        # calculate param v and alpha to extend to CIoU
        v = 4 * K.square(
            tf.math.atan2(b_true_wh[..., 0], b_true_wh[..., 1]) -
            tf.math.atan2(b_pred_wh[..., 0], b_pred_wh[..., 1])
        ) / (math.pi * math.pi)

        # a trick: here we add a non-gradient coefficient w^2+h^2 to v to
        # customize its back-propagation, to match the related description
        # for equation (12) in the original paper:
        #
        #     v'/w' = (8/pi^2) * (arctan(wgt/hgt) - arctan(w/h)) * (h/(w^2+h^2))   (12)
        #     v'/h' = -(8/pi^2) * (arctan(wgt/hgt) - arctan(w/h)) * (w/(w^2+h^2))
        #
        # The denominator w^2+h^2 is usually a small value for the cases
        # h and w ranging in [0, 1], which is likely to yield gradient
        # explosion. And thus in our implementation, the denominator
        # w^2+h^2 is simply removed for stable convergence, by which
        # the step size 1/(w^2+h^2) is replaced by 1 and the gradient
        # direction is still consistent with Eqn. (12).
        v = v * tf.stop_gradient(b_pred_wh[..., 0] * b_pred_wh[..., 0] +
                                 b_pred_wh[..., 1] * b_pred_wh[..., 1])

        # When the boxes coincide, iou can round to 1 and v is 0, which
        # would make this 0/0 = NaN; add epsilon like the other
        # denominators in this function.
        alpha = v / (1.0 - iou + v + K.epsilon())
        diou = diou - alpha * v

    diou = K.expand_dims(diou, -1)
    return diou
def cyclical_mae_rad(y_true, y_pred):
    """Mean absolute error between angles in radians, with wrap-around.

    For every element the smallest of ``|d|``, ``|d + 2*pi|`` and
    ``|d - 2*pi|`` is taken (``d = y_pred - y_true``), and the result is
    averaged over the last axis.
    """
    delta = y_pred - y_true
    full_turn = 2 * np.pi

    direct = K.abs(delta)
    wrapped = K.minimum(K.abs(delta + full_turn), K.abs(delta - full_turn))
    return K.mean(K.minimum(direct, wrapped), axis=-1)
def sigmoid_iou_loss(y_true, y_pred):
    """Soft IoU loss: ``1 - sum(min(y_true, y_pred)) / sum(max(y_true, y_pred))``.

    Both sums run over all elements, so the result is a scalar.
    """
    soft_intersection = K.sum(K.minimum(y_true, y_pred))
    soft_union = K.sum(K.maximum(y_true, y_pred))
    return 1 - soft_intersection / soft_union
def ignore_background_loss(y_true, y_pred):
    """Absolute error weighted by ``minimum(1.0, y_true)``.

    Elements where ``y_true`` is 0 get weight 0 and so do not contribute;
    the weighted error is normalised by the sum of the weights.
    """
    weights = minimum(1.0, y_true)
    weighted_error = abs(y_pred - y_true) * weights
    return sum(weighted_error) / sum(weights)
def yolov2_loss(detector_mask, matching_true_boxes, class_one_hot,
                true_boxes_grid, y_pred, info=False):
    """
    Calculate YOLO V2 loss from prediction (y_pred) and ground truth tensors
    (detector_mask, matching_true_boxes, class_one_hot, true_boxes_grid).

    Parameters
    ----------
    - detector_mask : tensor, shape (batch_size, GRID_W, GRID_H, anchors_count, 1)
        1 if bounding box detected by grid cell, else 0
    - matching_true_boxes : tensor, shape (batch_size, GRID_W, GRID_H, anchors_count, 5)
        Contains adjusted coords of bounding box in YOLO format
    - class_one_hot : tensor, shape (batch_size, GRID_W, GRID_H, anchors_count, class_count)
        One hot representation of bounding box label
    - true_boxes_grid : annotations : tensor (shape : batch_size, max annot, 5)
        true_boxes_grid format : x, y, w, h, c (coords unit : grid cell)
    - y_pred : prediction from model. tensor
        (shape : batch_size, GRID_W, GRID_H, anchors count, (5 + labels count))
    - info : boolean. True to print the loss components and plot the masks
        of the first batch element for every anchor.

    Returns
    -------
    - loss : scalar
    - sub_loss : sub loss list : [conf loss, class loss, coords loss], each a scalar
    """
    # anchors tensor, one (w, h) row per anchor
    anchors = np.array(ANCHORS)
    anchors = anchors.reshape(len(anchors) // 2, 2)

    # grid coords tensor: top-left corner of every cell, repeated for every
    # batch element and every anchor. The anchor multiple is derived from
    # `anchors` instead of being hard-coded so that the grid matches
    # whatever ANCHORS defines.
    coord_x = tf.cast(
        tf.reshape(tf.tile(tf.range(GRID_W), [GRID_H]),
                   (1, GRID_H, GRID_W, 1, 1)),
        tf.float32)
    coord_y = tf.transpose(coord_x, (0, 2, 1, 3, 4))
    coords = tf.tile(tf.concat([coord_x, coord_y], -1),
                     [y_pred.shape[0], 1, 1, len(anchors), 1])

    # coordinate loss
    # box regression:
    #   bx = (sigmoid(tx) + cx) / W
    #   bw = pw * e^tw
    # pw is the anchor width, cx the top-left cell coord, tx and tw are the
    # predicted offsets, W the feature map width. Here we do not divide by
    # the width because the matching boxes are expressed in grid cell units
    # as well.
    pred_xy = K.sigmoid(y_pred[:, :, :, :, 0:2])  # center coords in [0, 1]
    pred_xy = (pred_xy + coords)  # add cell coord -> grid cell units
    pred_wh = K.exp(y_pred[:, :, :, :, 2:4]) * anchors  # grid cell units
    nb_detector_mask = K.sum(tf.cast(detector_mask > 0.0, tf.float32))
    xy_loss = LAMBDA_COORD * K.sum(
        detector_mask * K.square(matching_true_boxes[..., :2] - pred_xy)
    ) / (nb_detector_mask + 1e-6)  # Non /2
    wh_loss = LAMBDA_COORD * K.sum(
        detector_mask * K.square(K.sqrt(matching_true_boxes[..., 2:4]) -
                                 K.sqrt(pred_wh))
    ) / (nb_detector_mask + 1e-6)
    coord_loss = xy_loss + wh_loss

    # class loss
    pred_box_class = y_pred[..., 5:]
    true_box_class = tf.argmax(class_one_hot, -1)
    class_loss = K.sparse_categorical_crossentropy(
        target=true_box_class, output=pred_box_class, from_logits=True)
    class_loss = K.expand_dims(class_loss, -1) * detector_mask
    class_loss = LAMBDA_CLASS * K.sum(class_loss) / (nb_detector_mask + 1e-6)

    # confidence loss
    pred_conf = K.sigmoid(y_pred[..., 4:5])  # only two class : object or background
    # for each detector : iou between prediction and ground truth
    x1 = matching_true_boxes[..., 0]
    y1 = matching_true_boxes[..., 1]
    w1 = matching_true_boxes[..., 2]
    h1 = matching_true_boxes[..., 3]
    x2 = pred_xy[..., 0]
    y2 = pred_xy[..., 1]
    w2 = pred_wh[..., 0]
    h2 = pred_wh[..., 1]
    ious = iou(x1, y1, w1, h1, x2, y2, w2, h2)
    ious = K.expand_dims(ious, -1)

    # for each detector: best ious between pred and true_boxes
    pred_xy = K.expand_dims(pred_xy, 4)
    pred_wh = K.expand_dims(pred_wh, 4)
    pred_wh_half = pred_wh / 2.
    pred_mins = pred_xy - pred_wh_half
    pred_maxes = pred_xy + pred_wh_half
    true_boxe_shape = K.int_shape(true_boxes_grid)
    true_boxes_grid = K.reshape(
        true_boxes_grid,
        [true_boxe_shape[0], 1, 1, 1, true_boxe_shape[1], true_boxe_shape[2]])
    true_xy = true_boxes_grid[..., 0:2]
    true_wh = true_boxes_grid[..., 2:4]
    true_wh_half = true_wh * 0.5
    true_mins = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half
    # m, GRID_W, GRID_H, BOX, max_annot, 2
    intersect_mins = K.maximum(pred_mins, true_mins)
    intersect_maxes = K.minimum(pred_maxes, true_maxes)
    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
    # m, GRID_W, GRID_H, BOX, max_annot
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]
    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas
    best_ious = K.max(iou_scores, axis=4)  # Best IOU scores.
    best_ious = K.expand_dims(best_ious)  # shape : m, GRID_W, GRID_H, BOX, 1

    # no object confidence loss
    no_object_detection = K.cast(best_ious < 0.6, K.dtype(best_ious))
    noobj_mask = no_object_detection * (1 - detector_mask)
    nb_noobj_mask = K.sum(tf.cast(noobj_mask > 0.0, tf.float32))
    noobject_loss = LAMBDA_NOOBJECT * K.sum(
        noobj_mask * K.square(-pred_conf)) / (nb_noobj_mask + 1e-6)
    # object confidence loss
    object_loss = LAMBDA_OBJECT * K.sum(
        detector_mask * K.square(ious - pred_conf)) / (nb_detector_mask + 1e-6)
    # total confidence loss
    conf_loss = noobject_loss + object_loss

    # total loss
    loss = conf_loss + class_loss + coord_loss
    sub_loss = [conf_loss, class_loss, coord_loss]

    if info:
        print('conf_loss : {:.4f}'.format(conf_loss))
        print('class_loss : {:.4f}'.format(class_loss))
        print('coord_loss : {:.4f}'.format(coord_loss))
        print(' xy_loss : {:.4f}'.format(xy_loss))
        print(' wh_loss : {:.4f}'.format(wh_loss))
        print('--------------------')
        print('total loss : {:.4f}'.format(loss))

        # display masks for each anchor
        for i in range(len(anchors)):
            # plt.subplots (plural) returns the figure plus one Axes per
            # column; plt.subplot returns a single Axes and cannot be
            # unpacked this way.
            f, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(10, 5))
            f.tight_layout()
            f.suptitle("MASKS FOR ANCHOR {} :".format(anchors[i, ...]))

            ax1.matshow((K.sum(detector_mask[0, :, :, i], axis=2)),
                        cmap='Greys', vmin=0, vmax=1)
            ax1.set_title('detector_mask, count : {}'.format(
                K.sum(tf.cast(detector_mask[0, :, :, i] > 0., tf.int32))))
            ax1.xaxis.set_ticks_position('bottom')

            ax2.matshow((K.sum(no_object_detection[0, :, :, i], axis=2)),
                        cmap='Greys', vmin=0, vmax=1)
            ax2.set_title('no_object_detection mask')
            ax2.xaxis.set_ticks_position('bottom')

            ax3.matshow((K.sum(noobj_mask[0, :, :, i], axis=2)),
                        cmap='Greys', vmin=0, vmax=1)
            ax3.set_title('noobj_mask')
            ax3.xaxis.set_ticks_position('bottom')

    return loss, sub_loss
def exploss(y_true, y_pred):
    """Exponential loss ``exp(-y_true * y_pred)`` bounded to [1e-6, 1e3]."""
    raw = K.exp(-y_true * y_pred)
    capped = K.minimum(raw, 1e3)
    return K.maximum(capped, 1e-6)
def q_loss(y_true, y_pred):
    """Element-wise ratio ``max(t, p) / min(t, p)`` of the denormalized values.

    Both inputs are first passed through ``denormalize(x, 0, N)``.
    """
    true_vals = denormalize(y_true, 0, N)
    pred_vals = denormalize(y_pred, 0, N)

    larger = K.maximum(true_vals, pred_vals)
    smaller = K.minimum(true_vals, pred_vals)
    return larger / smaller
def mrelu(x):
    """Return ``min(max(1 - x, 0), max(1 + x, 0))`` element-wise.

    This is a triangular bump: 1 at ``x = 0``, falling linearly to 0 at
    ``x = -1`` and ``x = 1``, and 0 outside that interval.
    """
    falling_edge = K.maximum(1 - x, 0)
    rising_edge = K.maximum(1 + x, 0)
    return K.minimum(falling_edge, rising_edge)
def call(self, inputs, mask=None, **kwargs):
    """Apply sequence self-attention to ``inputs``.

    Parameters
    ----------
    inputs : tensor or list
        Either the input tensor, or a two-element list
        ``[inputs, positions]``. When positions are given, the attended
        outputs (and attention weights) are gathered at those positions.
        The input is presumably shaped (batch, seq_len, features) —
        TODO confirm against the layer's ``build``.
    mask : tensor, list or None
        Optional mask. When ``inputs`` is a list, the second entry of the
        mask list is used; ``None`` means no masking.

    Returns
    -------
    ``v`` (the attended values), or ``[v, a]`` with the attention weights
    when ``self.return_attention`` is set.
    """
    if isinstance(inputs, list):
        inputs, positions = inputs
        positions = K.cast(positions, 'int32')
        # No mask may have been supplied at all; only index into it when
        # there is one, otherwise ``None[1]`` raises a TypeError.
        if mask is not None:
            mask = mask[1]
    else:
        positions = None

    input_len = K.shape(inputs)[1]

    if self.attention_type == SeqSelfAttention.ATTENTION_TYPE_ADD:
        e = self._call_additive_emission(inputs)
    elif self.attention_type == SeqSelfAttention.ATTENTION_TYPE_MUL:
        e = self._call_multiplicative_emission(inputs)

    if self.attention_activation is not None:
        e = self.attention_activation(e)
    # Subtract the row maximum before exponentiating for numerical
    # stability.
    e = K.exp(e - K.max(e, axis=-1, keepdims=True))
    if self.attention_width is not None:
        # Restrict attention to a band around the diagonal.
        ones = tf.ones((input_len, input_len))
        if self.history_only:
            local = tf.linalg.band_part(
                ones,
                K.minimum(input_len, self.attention_width - 1),
                0,
            )
        else:
            local = tf.linalg.band_part(
                ones,
                K.minimum(input_len, self.attention_width // 2),
                K.minimum(input_len, (self.attention_width - 1) // 2),
            )
        e = e * K.expand_dims(local, 0)
    if mask is not None:
        # Zero out masked positions along both of the last two axes.
        mask = K.cast(mask, K.floatx())
        mask = K.expand_dims(mask)
        e = K.permute_dimensions(
            K.permute_dimensions(e * mask, (0, 2, 1)) * mask, (0, 2, 1))

    # a_{t} = \text{softmax}(e_t)
    s = K.sum(e, axis=-1)
    s = K.tile(K.expand_dims(s, axis=-1), K.stack([1, 1, input_len]))
    a = e / (s + K.epsilon())

    # l_t = \sum_{t'} a_{t, t'} x_{t'}
    v = K.batch_dot(a, inputs)
    if self.attention_regularizer_weight > 0.0:
        self.add_loss(self._attention_regularizer(a))

    if positions is not None:
        pos_num = K.shape(positions)[1]
        batch_indices = K.tile(
            K.expand_dims(K.arange(K.shape(inputs)[0]), axis=-1),
            K.stack([1, pos_num]))
        pos_indices = K.stack([batch_indices, positions], axis=-1)
        v = tf.gather_nd(v, pos_indices)
        a = tf.gather_nd(a, pos_indices)

    if self.return_attention:
        return [v, a]
    return v
def detection_loss(mask, boxes, one_hot, grid, y_pred):
    """Compute a YOLO-v2 style detection loss.

    The loss is the sum of a confidence loss, a classification loss and a
    coordinate loss. Anchors and grid size are read from ``config``.

    Parameters
    ----------
    mask : tensor
        Detector mask; positive where a detector is matched to a ground
        truth box. Presumably shaped (batch, size, size, anchors, 1) —
        TODO confirm against the caller.
    boxes : tensor
        Matching ground truth boxes (x, y, w, h, ...) per detector.
    one_hot : tensor
        One-hot class labels per detector.
    grid : tensor
        All ground truth boxes per image, reshaped here from
        (batch, max_boxes, box_params) for broadcasting.
    y_pred : tensor
        Raw model output; the last axis holds (tx, ty, tw, th, conf,
        class logits...).

    Returns
    -------
    loss : scalar tensor
    """
    anchors = config.anchors
    size = config.image_size // config.scale
    anchors = anchors.reshape(len(anchors) // 2, 2)

    # Top-left corner coordinates of every grid cell, tiled over batch and
    # anchors.
    coord_x = tf.cast(
        tf.reshape(tf.tile(tf.range(size), [size]), (1, size, size, 1, 1)),
        tf.float32)
    coord_y = tf.transpose(coord_x, (0, 2, 1, 3, 4))
    coords = tf.tile(
        tf.concat([coord_x, coord_y], -1),
        [y_pred.shape[0], 1, 1, len(anchors), 1])

    # Decode predictions into grid cell units.
    pred_xy = backend.sigmoid(y_pred[:, :, :, :, 0:2])
    pred_xy = (pred_xy + coords)
    pred_wh = backend.exp(y_pred[:, :, :, :, 2:4]) * anchors

    # Coordinate loss, averaged over the matched detectors.
    nb_mask = backend.sum(tf.cast(mask > 0.0, tf.float32))
    xy_loss = backend.sum(
        mask * backend.square(boxes[..., :2] - pred_xy)) / (nb_mask + 1e-6)
    wh_loss = backend.sum(
        mask * backend.square(backend.sqrt(boxes[..., 2:4]) -
                              backend.sqrt(pred_wh))) / (nb_mask + 1e-6)
    coord_loss = xy_loss + wh_loss

    # Classification loss.
    pred_box_class = y_pred[..., 5:]
    true_box_class = tf.argmax(one_hot, -1)
    class_loss = backend.sparse_categorical_crossentropy(
        true_box_class, pred_box_class, True)
    class_loss = backend.expand_dims(class_loss, -1) * mask
    class_loss = backend.sum(class_loss) / (nb_mask + 1e-6)

    pred_conf = backend.sigmoid(y_pred[..., 4:5])

    # IoU between each detector's prediction and its matching true box.
    x1 = boxes[..., 0]
    y1 = boxes[..., 1]
    w1 = boxes[..., 2]
    h1 = boxes[..., 3]
    x2 = pred_xy[..., 0]
    y2 = pred_xy[..., 1]
    w2 = pred_wh[..., 0]
    h2 = pred_wh[..., 1]

    x_min_1 = x1 - 0.5 * w1
    x_max_1 = x1 + 0.5 * w1
    y_min_1 = y1 - 0.5 * h1
    y_max_1 = y1 + 0.5 * h1
    x_min_2 = x2 - 0.5 * w2
    x_max_2 = x2 + 0.5 * w2
    y_min_2 = y2 - 0.5 * h2
    y_max_2 = y2 + 0.5 * h2

    # Clamp the overlap extents at 0: without this, boxes that are
    # disjoint on both axes yield a positive product of two negative
    # extents (a spurious intersection), and boxes disjoint on one axis
    # yield a negative IoU.
    intersection_x = backend.maximum(
        backend.minimum(x_max_1, x_max_2) - backend.maximum(x_min_1, x_min_2),
        0.)
    intersection_y = backend.maximum(
        backend.minimum(y_max_1, y_max_2) - backend.maximum(y_min_1, y_min_2),
        0.)
    intersection = intersection_x * intersection_y
    union = w1 * h1 + w2 * h2 - intersection
    iou = intersection / (union + 1e-6)
    iou = backend.expand_dims(iou, -1)

    # Best IoU of each prediction against all true boxes of the image.
    pred_xy = backend.expand_dims(pred_xy, 4)
    pred_wh = backend.expand_dims(pred_wh, 4)
    pred_wh_half = pred_wh / 2.
    pred_min = pred_xy - pred_wh_half
    pred_max = pred_xy + pred_wh_half

    true_boxes_shape = backend.int_shape(grid)
    grid = backend.reshape(grid, [
        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1],
        true_boxes_shape[2]
    ])
    true_xy = grid[..., 0:2]
    true_wh = grid[..., 2:4]
    true_wh_half = true_wh * 0.5
    true_min = true_xy - true_wh_half
    true_maxes = true_xy + true_wh_half

    intersection_min = backend.maximum(pred_min, true_min)
    intersection_max = backend.minimum(pred_max, true_maxes)
    intersect_wh = backend.maximum(intersection_max - intersection_min, 0.)
    intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1]
    pred_areas = pred_wh[..., 0] * pred_wh[..., 1]
    true_areas = true_wh[..., 0] * true_wh[..., 1]
    union_areas = pred_areas + true_areas - intersect_areas
    iou_scores = intersect_areas / union_areas
    best_iou = backend.max(iou_scores, axis=4)
    best_iou = backend.expand_dims(best_iou)

    # Confidence loss: unmatched detectors whose best IoU is below 0.6 are
    # pushed towards 0, matched detectors towards their IoU.
    no_object_detection = backend.cast(best_iou < 0.6,
                                       backend.dtype(best_iou))
    no_obj_mask = no_object_detection * (1 - mask)
    nb_no_obj_mask = backend.sum(backend.cast(no_obj_mask > 0.0, 'float32'))
    no_object_loss = backend.sum(
        no_obj_mask * backend.square(-pred_conf)) / (nb_no_obj_mask + 1e-6)
    object_loss = backend.sum(
        mask * backend.square(iou - pred_conf)) / (nb_mask + 1e-6)
    conf_loss = no_object_loss + object_loss

    loss = conf_loss + class_loss + coord_loss
    return loss
def ramp(y_true, y_pred):
    """Ramp loss: ``1 - beta * y_true * y_pred`` clipped to [0, 1].

    ``beta`` is fixed at 1.0; the clipped values are averaged over the last
    axis.
    """
    beta = 1.0
    margin = 1. - beta * y_true * y_pred
    non_negative = K.maximum(0., margin)
    clipped = K.minimum(1., non_negative)
    return K.mean(clipped, axis=-1)
def surrogate_loss(r, adv, prob, clip, c2):
    """Negated mean of a clipped surrogate objective plus an entropy-like term.

    Parameters
    ----------
    r : tensor
        Ratio that is clipped to ``[1 - clip, 1 + clip]`` (presumably the
        new/old policy probability ratio — confirm against the caller).
    adv : tensor
        Multiplied with both the raw and the clipped ratio.
    prob : tensor
        Used for the term ``-prob * log(prob + 1e-10)``.
    clip : float
        Half-width of the clipping interval around 1.
    c2 : float
        Weight of the ``-prob * log(prob)`` term.
    """
    unclipped = r * adv
    clipped = K.clip(r, min_value=1 - clip, max_value=1 + clip) * adv
    pessimistic = K.minimum(unclipped, clipped)

    entropy_term = c2 * -(prob * K.log(prob + 1e-10))
    return -K.mean(pessimistic + entropy_term)
def _cross_entropy(self, y_true, y_pred):
    """Return ``-sum(y_true * log(y_pred))`` over the last axis.

    ``y_pred`` is bounded to ``[1e-15, 1 - 1e-15]`` before the logarithm
    to avoid ``log(0)``.
    """
    bounded_above = K.minimum(y_pred, 1 - 1e-15)
    safe_pred = K.maximum(bounded_above, 1e-15)
    return -K.sum(y_true * K.log(safe_pred), axis=-1)
def call(self, a, **kwargs):
    """Run one step of the layer stack on input ``a``.

    The previous states are read from ``kwargs['initial_state']``, laid
    out as ``r`` (representation), ``c`` (cell) and ``e`` (error) lists of
    ``nb_layers`` entries each; when ``extrap_start_time`` is set, the last
    two entries are the previous frame prediction and the time step.

    Parameters
    ----------
    a : tensor
        Input for the bottom layer at this step.
    **kwargs
        Must contain ``initial_state`` (list of state tensors).

    Returns
    -------
    output, states
        ``output`` depends on ``output_layer_type`` / ``output_mode``;
        ``states`` is ``r + c + e`` (plus ``[frame_prediction, t + 1]``
        when extrapolating).
    """
    states = kwargs['initial_state']
    r_tm1 = states[:self.nb_layers]
    c_tm1 = states[self.nb_layers:2 * self.nb_layers]
    e_tm1 = states[2 * self.nb_layers:3 * self.nb_layers]

    if self.extrap_start_time is not None:
        t = states[-1]
        # if past self.extrap_start_time, the previous prediction will be
        # treated as the actual
        a = K.switch(t >= self.t_extrap, states[-2], a)

    # NOTE: the leftover debugger breakpoints (set_trace()) that used to sit
    # here and before the return were removed; they halted every call.

    c = []
    r = []
    e = []

    # Update R units starting from the top
    for l in reversed(range(self.nb_layers)):
        inputs = [r_tm1[l], e_tm1[l]]
        if l < self.nb_layers - 1:
            # r_up was produced by the layer above in the previous
            # iteration of this loop.
            inputs.append(r_up)

        inputs = K.concatenate(inputs, axis=self.channel_axis)
        i = self.conv_layers['i'][l].call(inputs)
        f = self.conv_layers['f'][l].call(inputs)
        o = self.conv_layers['o'][l].call(inputs)
        _c = f * c_tm1[l] + i * self.conv_layers['c'][l].call(inputs)
        _r = o * self.LSTM_activation(_c)
        c.insert(0, _c)
        r.insert(0, _r)

        if l > 0:
            r_up = self.upsample.call(_r)

    # Update feedforward path starting from the bottom
    for l in range(self.nb_layers):
        ahat = self.conv_layers['ahat'][l].call(r[l])
        if l == 0:
            ahat = K.minimum(ahat, self.pixel_max)
            frame_prediction = ahat

        # compute errors
        e_up = self.error_activation(ahat - a)
        e_down = self.error_activation(a - ahat)

        e.append(K.concatenate((e_up, e_down), axis=self.channel_axis))

        if self.output_layer_num == l:
            if self.output_layer_type == 'A':
                output = a
            elif self.output_layer_type == 'Ahat':
                output = ahat
            elif self.output_layer_type == 'R':
                output = r[l]
            elif self.output_layer_type == 'E':
                output = e[l]

        if l < self.nb_layers - 1:
            a = self.conv_layers['a'][l].call(e[l])
            a = self.pool.call(a)  # target for next layer

    if self.output_layer_type is None:
        if self.output_mode == 'prediction':
            output = frame_prediction
        else:
            # One mean error value per layer, concatenated on the last axis.
            for l in range(self.nb_layers):
                layer_error = K.mean(K.batch_flatten(e[l]), axis=-1,
                                     keepdims=True)
                all_error = layer_error if l == 0 else K.concatenate(
                    (all_error, layer_error), axis=-1)
            if self.output_mode == 'error':
                output = all_error
            else:
                output = K.concatenate(
                    (K.batch_flatten(frame_prediction), all_error), axis=-1)

    states = r + c + e
    if self.extrap_start_time is not None:
        states += [frame_prediction, t + 1]
    return output, states
def yolo2_loss(args, anchors, num_classes, label_smoothing=0,
               use_crossentropy_loss=False, use_crossentropy_obj_loss=False,
               rescore_confidence=False):
    """YOLOv2 loss function.

    Parameters
    ----------
    args : tuple
        ``(yolo_output, true_boxes, y_true)``:

        yolo_output : tensor
            Final convolutional layer features.
        true_boxes : tensor
            Ground truth boxes tensor with shape
            [batch, num_true_boxes, 5] containing box x_center, y_center,
            width, height, and class.
        y_true : array
            output of preprocess_true_boxes, with shape
            [conv_height, conv_width, num_anchors, 6]
    anchors : tensor
        Anchor boxes for model.
    num_classes : int
        Number of object classes.
    label_smoothing : float, default=0
        When truthy, the one-hot class targets are passed through
        ``_smooth_labels`` with this value.
    use_crossentropy_loss : bool, default=False
        Use categorical cross-entropy instead of squared error for the
        classification term.
    use_crossentropy_obj_loss : bool, default=False
        Use binary cross-entropy instead of squared error for the
        confidence term.
    rescore_confidence : bool, default=False
        If true then set confidence target to IOU of best predicted box
        with the closest matching ground truth box.

    Returns
    -------
    total_loss, coordinates_loss_sum, confidence_loss_sum, classification_loss_sum
        ``total_loss`` is half the sum of the three per-batch-averaged
        terms, with a trailing axis added; the other three are the
        individual terms.
    """
    yolo_output, true_boxes, y_true = args
    n_anchors = len(anchors)
    out_shape = K.shape(yolo_output)
    input_shape = out_shape[1:3] * 32

    # Relative weights of the individual loss terms.
    w_object, w_no_object, w_class, w_coord = 5, 1, 1, 1

    object_mask = y_true[..., 4:5]
    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo2_head(
        yolo_output, anchors, num_classes, input_shape)

    # Unadjusted box predictions for loss.
    # TODO: Remove extra computation shared with yolo2_head.
    batch_size = out_shape[0]  # batch size, tensor
    batch_size_f = K.cast(batch_size, K.dtype(yolo_output))
    feats = K.reshape(
        yolo_output,
        [-1, out_shape[1], out_shape[2], n_anchors, num_classes + 5])
    raw_xy = K.sigmoid(feats[..., 0:2])
    raw_wh = feats[..., 2:4]
    pred_boxes = K.concatenate((raw_xy, raw_wh), axis=-1)

    # TODO: Adjust predictions by image width/height for non-square images?
    # IOUs may be off due to different aspect ratio.

    # Insert a "true box" axis so every prediction can be compared with
    # every ground truth box:
    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
    box_xy = K.expand_dims(pred_xy, 4)
    box_wh = K.expand_dims(pred_wh, 4)
    box_half = box_wh / 2.
    box_lo = box_xy - box_half
    box_hi = box_xy + box_half

    gt_shape = K.shape(true_boxes)
    gt = K.reshape(
        true_boxes, [gt_shape[0], 1, 1, 1, gt_shape[1], gt_shape[2]])
    gt_xy = gt[..., 0:2]
    gt_wh = gt[..., 2:4]
    gt_half = gt_wh / 2.
    gt_lo = gt_xy - gt_half
    gt_hi = gt_xy + gt_half

    # IOU of each predicted box with each ground truth box.
    overlap_lo = K.maximum(box_lo, gt_lo)
    overlap_hi = K.minimum(box_hi, gt_hi)
    overlap_wh = K.maximum(overlap_hi - overlap_lo, 0.)
    overlap_area = overlap_wh[..., 0] * overlap_wh[..., 1]
    box_area = box_wh[..., 0] * box_wh[..., 1]
    gt_area = gt_wh[..., 0] * gt_wh[..., 1]
    iou_scores = overlap_area / (box_area + gt_area - overlap_area)

    # Best IOU over the true boxes for each detector, with a trailing axis.
    best_ious = K.expand_dims(K.max(iou_scores, axis=4))

    # A detector has found an object if IOU > thresh for some true box.
    object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious))

    # TODO: Darknet region training includes extra coordinate loss for early
    # training steps to encourage predictions to match anchor priors.

    # Confidence loss.
    # NOTE: YOLOv2 does not use binary cross-entropy. Here we try it.
    no_object_weights = (
        w_no_object * (1 - object_detections) * (1 - object_mask))
    if use_crossentropy_obj_loss:
        background_target = K.zeros(K.shape(pred_confidence))
        no_objects_loss = no_object_weights * K.binary_crossentropy(
            background_target, pred_confidence, from_logits=False)
        if rescore_confidence:
            object_target = best_ious
        else:
            object_target = K.ones(K.shape(pred_confidence))
        objects_loss = (w_object * object_mask * K.binary_crossentropy(
            object_target, pred_confidence, from_logits=False))
    else:
        no_objects_loss = no_object_weights * K.square(-pred_confidence)
        object_target = best_ious if rescore_confidence else 1
        objects_loss = (w_object * object_mask *
                        K.square(object_target - pred_confidence))
    confidence_loss = objects_loss + no_objects_loss

    # Classification loss for matching detections.
    # NOTE: YOLOv2 does not use categorical cross-entropy loss.
    #       Here we try it.
    class_ids = K.cast(y_true[..., 5], 'int32')
    class_targets = K.one_hot(class_ids, num_classes)
    if label_smoothing:
        class_targets = _smooth_labels(class_targets, label_smoothing)

    if use_crossentropy_loss:
        per_box_ce = K.categorical_crossentropy(
            class_targets, pred_class_prob, from_logits=False)
        classification_loss = (
            w_class * object_mask * K.expand_dims(per_box_ce, axis=-1))
    else:
        classification_loss = (w_class * object_mask *
                               K.square(class_targets - pred_class_prob))

    # Coordinate loss for matching detection boxes.
    box_targets = y_true[..., 0:4]
    coordinates_loss = (w_coord * object_mask *
                        K.square(box_targets - pred_boxes))

    # Average every term over the batch.
    confidence_loss_sum = K.sum(confidence_loss) / batch_size_f
    classification_loss_sum = K.sum(classification_loss) / batch_size_f
    coordinates_loss_sum = K.sum(coordinates_loss) / batch_size_f
    total_loss = 0.5 * (confidence_loss_sum + classification_loss_sum +
                        coordinates_loss_sum)

    # Fit for tf 2.0.0 loss shape
    total_loss = K.expand_dims(total_loss, axis=-1)

    return (total_loss, coordinates_loss_sum, confidence_loss_sum,
            classification_loss_sum)
def clipped_relu(inputs):
    """Apply a ReLU clipped to the range [0, 20] to ``inputs``.

    The activation is wrapped in a ``Lambda`` layer named
    ``'clipped_relu'``, which is passed through ``get`` before being
    applied.
    """
    # Kept as a lambda (not a nested def) so the Lambda layer wraps the
    # same kind of callable as before.
    clip_fn = lambda y: K.minimum(K.maximum(y, 0), 20)
    layer = Lambda(clip_fn, name='clipped_relu')
    wrapped = get(layer)
    return wrapped(inputs)
def vertebrae_classification_rate(y_true, y_pred):
    """Fraction of correctly classified elements, weighted by ``minimum(1.0, y_true)``.

    A prediction counts as correct when ``round(y_pred)`` equals
    ``y_true``. Elements where ``y_true`` is 0 get weight 0 and are
    ignored; the result is normalised by the sum of the weights.
    """
    weights = minimum(1.0, y_true)
    hits = cast(equal(round(y_pred), y_true), 'float32')
    return sum(hits * weights) / sum(weights)
def get_updates(self, loss, params):
    """Build the list of update ops for momentum SGD with per-parameter settings.

    For every parameter the learning rate, its decay factor, the momentum and
    an optional value clip are looked up by matching the parameter name
    against the keys of ``self.lr``, ``self.decay``, ``self.momentum`` and
    ``self.clips``; the ``'all'`` entry is the fallback for learning rate,
    decay and momentum.
    NOTE(review): the matching rule lives in ``set_pattern_find``, which is
    defined outside this block -- confirm how it picks a key.

    Parameters
    ----------
    loss : the loss passed to ``self.get_gradients``.
    params : iterable of parameters (variables) to update.

    Returns
    -------
    list
        ``self.updates``: the iteration increment, learning-rate decay
        updates, and one moment update plus one parameter update per
        parameter.
    """
    grads = self.get_gradients(loss, params)
    # first update the number of iterations
    self.updates = [K.update_add(self.iterations, 1)]
    if self.decay_epochs:
        # The global learning rate is multiplied by its decay factor whenever
        # the iteration counter equals any entry of ``self.decay_epochs``.
        ite_casted = K.cast(self.iterations, K.dtype(self.decay_epochs))
        hit_decay_epoch = K.any(K.equal(ite_casted, self.decay_epochs))
        lr = K.switch(hit_decay_epoch, self.lr['all'] * self.decay['all'],
                      self.lr['all'])
        self.updates.append(K.update(self.lr['all'], lr))
    # One zero-initialised moment accumulator per parameter.
    shapes = [K.int_shape(p) for p in params]
    moments = [K.zeros(s) for s in shapes]
    self.weights = [self.iterations] + moments
    for p, g, m in zip(params, grads, moments):
        # Learning rate: use a name-matched entry if one exists, else 'all'.
        lrptrkey = set_pattern_find(p.name, self.lr.keys())
        if lrptrkey:
            if self.verbose > 0:
                print("Setting different learning rate for ", p.name, " : ",
                      K.eval(self.lr[lrptrkey]))
            lr = self.lr[lrptrkey]
            dcptrkey = set_pattern_find(p.name, self.decay.keys())
            if self.decay_epochs and dcptrkey:
                # Matched learning rate with its own matched decay factor.
                lr = K.switch(hit_decay_epoch,
                              self.lr[lrptrkey] * self.decay[dcptrkey],
                              self.lr[lrptrkey])
                # NOTE(review): this update is appended once per matching
                # parameter, so one learning-rate variable may receive
                # several update ops -- confirm this is intended.
                self.updates.append(K.update(self.lr[lrptrkey], lr))
                if self.verbose > 0:
                    print("Added decay to ", p.name, ": ", K.eval(lr), ",",
                          self.decay[dcptrkey])
            elif self.decay_epochs:
                # Matched learning rate, decayed with the global factor.
                lr = K.switch(hit_decay_epoch,
                              self.lr[lrptrkey] * self.decay['all'],
                              self.lr[lrptrkey])
                self.updates.append(K.update(self.lr[lrptrkey], lr))
                if self.verbose > 0:
                    print("Added decay to ", p.name, ": ", K.eval(lr), ",",
                          self.decay['all'])
            else:
                lr = self.lr[lrptrkey]
        else:
            lr = self.lr['all']
        # Momentum: name-matched entry if one exists, else 'all'.
        momptrkey = set_pattern_find(p.name, self.momentum.keys())
        if momptrkey:
            if self.verbose > 0:
                print("Setting different momentum for ", p.name, " , ",
                      K.eval(self.momentum[momptrkey]))
            momentum = self.momentum[momptrkey]
        else:
            momentum = self.momentum['all']
        v = momentum * m - lr * g  # velocity
        self.updates.append(K.update(m, v))
        if self.nesterov:
            new_p = p + momentum * (momentum * m - lr * g) - lr * g
        else:
            new_p = p + momentum * m - lr * g
        # Update clipping: keep the new value within +/- ``margin`` of the
        # current value, where margin is the mean of |p * UPCLIP|.
        _to_tensor = K.tensorflow_backend._to_tensor
        _clip_by_val = K.tf.clip_by_value
        margin = K.mean(K.abs(p * K.constant(self.UPCLIP)))
        min_value = _to_tensor(p - margin, p.dtype.base_dtype)
        max_value = _to_tensor(p + margin, p.dtype.base_dtype)
        # Order the two bounds explicitly before clipping.
        max_v = K.maximum(min_value, max_value)
        min_v = K.minimum(min_value, max_value)
        new_p = _clip_by_val(new_p, min_v, max_v)
        # Apply constraints.
        if getattr(p, 'constraint', None) is not None:
            new_p = p.constraint(new_p)
        # Optional absolute clip to the (low, high) pair stored for this name.
        clptrkey = set_pattern_find(p.name, self.clips.keys())
        if self.clips_val and clptrkey:
            if self.verbose > 0:
                print("Clipping variable", p.name, " to ",
                      self.clips[clptrkey])
            c = K.eval(self.clips[clptrkey])
            new_p = K.clip(new_p, c[0], c[1])
        self.updates.append(K.update(p, new_p))
    return self.updates
def loss(y_true, y_pred):
    """Clipped-ratio surrogate loss with an entropy term.

    Uses ``old_prediction``, ``advantage`` and ``self`` from the enclosing
    scope. The ratio between ``y_true * y_pred`` and
    ``y_true * old_prediction`` is clipped to
    ``[1 - self.loss_clipping, 1 + self.loss_clipping]``; the element-wise
    minimum of the clipped and unclipped advantage-weighted ratios is added
    to ``self.entropy_loss * -(prob * log(prob))`` and the negated mean of
    the sum is returned.

    NOTE(review): presumably ``y_true`` is a one-hot action mask -- confirm
    against the caller.
    """
    new_prob = y_true * y_pred
    prev_prob = y_true * old_prediction
    # 1e-10 guards against division by zero and log(0).
    ratio = new_prob / (prev_prob + 1e-10)
    clipped_ratio = K.clip(ratio,
                           min_value=1 - self.loss_clipping,
                           max_value=1 + self.loss_clipping)
    surrogate = K.minimum(ratio * advantage, clipped_ratio * advantage)
    entropy_term = self.entropy_loss * -(new_prob * K.log(new_prob + 1e-10))
    return -K.mean(surrogate + entropy_term)
def __call__(self, x):
    """Return ``self.k`` times the smaller of two squared distances.

    The two candidates are the sum of squares of ``x`` (distance to all
    zeros) and the sum of squares of ``x - 1`` (distance to all ones).
    """
    sq_dist_to_zero = math_ops.reduce_sum(math_ops.square(x))
    sq_dist_to_one = math_ops.reduce_sum(math_ops.square(x - 1))
    nearest = K.minimum(sq_dist_to_zero, sq_dist_to_one)
    return self.k * nearest
def clipped_relu(inp):
    """Apply a ReLU capped at 20 to ``inp`` via an anonymous Lambda layer.

    The layer computes ``min(max(y, 0), 20)`` element-wise.
    """
    clip_layer = Lambda(lambda y: K.minimum(K.maximum(y, 0), 20))
    return clip_layer(inp)
def filter_detections(boxes, classification, other=None,
                      class_specific_filter=True, nms=True,
                      score_threshold=0.05, max_detections=300,
                      nms_threshold=0.5):
    """Filter detections using the boxes and classification values.

    Args:
        boxes: Tensor of shape ``(num_boxes, 4)`` containing the boxes in
            ``(x1, y1, x2, y2)`` format.
        classification: Tensor of shape ``(num_boxes, num_classes)``
            containing the classification scores.
        other (list): List of tensors of shape ``(num_boxes, ...)`` to filter
            along with the boxes and classification scores. ``None`` (the
            default) is treated as an empty list.
        class_specific_filter (bool): Whether to perform filtering per class,
            or take the best scoring class and filter those.
        nms (bool): Whether to enable non maximum suppression.
        score_threshold (float): Threshold used to prefilter the boxes with.
        max_detections (int): Maximum number of detections to keep.
        nms_threshold (float): Threshold for the IoU value to determine when
            a box should be suppressed.

    Returns:
        list: A list of ``[boxes, scores, labels, other[0], other[1], ...]``.
        ``boxes`` is shaped ``(max_detections, 4)`` and contains the
        ``(x1, y1, x2, y2)`` of the non-suppressed boxes.
        ``scores`` is shaped ``(max_detections,)`` and contains the scores of
        the predicted class.
        ``labels`` is shaped ``(max_detections,)`` and contains the predicted
        label.
        ``other[i]`` is shaped ``(max_detections, ...)`` and contains the
        filtered ``other[i]`` data.
        In case there are less than ``max_detections`` detections, the
        tensors are padded with -1's.
    """
    # Avoid a shared mutable default; None stands for "no extra tensors".
    other = [] if other is None else other

    def _filter_detections(scores, labels):
        # threshold based on score
        indices = tf.where(K.greater(scores, score_threshold))

        if nms:
            filtered_boxes = tf.gather_nd(boxes, indices)
            filtered_scores = K.gather(scores, indices)[:, 0]

            # perform NMS
            nms_indices = tf.image.non_max_suppression(
                filtered_boxes, filtered_scores,
                max_output_size=max_detections,
                iou_threshold=nms_threshold)

            # filter indices based on NMS
            indices = K.gather(indices, nms_indices)

        # pair every surviving box index with its label
        labels = tf.gather_nd(labels, indices)
        indices = K.stack([indices[:, 0], labels], axis=1)

        return indices

    if class_specific_filter:
        all_indices = []
        # perform per class filtering
        for c in range(K.int_shape(classification)[1]):
            scores = classification[:, c]
            labels = c * tf.ones((K.shape(scores)[0],), dtype='int64')
            all_indices.append(_filter_detections(scores, labels))

        # concatenate indices to single tensor
        indices = K.concatenate(all_indices, axis=0)
    else:
        scores = K.max(classification, axis=1)
        labels = K.argmax(classification, axis=1)
        indices = _filter_detections(scores, labels)

    # select top k
    scores = tf.gather_nd(classification, indices)
    labels = indices[:, 1]
    scores, top_indices = tf.nn.top_k(
        scores, k=K.minimum(max_detections, K.shape(scores)[0]))

    # filter input using the final set of indices
    indices = K.gather(indices[:, 0], top_indices)
    boxes = K.gather(boxes, indices)
    labels = K.gather(labels, top_indices)
    other_ = [K.gather(o, indices) for o in other]

    # pad the outputs with -1 up to max_detections rows
    pad_size = K.maximum(0, max_detections - K.shape(scores)[0])

    def _paddings(x):
        # Pad only along the first axis; leave the remaining axes untouched.
        return [[0, pad_size]] + [[0, 0] for _ in range(1, K.ndim(x))]

    boxes = tf.pad(boxes, [[0, pad_size], [0, 0]], constant_values=-1)
    scores = tf.pad(scores, [[0, pad_size]], constant_values=-1)
    labels = tf.pad(labels, [[0, pad_size]], constant_values=-1)
    labels = K.cast(labels, 'int32')
    other_ = [tf.pad(o, _paddings(o), constant_values=-1) for o in other_]

    # set shapes, since we know what they are
    boxes.set_shape([max_detections, 4])
    scores.set_shape([max_detections])
    labels.set_shape([max_detections])
    for o, s in zip(other_, [list(K.int_shape(o)) for o in other]):
        o.set_shape([max_detections] + s[1:])

    return [boxes, scores, labels] + other_
def box_ciou(b1, b2):
    """Compute the Complete-IoU (CIoU) between two sets of boxes.

    Parameters
    ----------
    b1: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh
    b2: tensor, shape=(batch, feat_w, feat_h, anchor_num, 4), xywh

    Returns
    -------
    ciou: tensor, shape=(batch, feat_w, feat_h, anchor_num, 1)
    """
    eps = K.epsilon()

    def _corners(box):
        # Split an xywh box into centre, size, top-left and bottom-right.
        centre = box[..., :2]
        size = box[..., 2:4]
        half = size / 2.
        return centre, size, centre - half, centre + half

    # Corners of the first set of boxes (per the docstring shapes, each is
    # (batch, feat_w, feat_h, anchor_num, 2)).
    xy_a, wh_a, mins_a, maxes_a = _corners(b1)
    # Corners of the second set of boxes, same layout.
    xy_b, wh_b, mins_b, maxes_b = _corners(b2)

    # IoU of every box pair: (batch, feat_w, feat_h, anchor_num)
    overlap_mins = K.maximum(mins_a, mins_b)
    overlap_maxes = K.minimum(maxes_a, maxes_b)
    overlap_wh = K.maximum(overlap_maxes - overlap_mins, 0.)
    overlap_area = overlap_wh[..., 0] * overlap_wh[..., 1]
    area_a = wh_a[..., 0] * wh_a[..., 1]
    area_b = wh_b[..., 0] * wh_b[..., 1]
    union = area_a + area_b - overlap_area
    iou = overlap_area / K.maximum(union, eps)

    # Squared distance between the two box centres.
    centre_dist_sq = K.sum(K.square(xy_a - xy_b), axis=-1)

    # Squared diagonal of the smallest box enclosing both boxes.
    outer_mins = K.minimum(mins_a, mins_b)
    outer_maxes = K.maximum(maxes_a, maxes_b)
    outer_wh = K.maximum(outer_maxes - outer_mins, 0.0)
    outer_diag_sq = K.sum(K.square(outer_wh), axis=-1)

    ciou = iou - 1.0 * centre_dist_sq / K.maximum(outer_diag_sq, eps)

    # Aspect-ratio consistency term and its trade-off weight.
    angle_a = tf.math.atan2(wh_a[..., 0], K.maximum(wh_a[..., 1], eps))
    angle_b = tf.math.atan2(wh_b[..., 0], K.maximum(wh_b[..., 1], eps))
    v = 4 * K.square(angle_a - angle_b) / (math.pi * math.pi)
    alpha = v / K.maximum((1.0 - iou + v), eps)
    ciou = ciou - alpha * v

    ciou = K.expand_dims(ciou, -1)
    # Replace any NaN entries with zero.
    return tf.where(tf.math.is_nan(ciou), tf.zeros_like(ciou), ciou)
def mymask(y_true):
    """Return ``min(y_true + 1, 1)`` element-wise.

    Values of ``y_true`` that are >= 0 map to 1; a value of -1 maps to 0.
    """
    shifted = y_true + 1.
    return K.minimum(shifted, 1.)
def bbox_ciou(bboxes1, bboxes2):
    """Complete IoU between boxes given as (centre x, centre y, w, h).

    @param bboxes1: (a, b, ..., 4)
    @param bboxes2: (A, B, ..., 4)
        x:X is 1:n or n:n or n:1
    @return tuple ``(ciou, iou)``, each of shape (max(a,A), max(b,B), ...)
        ex) (4,):(3,4) -> (3,)
            (2,1,4):(2,3,4) -> (2,3)
    """
    eps = K.epsilon()

    centre1 = bboxes1[..., :2]
    half1 = bboxes1[..., 2:4] * 0.5
    centre2 = bboxes2[..., :2]
    half2 = bboxes2[..., 2:4] * 0.5

    w1, h1 = bboxes1[..., 2], bboxes1[..., 3]
    w2, h2 = bboxes2[..., 2], bboxes2[..., 3]
    area1 = w1 * h1
    area2 = w2 * h2

    # Top-left and bottom-right corners of each box.
    tl1, br1 = centre1 - half1, centre1 + half1
    tl2, br2 = centre2 - half2, centre2 + half2

    # Plain IoU.
    overlap = K.maximum(K.minimum(br1, br2) - K.maximum(tl1, tl2), 0.0)
    overlap_area = overlap[..., 0] * overlap[..., 1]
    union = area1 + area2 - overlap_area
    iou = overlap_area / (union + eps)

    # Squared diagonal of the smallest enclosing box.
    outer = K.maximum(br1, br2) - K.minimum(tl1, tl2)
    c_2 = K.pow(outer[..., 0], 2) + K.pow(outer[..., 1], 2)

    # Squared distance between the box centres.
    offset = centre2 - centre1
    rho_2 = K.pow(offset[..., 0], 2) + K.pow(offset[..., 1], 2)

    diou = iou - rho_2 / (c_2 + eps)

    # Aspect-ratio consistency term; 0.636619772 is 2/pi.
    angle_gap = (tf.math.atan(w1 / (h1 + eps))
                 - tf.math.atan(w2 / (h2 + eps)))
    v = K.pow(angle_gap * 0.636619772, 2)
    alpha = v / (1 - iou + v + eps)

    ciou = diou - alpha * v
    return ciou, iou
def on_batch_end(self, epoch, logs=None):
    """Advance the step counter and write a new learning rate to the optimizer.

    The rate is ``model_dim ** -0.5 * min(step ** -0.5,
    step * warmup_steps ** -1.5)``, computed from the counter after it has
    been incremented, and stored in ``self.model.optimizer.lr``.
    """
    logs = logs or {}
    self._step_num += 1
    step = self._step_num
    decay_term = step ** -.5
    warmup_term = step * self._warmup_steps ** -1.5
    lrate = self._model_dim ** -.5 * K.minimum(decay_term, warmup_term)
    K.set_value(self.model.optimizer.lr, lrate)
def clipped_relu(self, inputs):
    """Apply a ReLU capped at 20 through a uniquely named Lambda layer.

    The layer name embeds ``self.clipped_relu_count``, which is incremented
    after the layer has been applied so successive calls get distinct names.
    """
    layer_name = f'clipped_relu_{self.clipped_relu_count}'
    clip_layer = Lambda(lambda y: K.minimum(K.maximum(y, 0), 20),
                        name=layer_name)
    relu = clip_layer(inputs)
    self.clipped_relu_count += 1
    return relu
def deep_dream():
    """
    DeepDream is an artistic image-modification technique that uses the representations learned by convnets. First
    released by Google in the summer of 2015, this algorithm is very similar to the gradient ascent technique we
    viewed earlier to represent the patterns learned by individual filters during training (Chapter 5). There are a
    few differences to the algorithm:
        -> With DeepDream you try to maximise the activation of the entire layer rather than one specific filter,
           thus mixing together visualisations of a larger number of filters.
        -> You start not from a blank, slightly noisy input, but rather from an existing image - thus the resulting
           effects latch on to preexisting visual patterns, distorting elements of the image in a somewhat artistic
           fashion.
        -> The input images are processed at different scales (called octaves), which improves the quality of the
           visualisations.

    The base image is read from, and every intermediate and final dream is written to, a hard-coded directory.

    This function does not work due to version issues.
    :return: None
    """
    # You won't be training a model for this application, so let's disable all training functionality before
    # starting
    K.set_learning_phase(0)
    model = inception_v3.InceptionV3(weights='imagenet', include_top=False)

    # In Chapter 5 we use the loss value to maximise the output of a specific filter. This time we'll attempt to
    # maximise the weighted sum of the L2 norm of the activations of a set of high-level layers. The set of layers
    # chosen will have a massive impact on the resulting modifications to the image, so make these params very
    # easily configurable.
    layers_contributions = {
        'mixed2': 0.2,
        'mixed3': 3.0,
        'mixed4': 2.0,
        'mixed5': 1.5
    }
    layer_dict = {layer.name: layer for layer in model.layers}

    # You'll define the loss by adding layer contributions to this scalar value.
    loss = K.variable(0.0)
    for layer_name, coeff in layers_contributions.items():
        # Retrieve the layer's output.
        activation = layer_dict[layer_name].output

        # Define the scaling factor and add the L2 norm of the features of a layer to the loss. You avoid border
        # artifacts by involving only non-border pixels in the loss.
        scaling = K.prod(K.cast(K.shape(activation), 'float32'))
        loss = loss + coeff * K.sum(K.square(
            activation[:, 2:-2, 2:-2, :])) / scaling

    # Now we can set up the gradient ascent process.
    dream = model.input

    # Compute gradient of the dream w.r.t to the loss, then NORMALISE!!!
    grads = K.gradients(loss, dream)[0]
    # Divide by the mean absolute gradient, floored at 1e-7 so a vanishing gradient cannot cause a division by
    # (near) zero. The floor must be a maximum: a minimum would cap the divisor at 1e-7 and blow the gradients up.
    grads /= K.maximum(K.mean(K.abs(grads)), 1e-7)

    # Now set up a Keras function to retrieve the value of the loss and gradients given an input image.
    outputs = [loss, grads]
    fetch_loss_and_grads = K.function([dream], outputs)

    def eval_loss_and_grads(x):
        """
        This function is used to call the fetch_loss_and_grads function and package the outputs in an easy to use
        fashion.
        :param x: Input dream
        :return: The loss and the gradient of the layer w.r.t. the dream.
        """
        outs = fetch_loss_and_grads([x])
        loss_value = outs[0]
        grads_value = outs[1]
        return loss_value, grads_value

    def gradient_ascent(x, iterations, step, max_loss=None):
        """
        This function runs gradient ascent for a number of iterations.
        :param x: Input dream (updated in place)
        :param iterations: Number of iterations to run gradient ascent for
        :param step: Step-size of the gradient ascent
        :param max_loss: Maximum loss we'll accept during the gradient ascent before stopping.
        :return: A modified version of the input dream
        """
        for i in range(iterations):
            loss_value, grads_value = eval_loss_and_grads(x)
            if max_loss is not None and loss_value > max_loss:
                break
            print(f"...Loss value at {i}: {loss_value}")
            x += step * grads_value
        return x

    # Now we can begin programming the DeepDream algorithm itself. First we need to define a set of scales
    # (called octaves) at which to process the image. Each octave is 40% larger than the last. At each scale (from
    # smallest to largest) you run gradient ascent to maximise the loss you previously defined. To prevent artifacts
    # of up-scaling (blurriness and stretching) we'll re-inject the lost detail back into the image, which is
    # possible because you know what the original image should look like at a larger scale.
    step = 0.01
    num_octave = 3
    octave_scale = 1.4
    iterations = 20
    max_loss = 10.0

    # Directory holding the base image; the dreams are written next to it.
    project_dir = ('C:\\Users\\owatkins\\OneDrive - Analog Devices, Inc\\Documents\\Project Folder\\'
                   'Tutorials and Courses\\Deep Learning with Python\\')
    base_image_path = project_dir + 'European_Landscape.jpg'

    print("Loading Base Image...")
    # Load the base image into Numpy array.
    img = preprocess_image_inception(base_image_path)
    print(f"Image Preprocessed: {img.dtype} of size: {img.shape}")

    # Prepare a list of shape tuples defining the different scales at which to run gradient ascent.
    original_shape = img.shape[1:3]
    successive_shapes = [original_shape]
    for i in range(1, num_octave):
        shape = tuple(
            int(dim / (octave_scale**i)) for dim in original_shape)
        successive_shapes.append(shape)

    # Reverse the list so that they run in ascending order.
    successive_shapes = successive_shapes[::-1]

    # Resize the Numpy array of the image to the smallest size.
    original_img = np.copy(img)
    shrunk_original_image = resize_img(original_img, successive_shapes[0])

    # Run deep dream over all octaves.
    for shape in successive_shapes:
        print(f"Processing Image shape: {shape}")
        # Scales up the deep dream image
        img = resize_img(img, shape)

        # Run gradient ascent, altering the dream.
        img = gradient_ascent(img,
                              iterations=iterations,
                              step=step,
                              max_loss=max_loss)

        # Scales up the smaller version of the original image: it will be pixellated. Compute the high-quality
        # version of the original image at this size. The difference between the two is the detail lost in
        # up-scaling.
        upscaled_shrunk_original_img = resize_img(shrunk_original_image, shape)
        same_size_original = resize_img(original_img, shape)
        lost_detail = same_size_original - upscaled_shrunk_original_img

        # Re-inject the lost detail back into the dream. Grab the shrunk_original_image and save the dream at this
        # octave
        img += lost_detail
        shrunk_original_image = resize_img(original_img, shape)
        save_img(
            img,
            fname=project_dir + 'dream_at_scale_' + str(shape) + '.png')

    # Save the final dream.
    save_img(
        img,
        fname=project_dir + 'Final_Dream.png'
    )