def _smooth_l1(y_true, y_pred):
    """ Smooth L1 regression loss, normalized by the number of positive anchors.

    `sigma_squared` is not defined in this function; it is read from the
    enclosing scope.

    Args
        y_true: Tensor from the generator of shape (B, N, 5). The last value for each box is the state of the anchor (ignore, negative, positive).
        y_pred: Tensor from the network of shape (B, N, 4).

    Returns
        The smooth L1 loss of y_pred w.r.t. y_true.
    """
    K = keras.backend

    # split the generator output into regression targets and anchor state
    targets = y_true[:, :, :-1]
    states = y_true[:, :, -1]

    # only anchors whose state equals 1 (positive) contribute to the loss
    positive = backend.where(K.equal(states, 1))
    predicted = backend.gather_nd(y_pred, positive)
    targets = backend.gather_nd(targets, positive)

    # smooth L1 on the absolute error x:
    #   0.5 * sigma^2 * x^2     if x < 1 / sigma^2
    #   x - 0.5 / sigma^2       otherwise
    abs_error = K.abs(predicted - targets)
    in_quadratic_zone = K.less(abs_error, 1.0 / sigma_squared)
    quadratic = 0.5 * sigma_squared * K.pow(abs_error, 2)
    linear = abs_error - 0.5 / sigma_squared
    per_element = backend.where(in_quadratic_zone, quadratic, linear)

    # divide by the number of selected anchors, clamped to at least 1 so an
    # image without positives does not divide by zero
    num_positive = K.maximum(1, K.shape(positive)[0])
    num_positive = K.cast(num_positive, dtype=K.floatx())

    return K.sum(per_element) / num_positive
def _focal(y_true, y_pred):
    """ Focal loss (https://arxiv.org/abs/1708.02002) of y_pred w.r.t. y_true.

    `alpha` and `gamma` are not defined in this function; they are read from
    the enclosing scope.

    Args
        y_true: Tensor of target data from the generator. The last channel is the anchor state
                (-1 for ignore, 0 for background, 1 for object); the channels before it are the class labels.
        y_pred: Tensor of predicted data from the network with shape (B, N, num_classes).

    Returns
        The focal loss of y_pred w.r.t. y_true.
    """
    K = keras.backend

    # split targets into per-class labels and the anchor state channel
    class_targets = y_true[:, :, :-1]
    states = y_true[:, :, -1]

    # drop anchors marked as "ignore" (state == -1)
    usable = backend.where(K.not_equal(states, -1))
    class_targets = backend.gather_nd(class_targets, usable)
    predictions = backend.gather_nd(y_pred, usable)

    # alpha weighting: alpha where the label is 1, (1 - alpha) elsewhere
    is_one = K.equal(class_targets, 1)
    alpha_map = K.ones_like(class_targets) * alpha
    alpha_map = backend.where(is_one, alpha_map, 1 - alpha_map)

    # modulating factor: (1 - p) where the label is 1, p elsewhere, raised to gamma
    modulator = backend.where(is_one, 1 - predictions, predictions)
    weight = alpha_map * modulator ** gamma

    weighted_bce = weight * K.binary_crossentropy(class_targets, predictions)

    # normalize by the number of anchors with state == 1, clamped to at least 1
    positive = backend.where(K.equal(states, 1))
    num_positive = K.cast(K.shape(positive)[0], K.floatx())
    num_positive = K.maximum(K.cast_to_floatx(1.0), num_positive)

    return K.sum(weighted_bce) / num_positive
def _filter_detections(scores, labels):
    """ Select detections by score threshold and, optionally, NMS.

    `boxes`, `nms`, `score_threshold`, `max_detections` and `nms_threshold`
    are not defined in this function; they are read from the enclosing scope.

    Args
        scores : Tensor of scores, one per box.
        labels : Tensor of labels, one per box.

    Returns
        A tensor with two columns per kept detection: the box index and its label.
    """
    K = keras.backend

    # keep only the entries whose score exceeds the threshold
    keep = backend.where(K.greater(scores, score_threshold))

    if nms:
        candidate_boxes = backend.gather_nd(boxes, keep)
        candidate_scores = K.gather(scores, keep)[:, 0]

        # positions (within the candidates) that survive suppression
        survivors = backend.non_max_suppression(
            candidate_boxes,
            candidate_scores,
            max_output_size=max_detections,
            iou_threshold=nms_threshold,
        )

        # map the surviving positions back to indices into the inputs
        keep = K.gather(keep, survivors)

    # pair every kept box index with the label found at that index
    kept_labels = backend.gather_nd(labels, keep)
    return K.stack([keep[:, 0], kept_labels], axis=1)
def filter_detections(boxes,
                      classification,
                      other=None,
                      class_specific_filter=True,
                      nms=True,
                      score_threshold=0.05,
                      max_detections=300,
                      nms_threshold=0.5):
    """ Filter detections using the boxes and classification values.

    Args
        boxes                 : Tensor of shape (num_boxes, 4) containing the boxes in (x1, y1, x2, y2) format.
        classification        : Tensor of shape (num_boxes, num_classes) containing the classification scores.
        other                 : List of tensors of shape (num_boxes, ...) to filter along with the boxes and
                                classification scores. None (the default) is treated as an empty list.
        class_specific_filter : Whether to perform filtering per class, or take the best scoring class and filter those.
        nms                   : Flag to enable/disable non maximum suppression.
        score_threshold       : Threshold used to prefilter the boxes with.
        max_detections        : Maximum number of detections to keep.
        nms_threshold         : Threshold for the IoU value to determine when a box should be suppressed.

    Returns
        A list of [boxes, scores, labels, other[0], other[1], ...].
        boxes is shaped (max_detections, 4) and contains the (x1, y1, x2, y2) of the non-suppressed boxes.
        scores is shaped (max_detections,) and contains the scores of the predicted class.
        labels is shaped (max_detections,) and contains the predicted label.
        other[i] is shaped (max_detections, ...) and contains the filtered other[i] data.
        In case there are fewer than max_detections detections, the tensors are padded with -1's.
    """
    # Use a None sentinel rather than a mutable list literal as the default,
    # so no list object is shared between calls.
    if other is None:
        other = []

    def _filter_detections(scores, labels):
        # threshold based on score
        indices = backend.where(keras.backend.greater(scores, score_threshold))

        if nms:
            filtered_boxes = backend.gather_nd(boxes, indices)
            filtered_scores = keras.backend.gather(scores, indices)[:, 0]

            # perform NMS
            nms_indices = backend.non_max_suppression(
                filtered_boxes,
                filtered_scores,
                max_output_size=max_detections,
                iou_threshold=nms_threshold)

            # filter indices based on NMS
            indices = keras.backend.gather(indices, nms_indices)

        # pair each kept box index with its label: columns are (box index, label)
        labels = backend.gather_nd(labels, indices)
        indices = keras.backend.stack([indices[:, 0], labels], axis=1)

        return indices

    if class_specific_filter:
        all_indices = []
        # perform per class filtering
        for c in range(int(classification.shape[1])):
            scores = classification[:, c]
            labels = c * backend.ones(
                (keras.backend.shape(scores)[0], ), dtype='int64')
            all_indices.append(_filter_detections(scores, labels))

        # concatenate indices to single tensor
        indices = keras.backend.concatenate(all_indices, axis=0)
    else:
        # only consider the best scoring class of every box
        scores = keras.backend.max(classification, axis=1)
        labels = keras.backend.argmax(classification, axis=1)
        indices = _filter_detections(scores, labels)

    # select top k
    scores = backend.gather_nd(classification, indices)
    labels = indices[:, 1]
    scores, top_indices = backend.top_k(
        scores,
        k=keras.backend.minimum(max_detections,
                                keras.backend.shape(scores)[0]))

    # filter input using the final set of indices
    # (the nested helper has already run, so rebinding `boxes` here is safe)
    indices = keras.backend.gather(indices[:, 0], top_indices)
    boxes = keras.backend.gather(boxes, indices)
    labels = keras.backend.gather(labels, top_indices)
    other_ = [keras.backend.gather(o, indices) for o in other]

    # pad the outputs with -1 up to max_detections rows
    pad_size = keras.backend.maximum(
        0, max_detections - keras.backend.shape(scores)[0])
    boxes = backend.pad(boxes, [[0, pad_size], [0, 0]], constant_values=-1)
    scores = backend.pad(scores, [[0, pad_size]], constant_values=-1)
    labels = backend.pad(labels, [[0, pad_size]], constant_values=-1)
    labels = keras.backend.cast(labels, 'int32')
    other_ = [
        # pad only the first axis; all remaining axes are left untouched
        backend.pad(o,
                    [[0, pad_size]] + [[0, 0] for _ in range(1, len(o.shape))],
                    constant_values=-1)
        for o in other_
    ]

    # set shapes, since we know what they are
    boxes.set_shape([max_detections, 4])
    scores.set_shape([max_detections])
    labels.set_shape([max_detections])
    for o, s in zip(other_,
                    [list(keras.backend.int_shape(o)) for o in other]):
        # the first dimension becomes max_detections; the rest come from the input
        o.set_shape([max_detections] + s[1:])

    return [boxes, scores, labels] + other_