def testFocalLossPerfectScore(self):
    # Test focal loss for a perfect case where logit probabilities are
    # higher for the expected classes.
    # Set inputs for focal_loss.
    logits_array = np.transpose(
        np.array(
            [
                [
                    _logit(0.55),
                    _logit(0.52),
                    _logit(0.50),
                    _logit(0.48),
                    _logit(0.45),
                ],
                [
                    _logit(0.95),
                    _logit(0.82),
                    _logit(0.80),
                    _logit(0.28),
                    _logit(0.35),
                ],
            ],
            dtype=np.float32,
        )
    )
    labels_array = np.transpose(
        np.array([[0, 0, 0, 1, 1], [1, 1, 1, 0, 0]], dtype=np.float32)
    )
    prediction_tensor = tf.placeholder(tf.float32)
    target_tensor = tf.placeholder(tf.float32)
    loss_tf = focal_loss(prediction_tensor, target_tensor)
    with tf.Session() as sess:
        loss = sess.run(
            loss_tf,
            feed_dict={
                prediction_tensor: logits_array,
                target_tensor: labels_array,
            },
        )
    expected_loss = [0.00022774, 0.00787424, 0.00892574, 0.03088996, 0.07966313]
    self.assertAllClose(loss, expected_loss)
def testFocalLossImperfectScore(self):
    # Test focal loss for an imperfect case where logit probabilities are
    # lower for the expected classes.
    # Set inputs for focal_loss.
    logits_array = np.transpose(
        np.array(
            [
                [
                    _logit(0.55),
                    _logit(0.52),
                    _logit(0.50),
                    _logit(0.48),
                    _logit(0.45),
                ],
                [
                    _logit(0.95),
                    _logit(0.82),
                    _logit(0.80),
                    _logit(0.28),
                    _logit(0.35),
                ],
            ],
            dtype=np.float32,
        )
    )
    labels_array = np.transpose(
        np.array([[1, 1, 1, 0, 0], [0, 0, 0, 1, 1]], dtype=np.float32)
    )
    prediction_tensor = tf.placeholder(tf.float32)
    target_tensor = tf.placeholder(tf.float32)
    loss_tf = focal_loss(prediction_tensor, target_tensor)
    with tf.Session() as sess:
        loss = sess.run(
            loss_tf,
            feed_dict={
                prediction_tensor: logits_array,
                target_tensor: labels_array,
            },
        )
    expected_loss = [2.4771614, 1.0766783, 1.0300404, 0.60194975, 0.33609733]
    self.assertAllClose(loss, expected_loss)
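# NOTE: the tests above rely on a `_logit` helper and the `focal_loss`
# under test, both defined elsewhere in the codebase (`np` and `tf` are the
# module-level imports already used by the tests). A minimal sketch of
# both follows, assuming the softmax formulation of Lin et al. (2017) with
# a default gamma of 2.0; this choice is consistent with the
# `expected_loss` values above up to rounding, but the canonical
# definitions live in the real module.
def _logit(probability):
    """Inverse of the sigmoid: maps a probability back to a logit."""
    return np.log(probability / (1.0 - probability))


def focal_loss(prediction_tensor, target_tensor, gamma=2.0):
    """Per-anchor focal loss: -(1 - p_t)^gamma * log(p_t)."""
    proba = tf.nn.softmax(prediction_tensor)
    # Probability assigned to the ground-truth class, selected with the
    # one-hot target; clipped to avoid log(0).
    p_t = tf.clip_by_value(
        tf.reduce_sum(proba * target_tensor, axis=1), 1e-8, 1.0)
    # Well classified anchors (p_t near 1) are down-weighted by the
    # (1 - p_t)^gamma modulating factor.
    return -tf.pow(1.0 - p_t, gamma) * tf.log(p_t)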
def loss(self, prediction_dict):
    """Returns the cost for the Region Proposal Network.

    Args:
        rpn_cls_score: Score for being an object or not for each anchor
            in the image. Shape: (num_anchors, 2)
        rpn_cls_target: Ground truth labeling for each anchor. Should be:
            * 1: for positive labels
            * 0: for negative labels
            * -1: for labels we should ignore.
            Shape: (num_anchors,)
        rpn_bbox_target: Bounding box output delta target for rpn.
            Shape: (num_anchors, 4)
        rpn_bbox_pred: Bounding box output delta prediction for rpn.
            Shape: (num_anchors, 4)

    Returns:
        A dict with the classification loss (`rpn_cls_loss`) and the
        regression loss (`rpn_reg_loss`).
    """
    rpn_cls_score = prediction_dict["rpn_cls_score"]
    rpn_cls_target = prediction_dict["rpn_cls_target"]

    rpn_bbox_target = prediction_dict["rpn_bbox_target"]
    rpn_bbox_pred = prediction_dict["rpn_bbox_pred"]

    with tf.variable_scope("RPNLoss"):
        # Flatten already flat Tensor for usage as boolean mask filter.
        rpn_cls_target = tf.cast(
            tf.reshape(rpn_cls_target, [-1]), tf.int32,
            name="rpn_cls_target")
        # Transform to boolean tensor mask for not ignored.
        labels_not_ignored = tf.not_equal(
            rpn_cls_target, -1, name="labels_not_ignored")

        # Now we only have the labels we are going to compare with the
        # cls probability.
        labels = tf.boolean_mask(rpn_cls_target, labels_not_ignored)
        cls_score = tf.boolean_mask(rpn_cls_score, labels_not_ignored)

        # We need to transform `labels` to `cls_score` shape:
        # convert [1, 0] to [[0, 1], [1, 0]] for cross entropy with logits.
        cls_target = tf.one_hot(labels, depth=2)

        # Equivalent to log loss.
        if self.loss_type == CROSS_ENTROPY:
            # TODO PV make this a loss function in losses.py
            ce_per_anchor = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=cls_target, logits=cls_score)
            if self.loss_weight != 1:
                ce_per_anchor = ce_per_anchor * self.loss_weight
        elif self.loss_type == FOCAL:
            ce_per_anchor = focal_loss(
                cls_score, cls_target, self.focal_gamma)
        # TODO PV Rename cross entropy per anchor to reflect that focal
        # loss may be calculated instead.
        prediction_dict["cross_entropy_per_anchor"] = ce_per_anchor

        # Finally, we need to calculate the regression loss over
        # `rpn_bbox_target` and `rpn_bbox_pred`.
        # We use SmoothL1Loss.
        rpn_bbox_target = tf.reshape(rpn_bbox_target, [-1, 4])
        rpn_bbox_pred = tf.reshape(rpn_bbox_pred, [-1, 4])

        # We only care for positive labels (we ignore backgrounds since
        # we don't have any bounding box information for them).
        positive_labels = tf.equal(rpn_cls_target, 1)
        rpn_bbox_target = tf.boolean_mask(rpn_bbox_target, positive_labels)
        rpn_bbox_pred = tf.boolean_mask(rpn_bbox_pred, positive_labels)

        # We apply smooth l1 loss as described by the Fast R-CNN paper.
        reg_loss_per_anchor = smooth_l1_loss(
            rpn_bbox_pred, rpn_bbox_target, sigma=self._l1_sigma)

        prediction_dict["reg_loss_per_anchor"] = reg_loss_per_anchor

        # Loss summaries.
        tf.summary.scalar("batch_size", tf.shape(labels)[0], ["rpn"])
        foreground_cls_loss = tf.boolean_mask(
            ce_per_anchor, tf.equal(labels, 1))
        background_cls_loss = tf.boolean_mask(
            ce_per_anchor, tf.equal(labels, 0))
        tf.summary.scalar(
            "foreground_cls_loss",
            tf.reduce_mean(foreground_cls_loss), ["rpn"])
        tf.summary.histogram(
            "foreground_cls_loss", foreground_cls_loss, ["rpn"])
        tf.summary.scalar(
            "background_cls_loss",
            tf.reduce_mean(background_cls_loss), ["rpn"])
        tf.summary.histogram(
            "background_cls_loss", background_cls_loss, ["rpn"])
        tf.summary.scalar(
            "foreground_samples", tf.shape(rpn_bbox_target)[0], ["rpn"])

        return {
            "rpn_cls_loss": tf.reduce_mean(ce_per_anchor),
            "rpn_reg_loss": tf.reduce_mean(reg_loss_per_anchor),
        }
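# NOTE: `smooth_l1_loss` is defined in losses.py and not shown here. A
# minimal sketch, assuming the Fast R-CNN formulation with the `sigma`
# scaling used above; the default sigma and the exact reduction in
# losses.py are assumptions.
def smooth_l1_loss(bbox_prediction, bbox_target, sigma=1.0):
    sigma2 = sigma ** 2
    abs_diff = tf.abs(bbox_prediction - bbox_target)
    # Quadratic below the 1/sigma^2 threshold, linear above it, so large
    # regression errors don't dominate the gradient.
    per_coord = tf.where(
        tf.less(abs_diff, 1.0 / sigma2),
        0.5 * sigma2 * tf.square(abs_diff),
        abs_diff - 0.5 / sigma2,
    )
    # One loss value per anchor: sum over the 4 box coordinates.
    return tf.reduce_sum(per_coord, axis=1)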
def loss(self, prediction_dict, return_all=False):
    """Compute the loss for SSD.

    Args:
        prediction_dict: The output dictionary of the _build method, from
            which we use these main keys:
            cls_pred: A dictionary with the class predictions.
            loc_pred: A dictionary with the localization predictions.
            target: A dictionary with the targets for both classes and
                localizations.
        return_all: Whether to also return the classification and
            localization losses separately, besides the total loss.

    Returns:
        A tensor for the total loss, or, if `return_all` is True, a dict
        with the total, classification and localization losses.
    """
    with tf.name_scope("losses"):
        cls_pred = prediction_dict["cls_pred"]
        cls_target = tf.cast(prediction_dict["target"]["cls"], tf.int32)
        # Transform to one-hot vector.
        cls_target_one_hot = tf.one_hot(
            cls_target, depth=self._num_classes + 1,
            name="cls_target_one_hot")

        # We get the cross entropy loss of each proposal.
        # TODO: Optimization opportunity: We calculate the probabilities
        # earlier in the program, so if we used those instead of the
        # logits we would not need to do softmax here too.
        if self.loss_type == CROSS_ENTROPY:
            classification_loss_per_proposal = (
                tf.nn.softmax_cross_entropy_with_logits_v2(
                    labels=cls_target_one_hot, logits=cls_pred))
        elif self.loss_type == FOCAL:
            classification_loss_per_proposal = focal_loss(
                cls_pred, cls_target_one_hot, self.focal_gamma)

        # Second we need to calculate the smooth l1 loss between
        # `bbox_offsets` and `bbox_offsets_targets`.
        bbox_offsets = prediction_dict["loc_pred"]
        bbox_offsets_targets = prediction_dict["target"]["bbox_offsets"]

        # We only want the bounding boxes of the non-background labels.
        not_ignored = tf.reshape(tf.greater(cls_target, 0), [-1])
        bbox_offsets_positives = tf.boolean_mask(
            bbox_offsets, not_ignored, name="bbox_offsets_positives")
        bbox_offsets_target_positives = tf.boolean_mask(
            bbox_offsets_targets, not_ignored,
            name="bbox_offsets_target_positives")

        # Calculate the smooth l1 regression loss between the flattened
        # bbox offsets and the labeled targets.
        reg_loss_per_proposal = smooth_l1_loss(
            bbox_offsets_positives, bbox_offsets_target_positives)

        cls_loss = tf.reduce_sum(classification_loss_per_proposal)
        bbox_loss = tf.reduce_sum(reg_loss_per_proposal)

        # Following the paper, set the loss to 0 if there are no bboxes
        # assigned as foreground targets.
        safety_condition = tf.not_equal(
            tf.shape(bbox_offsets_positives)[0], 0)
        final_loss = tf.cond(
            safety_condition,
            true_fn=lambda: (
                (cls_loss + bbox_loss * self._loc_loss_weight) /
                tf.cast(tf.shape(bbox_offsets_positives)[0], tf.float32)),
            false_fn=lambda: 0.0,
        )
        tf.losses.add_loss(final_loss)
        total_loss = tf.losses.get_total_loss()

        prediction_dict["reg_loss_per_proposal"] = reg_loss_per_proposal
        prediction_dict["cls_loss_per_proposal"] = (
            classification_loss_per_proposal)

        tf.summary.scalar(
            "cls_loss", cls_loss, collections=self._losses_collections)
        tf.summary.scalar(
            "bbox_loss", bbox_loss, collections=self._losses_collections)
        tf.summary.scalar(
            "total_loss", total_loss,
            collections=self._losses_collections)

        if return_all:
            return {
                "total_loss": total_loss,
                "cls_loss": cls_loss,
                "bbox_loss": bbox_loss,
            }
        else:
            return total_loss
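# NOTE: a sketch of how the TODO above could be realized. If the softmax
# probabilities computed earlier in the program were passed in (here as a
# hypothetical `cls_prob` tensor of shape (num_proposals, num_classes + 1)),
# the per-proposal cross entropy could reuse them instead of running
# softmax a second time on the logits:
def _cross_entropy_from_probs(cls_prob, cls_target_one_hot):
    # Clip to avoid log(0); otherwise equivalent to
    # softmax_cross_entropy_with_logits when cls_prob = softmax(cls_pred).
    safe_prob = tf.clip_by_value(cls_prob, 1e-8, 1.0)
    return tf.reduce_sum(-cls_target_one_hot * tf.log(safe_prob), axis=1)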
def loss(self, prediction_dict):
    """Returns the cost for RCNN.

    Args:
        prediction_dict with keys:
            rcnn:
                cls_score: shape (num_proposals, num_classes + 1)
                    Has the class scoring for each of the proposals.
                    Classes are 1-indexed with 0 being the background.
                cls_prob: shape (num_proposals, num_classes + 1)
                    Application of softmax on cls_score.
                bbox_offsets: shape (num_proposals, num_classes * 4)
                    Has the offset for each proposal for each class.
                    We have to compare only the proposals labeled with
                    the offsets for that label.
            target:
                cls_target: shape (num_proposals,)
                    Has the correct label for each of the proposals.
                    0 => background
                    1..n => 1-indexed classes
                bbox_offsets_target: shape (num_proposals, 4)
                    Has the true offset of each proposal for the true
                    label. In case of not having a true label
                    (i.e. background) then it's just zeroes.

    Returns:
        loss_dict with keys:
            rcnn_cls_loss: The cross-entropy or log-loss of the
                classification task between the num_classes + background.
            rcnn_reg_loss: The smooth L1 loss for the bounding box
                regression task to adjust correctly labeled boxes.
    """
    with tf.name_scope("RCNNLoss"):
        cls_score = prediction_dict["rcnn"]["cls_score"]
        # cls_prob = prediction_dict['rcnn']['cls_prob']
        # Cast target explicitly as int32.
        cls_target = tf.cast(prediction_dict["target"]["cls"], tf.int32)

        # First we need to calculate the log loss between cls_prob and
        # cls_target.
        # We only care for the targets that are >= 0.
        not_ignored = tf.reshape(
            tf.greater_equal(cls_target, 0), [-1], name="not_ignored"
        )
        # We apply boolean mask to score, prob and target.
        cls_score_labeled = tf.boolean_mask(
            cls_score, not_ignored, name="cls_score_labeled"
        )
        # cls_prob_labeled = tf.boolean_mask(
        #     cls_prob, not_ignored, name='cls_prob_labeled')
        cls_target_labeled = tf.boolean_mask(
            cls_target, not_ignored, name="cls_target_labeled"
        )

        tf.summary.scalar(
            "batch_size", tf.shape(cls_score_labeled)[0], ["rcnn"]
        )

        # Transform to one-hot vector.
        cls_target_one_hot = tf.one_hot(
            cls_target_labeled,
            depth=self._num_classes + 1,
            name="cls_target_one_hot",
        )

        if self.loss_type == CROSS_ENTROPY:
            # Class weights to rebalance the cross entropy.
            class_weights = self.loss_weight
            onehot_labels = tf.stop_gradient(cls_target_one_hot)
            # Compute the (unweighted) softmax cross entropy loss first.
            cross_entropy_per_proposal = (
                tf.nn.softmax_cross_entropy_with_logits(
                    labels=onehot_labels, logits=cls_score_labeled
                )
            )
            if class_weights != 1:
                # Deduce the weight for each sample from its true label,
                # then apply it, relying on broadcasting of the
                # multiplication.
                class_weights = tf.constant(
                    [class_weights], dtype=tf.float32
                )
                weights = tf.reduce_sum(
                    class_weights * onehot_labels, axis=1
                )
                cross_entropy_per_proposal = (
                    cross_entropy_per_proposal * weights
                )
        elif self.loss_type == FOCAL:
            cross_entropy_per_proposal = focal_loss(
                cls_score_labeled,
                tf.stop_gradient(cls_target_one_hot),
                self.focal_gamma,
            )

        if self._debug:
            prediction_dict["_debug"]["losses"] = {}
            # Save the classification loss per proposal to be able to
            # visualize proposals with high and low error.
            prediction_dict["_debug"]["losses"][
                "cross_entropy_per_proposal"
            ] = cross_entropy_per_proposal

        # Second we need to calculate the smooth l1 loss between
        # `bbox_offsets` and `bbox_offsets_target`.
        bbox_offsets = prediction_dict["rcnn"]["bbox_offsets"]
        bbox_offsets_target = prediction_dict["target"]["bbox_offsets"]

        # We only want the bounding boxes of the non-background labels.
        not_ignored = tf.reshape(tf.greater(cls_target, 0), [-1])
        bbox_offsets_labeled = tf.boolean_mask(
            bbox_offsets, not_ignored, name="bbox_offsets_labeled"
        )
        bbox_offsets_target_labeled = tf.boolean_mask(
            bbox_offsets_target, not_ignored,
            name="bbox_offsets_target_labeled"
        )

        cls_target_labeled = tf.boolean_mask(
            cls_target, not_ignored, name="cls_target_labeled"
        )

        # `cls_target_labeled` is based on `cls_target`, which has
        # `num_classes` + 1 classes.
        # For `one_hot` with depth `num_classes` to work, we need to
        # lower the labels to make them 0-indexed.
        cls_target_labeled = cls_target_labeled - 1

        cls_target_one_hot = tf.one_hot(
            cls_target_labeled, depth=self._num_classes,
            name="cls_target_one_hot"
        )

        # cls_target now is (num_labeled, num_classes).
        bbox_flatten = tf.reshape(
            bbox_offsets_labeled, [-1, 4], name="bbox_flatten"
        )

        # We use the flattened cls_target_one_hot as a boolean mask for
        # the bboxes.
        cls_flatten = tf.cast(
            tf.reshape(cls_target_one_hot, [-1]), tf.bool,
            "cls_flatten_as_bool"
        )

        bbox_offset_cleaned = tf.boolean_mask(
            bbox_flatten, cls_flatten, "bbox_offset_cleaned"
        )

        # Calculate the smooth l1 loss between the "cleaned" bbox
        # offsets (that is, the useful results) and the labeled
        # targets.
        reg_loss_per_proposal = smooth_l1_loss(
            bbox_offset_cleaned, bbox_offsets_target_labeled,
            sigma=self._l1_sigma
        )

        tf.summary.scalar(
            "rcnn_foreground_samples",
            tf.shape(bbox_offset_cleaned)[0], ["rcnn"]
        )

        if self._debug:
            # Also save the regression loss per proposal to be able to
            # visualize good and bad proposals in debug mode.
            prediction_dict["_debug"]["losses"][
                "reg_loss_per_proposal"
            ] = reg_loss_per_proposal

        return {
            "rcnn_cls_loss": tf.reduce_mean(cross_entropy_per_proposal),
            "rcnn_reg_loss": tf.reduce_mean(reg_loss_per_proposal),
        }
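# NOTE: the flattened one-hot mask above is the subtle step of this loss:
# `bbox_offsets_labeled` has one 4-value offset slot per class, and only
# the slot of the proposal's ground-truth class should be trained. A small
# numpy sketch of the same indexing (hypothetical demo helper, not part of
# the model; 2 proposals, 3 classes):
def _demo_per_class_offset_selection():
    import numpy as np  # only needed for this demo

    bbox_flatten = np.arange(2 * 3 * 4).reshape(-1, 4)  # (6, 4): one row per (proposal, class) pair
    one_hot = np.array([[0, 1, 0],    # proposal 0 -> class 1
                        [0, 0, 1]])   # proposal 1 -> class 2
    mask = one_hot.reshape(-1).astype(bool)  # flattened mask, length 6
    # Keeps exactly one 4-offset row per proposal (rows 1 and 5 here),
    # which is what the `boolean_mask` over `bbox_flatten` computes above.
    return bbox_flatten[mask]  # shape (2, 4)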