Example #1
0
    def get_predictions(self,
                        score_threshold=0.1,
                        iou_threshold=0.6,
                        max_boxes=20):
        """Turn raw network outputs into final detections.

        The box encodings are decoded against the anchors, divided by
        `self.box_scaler` and clipped to [0, 1]. Per-anchor scores are
        channel 1 of the softmax over `self.class_predictions_with_background`.
        Both are then passed through `batch_non_max_suppression`.

        Arguments:
            score_threshold: a float number, forwarded to the NMS helper.
            iou_threshold: a float number, forwarded to the NMS helper.
            max_boxes: an integer, forwarded to the NMS helper.
        Returns:
            a dict with the following keys
                'boxes': a float tensor with shape [batch_size, N, 4].
                'scores': a float tensor with shape [batch_size, N].
                'num_boxes': an int tensor with shape [batch_size], it
                    represents the number of detections on an image.

            where N = max_boxes.
        """
        with tf.name_scope('postprocessing'):
            decoded = batch_decode(self.box_encodings, self.anchors)
            # the input images were rescaled (e.g. because of padding),
            # so the predicted boxes get the matching inverse scaling
            # and are then kept inside the [0, 1] range:
            rescaled = decoded / self.box_scaler
            clipped_boxes = tf.clip_by_value(rescaled, 0.0, 1.0)
            # shape [batch_size, num_anchors, 4]

            probabilities = tf.nn.softmax(
                self.class_predictions_with_background, axis=2)
            anchor_scores = probabilities[:, :, 1]
            # shape [batch_size, num_anchors]

        # run NMS (pinned to the CPU) to drop duplicate detections
        with tf.device('/cpu:0'):
            with tf.name_scope('nms'):
                kept_boxes, kept_scores, num_detections = batch_non_max_suppression(
                    clipped_boxes, anchor_scores,
                    score_threshold, iou_threshold, max_boxes)

        # the result holds the boxes, their scores and the box count
        detections = {
            'boxes': kept_boxes,
            'scores': kept_scores,
            'num_boxes': num_detections,
        }
        return detections
    def get_predictions(self,
                        score_threshold=0.1,
                        iou_threshold=0.6,
                        max_boxes=20):
        """Turn raw network outputs into final detections.

        The box encodings are decoded against the anchors. Per-anchor scores
        are channel 1 of the softmax over
        `self.class_predictions_with_background`. Both are then passed
        through `batch_non_max_suppression`.

        Arguments:
            score_threshold: a float number, forwarded to the NMS helper.
            iou_threshold: a float number, forwarded to the NMS helper.
            max_boxes: an integer, forwarded to the NMS helper.
        Returns:
            a dict with the following keys
                'boxes': a float tensor with shape [batch_size, N, 4].
                'scores': a float tensor with shape [batch_size, N].
                'num_boxes': an int tensor with shape [batch_size], it
                    represents the number of detections on an image.

            where N = max_boxes.
        """
        with tf.name_scope('postprocessing'):
            decoded_boxes = batch_decode(self.box_encodings, self.anchors)
            # shape [batch_size, num_anchors, 4]

            probabilities = tf.nn.softmax(
                self.class_predictions_with_background, axis=2)
            anchor_scores = probabilities[:, :, 1]
            # shape [batch_size, num_anchors]

        # NMS is pinned to the CPU
        with tf.device('/cpu:0'):
            with tf.name_scope('nms'):
                kept_boxes, kept_scores, num_detections = batch_non_max_suppression(
                    decoded_boxes, anchor_scores,
                    score_threshold, iou_threshold, max_boxes)

        detections = {
            'boxes': kept_boxes,
            'scores': kept_scores,
            'num_boxes': num_detections,
        }
        return detections
Example #3
0
 def get_box_prediction(self):
     """Decode the predicted boxes, undo the input scaling and clip them.

     Returns:
         a float tensor with shape [batch_size, num_anchors, 4],
         with all values clipped to the [0, 1] range.
     """
     decoded = batch_decode(self.box_encodings, self.anchors)
     # padded input images require the predicted boxes to be rescaled
     rescaled = decoded / self.box_scaler
     return tf.clip_by_value(rescaled, 0.0, 1.0)
Example #4
0
    def get_predictions(self,
                        score_threshold=0.1,
                        iou_threshold=0.6,
                        max_boxes_per_class=20):
        """Postprocess outputs of the network (multiclass variant).

        The box encodings are decoded against the anchors. Channel 0 of the
        class predictions (the background entry) is sliced away and a
        sigmoid is applied to the remaining channels to get per-class scores.
        The result goes through `batch_multiclass_non_max_suppression`.

        Arguments:
            score_threshold: a float number, forwarded to the NMS helper.
            iou_threshold: a float number, forwarded to the NMS helper.
            max_boxes_per_class: an integer, forwarded to the NMS helper.
        Returns:
            a dict with the keys 'boxes', 'labels', 'scores' and 'num_boxes',
            holding the four outputs of the NMS helper.
        """
        with tf.name_scope('postprocessing'):
            decoded_boxes = batch_decode(self.box_encodings, self.anchors)
            # shape [batch_size, num_anchors, 4]

            # keep everything except index 0 along the last axis
            slice_begin = [0, 0, 1]
            slice_size = [-1, -1, -1]
            foreground_predictions = tf.slice(
                self.class_predictions_with_background,
                slice_begin, slice_size)
            class_scores = tf.sigmoid(foreground_predictions)
            # shape [batch_size, num_anchors, num_classes]

        # NMS is pinned to the CPU
        with tf.device('/cpu:0'):
            with tf.name_scope('nms'):
                (kept_boxes, kept_scores,
                 kept_classes, num_detections) = batch_multiclass_non_max_suppression(
                    decoded_boxes, class_scores, score_threshold,
                    iou_threshold, max_boxes_per_class, self.num_classes)

        detections = {
            'boxes': kept_boxes,
            'labels': kept_classes,
            'scores': kept_scores,
            'num_boxes': num_detections
        }
        return detections
Example #5
0
    def loss(self, groundtruth, params):
        """Compute scalar loss tensors with respect to provided groundtruth.

        Besides building the losses, this method also registers a number of
        summaries (histograms and scalars) describing predictions, matches
        and per-anchor losses.

        Arguments:
            groundtruth: a dict with the following keys
                'boxes': a float tensor with shape [batch_size, max_num_boxes, 4].
                'num_boxes': an int tensor with shape [batch_size].
                    where max_num_boxes = max(num_boxes).
            params: a dict with parameters for OHEM. The keys read here are
                'loss_to_use', 'loc_loss_weight', 'cls_loss_weight',
                'num_hard_examples', 'nms_threshold',
                'max_negatives_per_positive' and 'min_negatives_per_image'.
        Returns:
            a dict with the following keys
                'localization_loss': a float tensor with shape [].
                'classification_loss': a float tensor with shape [].
            Both are the hard-mined losses divided by
            max(total number of matched anchors in the batch, 1).
        """
        reg_targets, matches = self._create_targets(groundtruth)

        with tf.name_scope('losses'):

            # whether anchor is matched (a match index >= 0)
            is_matched = tf.greater_equal(matches, 0)
            weights = tf.to_float(is_matched)
            # shape [batch_size, num_anchors]

            # we have binary classification for each anchor;
            # cls_targets holds the ground-truth label (1 if matched, else 0)
            cls_targets = tf.to_int32(is_matched)

            with tf.name_scope('classification_loss'):
                # classification loss for every anchor
                # (no explicit weights are passed here)
                cls_losses = classification_loss(
                    self.class_predictions_with_background, cls_targets)
            with tf.name_scope('localization_loss'):
                # localization loss; `weights` is nonzero only for matched
                # (positive) anchors, so presumably only they contribute
                location_losses = localization_loss(self.box_encodings,
                                                    reg_targets, weights)
            # they have shape [batch_size, num_anchors]

            with tf.name_scope('normalization'):
                matches_per_image = tf.reduce_sum(weights,
                                                  axis=1)  # shape [batch_size]
                num_matches = tf.reduce_sum(matches_per_image)  # shape []
                # avoid division by zero when nothing is matched
                normalizer = tf.maximum(num_matches, 1.0)

            scores = tf.nn.softmax(self.class_predictions_with_background,
                                   axis=2)
            # it has shape [batch_size, num_anchors, 2]

            decoded_boxes = batch_decode(self.box_encodings, self.anchors)
            decoded_boxes = decoded_boxes / self.box_scaler
            # it has shape [batch_size, num_anchors, 4]

            # add summaries for predictions
            # (matches == -1 is treated as background here)
            is_background = tf.equal(matches, -1)
            self._add_scalewise_histograms(
                tf.to_float(is_background) * scores[:, :, 0],
                'background_probability')
            self._add_scalewise_histograms(weights * scores[:, :, 1],
                                           'face_probability')
            ymin, xmin, ymax, xmax = tf.unstack(decoded_boxes, axis=2)
            h, w = ymax - ymin, xmax - xmin
            self._add_scalewise_histograms(weights * h, 'box_heights')
            self._add_scalewise_histograms(weights * w, 'box_widths')

            # add summaries for losses and matches
            self._add_scalewise_matches_summaries(weights)
            self._add_scalewise_summaries(cls_losses,
                                          name='classification_losses')
            self._add_scalewise_summaries(location_losses,
                                          name='localization_losses')
            tf.summary.scalar('total_mean_matches_per_image',
                              tf.reduce_mean(matches_per_image))

            with tf.name_scope('ohem'):
                # NOTE(review): the first five arguments are positional
                # (losses, predictions, matches, decoded boxes) - confirm
                # that this order matches the `apply_hard_mining` in scope.
                location_loss, cls_loss = apply_hard_mining(
                    location_losses,
                    cls_losses,
                    self.class_predictions_with_background,
                    matches,
                    decoded_boxes,
                    loss_to_use=params['loss_to_use'],
                    loc_loss_weight=params['loc_loss_weight'],
                    cls_loss_weight=params['cls_loss_weight'],
                    num_hard_examples=params['num_hard_examples'],
                    nms_threshold=params['nms_threshold'],
                    max_negatives_per_positive=params[
                        'max_negatives_per_positive'],
                    min_negatives_per_image=params['min_negatives_per_image'])
                # normalize the summed mined losses by the number of matches
                return {
                    'localization_loss': location_loss / normalizer,
                    'classification_loss': cls_loss / normalizer
                }
Example #6
0
def apply_hard_mining(location_losses,
                      cls_losses,
                      class_predictions_with_background,
                      box_encodings,
                      matches,
                      anchors,
                      loss_to_use='classification',
                      loc_loss_weight=1.0,
                      cls_loss_weight=1.0,
                      num_hard_examples=3000,
                      nms_threshold=0.99,
                      max_negatives_per_positive=3,
                      min_negatives_per_image=0):
    """Applies hard mining to anchorwise losses.

    For every image in the batch the anchors are ranked by a mining loss
    (the weighted classification loss, plus the weighted localization loss
    when `loss_to_use` is 'both'). Up to `num_hard_examples` of them are
    picked with `tf.image.non_max_suppression` over the decoded boxes, and
    the selection is passed to
    `_subsample_selection_to_desired_neg_pos_ratio`. The unweighted losses
    of the selected anchors are then summed over the whole batch.

    As a side effect two scalar summaries are added:
    'mean_num_positives' and 'mean_num_negatives'.

    Arguments:
        location_losses: a float tensor with shape [batch_size, num_anchors].
        cls_losses: a float tensor with shape [batch_size, num_anchors].
        class_predictions_with_background: a float tensor with shape
            [batch_size, num_anchors, num_classes + 1].
            It is not used by this function.
        box_encodings: a float tensor with shape [batch_size, num_anchors, 4].
        matches: an int tensor with shape [batch_size, num_anchors].
        anchors: a float tensor with shape [num_anchors, 4].
        loss_to_use: a string, only possible values are ['classification', 'both'].
        loc_loss_weight: a float number.
        cls_loss_weight: a float number.
        num_hard_examples: an integer.
        nms_threshold: a float number.
        max_negatives_per_positive: a float number.
        min_negatives_per_image: an integer.
    Returns:
        two float tensors with shape []: the mined localization loss and
        the mined classification loss, each summed over the batch.
    Raises:
        ValueError: if `loss_to_use` is not one of the supported values.
    """
    # fail loudly instead of silently mining on the classification loss only
    if loss_to_use not in ('classification', 'both'):
        raise ValueError(
            "loss_to_use must be 'classification' or 'both', got {!r}".format(
                loss_to_use))
    mine_on_location_loss = loss_to_use == 'both'

    decoded_boxes = batch_decode(box_encodings, anchors)
    # it has shape [batch_size, num_anchors, 4]

    # all these tensors must have the same static first dimension
    # (batch size), unstacking yields one entry per image
    per_image_inputs = zip(
        tf.unstack(decoded_boxes, axis=0),
        tf.unstack(location_losses, axis=0),
        tf.unstack(cls_losses, axis=0),
        tf.unstack(matches, axis=0))

    num_positives_list, num_negatives_list = [], []
    mined_location_losses, mined_cls_losses = [], []

    # do OHEM for each image in the batch
    for (box_locations, image_location_losses,
         image_cls_losses, image_matches) in per_image_inputs:

        # the loss that decides which anchors are "hard"
        image_losses = image_cls_losses * cls_loss_weight
        if mine_on_location_loss:
            image_losses = image_losses + image_location_losses * loc_loss_weight
        # it has shape [num_anchors]

        # the mining losses play the role of NMS scores here
        selected_indices = tf.image.non_max_suppression(
            box_locations, image_losses, num_hard_examples, nms_threshold)
        selected_indices, num_positives, num_negatives = _subsample_selection_to_desired_neg_pos_ratio(
            selected_indices, image_matches, max_negatives_per_positive,
            min_negatives_per_image)
        num_positives_list.append(num_positives)
        num_negatives_list.append(num_negatives)

        # note: the unweighted losses are accumulated, the weights
        # only influence which anchors get selected
        mined_location_losses.append(
            tf.reduce_sum(tf.gather(image_location_losses, selected_indices)))
        mined_cls_losses.append(
            tf.reduce_sum(tf.gather(image_cls_losses, selected_indices)))

    mean_num_positives = tf.reduce_mean(tf.stack(num_positives_list, axis=0))
    mean_num_negatives = tf.reduce_mean(tf.stack(num_negatives_list, axis=0))
    tf.summary.scalar('mean_num_positives', mean_num_positives)
    tf.summary.scalar('mean_num_negatives', mean_num_negatives)

    location_loss = tf.reduce_sum(tf.stack(mined_location_losses, axis=0))
    cls_loss = tf.reduce_sum(tf.stack(mined_cls_losses, axis=0))
    return location_loss, cls_loss