Exemple #1
0
    def _build(self, cls_prob, loc_pred, all_anchors, im_shape):
        """Select the final per-class detections from the SSD outputs.

        For every foreground class, the anchors whose probability for that
        class reaches `self._min_prob_threshold` are turned into boxes with
        `decode`, clipped to the image, stripped of boxes without a positive
        area and passed through NMS. The survivors of all the classes are
        then concatenated and the `self._total_max_detections` most probable
        ones are returned.

        Args:
            cls_prob: A softmax probability for each anchor where the idx = 0
                is the background class (which we should ignore).
                Shape (total_anchors, num_classes + 1)
            loc_pred: A Tensor with the regression output for each anchor.
                Its shape should be (total_anchors, 4).
            all_anchors: A Tensor with the anchors bounding boxes of shape
                (total_anchors, 4), having (x_min, y_min, x_max, y_max) for
                each anchor.
            im_shape: A Tensor with the image shape in format (height, width).
        Returns:
            prediction_dict with the following keys:
                objects: The selected boxes after filtering, per-class NMS
                    and the final top-k. Its shape is
                    (final_num_proposals, 4), where final_num_proposals is
                    unknown before-hand (it depends on NMS).
                    The 4-length Tensor for each corresponds to:
                    (x_min, y_min, x_max, y_max).
                labels: The class id of each entry of `objects`. Its shape
                    is (final_num_proposals,)
                probs: The probability of each entry of `objects`. Its shape
                    is (final_num_proposals,)
                raw_proposals: The output of `decode` (before clipping and
                    area filtering) for the LAST class of the loop only.
                anchors: The anchor each entry of `objects` was generated
                    from (for debugging). Row-aligned with `objects`.
        """
        selected_boxes = []
        selected_probs = []
        selected_labels = []
        selected_anchors = []  # For debugging

        # The amount of anchors is the same for every class, so we compute
        # it once outside of the loop.
        total_anchors = tf.shape(all_anchors)[0]

        for class_id in range(self._num_classes):
            # Get the confidences for this class (+ 1 is to ignore background)
            class_cls_prob = cls_prob[:, class_id + 1]

            # Filter by min_prob_threshold
            min_prob_filter = tf.greater_equal(class_cls_prob,
                                               self._min_prob_threshold)
            class_cls_prob = tf.boolean_mask(class_cls_prob, min_prob_filter)
            class_loc_pred = tf.boolean_mask(loc_pred, min_prob_filter)
            anchors = tf.boolean_mask(all_anchors, min_prob_filter)

            # Using the loc_pred and the anchors, we generate the proposals.
            raw_proposals = decode(anchors, class_loc_pred, self._variances)
            # Clip boxes to image.
            clipped_proposals = clip_boxes(raw_proposals, im_shape)

            # Filter proposals that have an non-valid area.
            (x_min, y_min, x_max, y_max) = tf.unstack(clipped_proposals,
                                                      axis=1)
            proposal_filter = tf.greater(
                tf.maximum(x_max - x_min, 0.) * tf.maximum(y_max - y_min, 0.),
                0.)
            class_proposals = tf.boolean_mask(clipped_proposals,
                                              proposal_filter)
            class_cls_prob = tf.boolean_mask(class_cls_prob, proposal_filter)
            proposal_anchors = tf.boolean_mask(anchors, proposal_filter)

            # Log results of filtering non-valid area proposals
            total_proposals = tf.shape(class_proposals)[0]
            total_raw_proposals = tf.shape(raw_proposals)[0]
            # Amount of proposals dropped by the area filter (raw - valid, so
            # that the reported value is never negative).
            tf.summary.scalar('invalid_proposals',
                              total_raw_proposals - total_proposals, ['ssd'])
            # Fraction of the anchors that ended up as a valid proposal for
            # this class.
            tf.summary.scalar(
                'valid_proposals_ratio',
                tf.cast(total_proposals, tf.float32) /
                tf.cast(total_anchors, tf.float32), ['ssd'])

            # We have to use the TensorFlow's bounding box convention to use
            # the included function for NMS.
            # After gathering results we should normalize it back.
            class_proposal_tf = change_order(class_proposals)

            # Apply class NMS.
            class_selected_idx = tf.image.non_max_suppression(
                class_proposal_tf,
                class_cls_prob,
                self._class_max_detections,
                iou_threshold=self._class_nms_threshold)

            # Using NMS resulting indices, gather values from Tensors.
            class_proposal_tf = tf.gather(class_proposal_tf,
                                          class_selected_idx)
            class_cls_prob = tf.gather(class_cls_prob, class_selected_idx)
            # The anchors must go through the same NMS selection as the
            # boxes, otherwise the concatenated anchors are not row-aligned
            # with the concatenated proposals and the final top-k gather
            # picks unrelated anchors.
            class_anchors = tf.gather(proposal_anchors, class_selected_idx)

            # We append values to a regular list which will later be
            # transformed to a proper Tensor.
            selected_boxes.append(class_proposal_tf)
            selected_probs.append(class_cls_prob)
            # In the case of the class_id, since it is a loop on classes, we
            # already have a fixed class_id. We use `tf.tile` to create that
            # Tensor with the total number of indices returned by the NMS.
            selected_labels.append(
                tf.tile([class_id], [tf.shape(class_selected_idx)[0]]))
            selected_anchors.append(class_anchors)

        # We use concat (axis=0) to generate a Tensor where the rows are
        # stacked on top of each other
        proposals_tf = tf.concat(selected_boxes, axis=0)
        # Return to the original convention.
        proposals = change_order(proposals_tf)
        proposal_label = tf.concat(selected_labels, axis=0)
        proposal_label_prob = tf.concat(selected_probs, axis=0)
        proposal_anchors = tf.concat(selected_anchors, axis=0)

        # Get topK detections of all classes.
        k = tf.minimum(self._total_max_detections,
                       tf.shape(proposal_label_prob)[0])
        top_k = tf.nn.top_k(proposal_label_prob, k=k)
        top_k_proposal_label_prob = top_k.values
        top_k_proposals = tf.gather(proposals, top_k.indices)
        top_k_proposal_label = tf.gather(proposal_label, top_k.indices)
        top_k_proposal_anchors = tf.gather(proposal_anchors, top_k.indices)

        return {
            'objects': top_k_proposals,
            'labels': top_k_proposal_label,
            'probs': top_k_proposal_label_prob,
            # NOTE: only the raw proposals of the last class of the loop.
            'raw_proposals': raw_proposals,
            'anchors': top_k_proposal_anchors,
        }
Exemple #2
0
    def _build(self, rpn_cls_prob, rpn_bbox_pred, all_anchors, im_shape):
        """Generate the RPN proposals from the RPN predictions.

        The anchors are (optionally) restricted to the ones inside the image,
        adjusted with `decode`, stripped of boxes without a positive area,
        reduced to the `self._pre_nms_top_n` best scored ones and finally
        passed through NMS. Clipping to the image happens before or after
        NMS depending on `self._clip_after_nms`.

        Args:
            rpn_cls_prob: A Tensor with the softmax output for each anchor.
                Its shape should be (total_anchors, 2), with the probability of
                being background and the probability of being foreground for
                each anchor.
            rpn_bbox_pred: A Tensor with the regression output for each anchor.
                Its shape should be (total_anchors, 4).
            all_anchors: A Tensor with the anchors bounding boxes of shape
                (total_anchors, 4), having (x_min, y_min, x_max, y_max) for
                each anchor.
            im_shape: A Tensor with the image shape in format (height, width).

        Returns:
            prediction_dict with the following keys:
                nms_proposals: A Tensor with the final selected proposed
                    bounding boxes. Its shape is (total_nms_proposals, 5):
                    a leading batch index column (always zero here) followed
                    by the 4 box coordinates.
                nms_proposals_scores: A Tensor with the probability of being an
                    object for that proposal. Its shape is
                    (total_nms_proposals,)
            and, only when `self._debug` is set:
                scores:  A Tensor with the scores of the proposals contained
                    in `proposals` and `proposals_unclipped`.
                proposals: A Tensor with all the valid area RPN proposals, the
                    proposals are clipped if `clip_after_nms` is set to False.
                proposals_unclipped: Same as proposals but the proposals in
                    this tensor are never clipped.
                top_k_proposals: The `pre_nms_top_n` best scored entries of
                    `proposals`, sorted by score.
                top_k_scores: The scores of `top_k_proposals`.
                all_proposals: A Tensor with all the proposals, including the
                    ones with zero or negative area.
        """
        # Scores are extracted from the second scalar of the cls probability.
        # cls_probability is a softmax of (background, foreground).
        scores = rpn_cls_prob[:, 1]
        # Force flatten the scores (it should be already be flatten).
        scores = tf.reshape(scores, [-1])

        if self._filter_outside_anchors:
            with tf.name_scope('filter_outside_anchors'):
                (x_min_anchor, y_min_anchor,
                 x_max_anchor, y_max_anchor) = tf.unstack(all_anchors, axis=1)

                # Keep only the anchors completely inside of the image.
                anchor_filter = tf.logical_and(
                    tf.logical_and(
                        tf.greater_equal(x_min_anchor, 0),
                        tf.greater_equal(y_min_anchor, 0)
                    ),
                    tf.logical_and(
                        tf.less(x_max_anchor, im_shape[1]),
                        tf.less(y_max_anchor, im_shape[0])
                    )
                )
                anchor_filter = tf.reshape(anchor_filter, [-1])
                all_anchors = tf.boolean_mask(
                    all_anchors, anchor_filter, name='filter_anchors')
                rpn_bbox_pred = tf.boolean_mask(rpn_bbox_pred, anchor_filter)
                scores = tf.boolean_mask(scores, anchor_filter)

        # Decode boxes
        all_proposals = decode(all_anchors, rpn_bbox_pred)

        # Filter proposals with negative or zero area.
        (x_min, y_min, x_max, y_max) = tf.unstack(
            all_proposals, axis=1
        )
        proposal_filter = tf.greater(
            tf.maximum(x_max - x_min, 0.0) * tf.maximum(y_max - y_min, 0.0),
            0.0
        )
        proposal_filter = tf.reshape(proposal_filter, [-1])

        # Filter proposals and scores.
        total_proposals = tf.shape(scores)[0]
        scores = tf.boolean_mask(
            scores, proposal_filter,
            name='filter_invalid_scores'
        )
        proposals = tf.boolean_mask(
            all_proposals, proposal_filter,
            name='filter_invalid_proposals'
        )
        if self._debug:
            # Snapshot taken before any clipping is applied.
            proposals_unclipped = tf.identity(proposals)

        if not self._clip_after_nms:
            # Clip proposals to the image.
            proposals = clip_boxes(proposals, im_shape)

        filtered_proposals = tf.shape(scores)[0]

        tf.summary.scalar(
            'valid_proposals_ratio',
            (
                tf.cast(filtered_proposals, tf.float32) /
                tf.cast(total_proposals, tf.float32)
            ), ['rpn'])

        tf.summary.scalar(
            'invalid_proposals', total_proposals - filtered_proposals, ['rpn'])

        # Get top `pre_nms_top_n` indices by sorting the proposals by score.
        k = tf.minimum(self._pre_nms_top_n, tf.shape(scores)[0])
        top_k = tf.nn.top_k(scores, k=k)
        top_k_scores = top_k.values

        top_k_proposals = tf.gather(proposals, top_k.indices)
        # We reorder the proposals into TensorFlows bounding box order for
        # `tf.image.non_max_supression` compatibility.
        proposals_tf_order = change_order(top_k_proposals)

        # We cut the pre_nms filter in pure TF version and go straight into
        # NMS.
        # NMS requires rank 1 scores. We flatten with `tf.reshape` instead of
        # `tf.squeeze` because squeezing a single-element Tensor yields a
        # scalar (rank 0), which breaks NMS when only one proposal is left.
        selected_indices = tf.image.non_max_suppression(
            proposals_tf_order, tf.reshape(top_k_scores, [-1]),
            self._post_nms_top_n, iou_threshold=self._nms_threshold
        )

        # Selected_indices is a smaller tensor, we need to extract the
        # proposals and scores using it.
        nms_proposals = tf.gather(
            proposals_tf_order, selected_indices, name='gather_nms_proposals'
        )
        nms_proposals_scores = tf.gather(
            top_k_scores, selected_indices, name='gather_nms_proposals_scores'
        )

        # We switch back again to the regular bbox encoding.
        nms_proposals = change_order(nms_proposals)

        if self._clip_after_nms:
            # Clip proposals to the image after NMS.
            nms_proposals = clip_boxes(nms_proposals, im_shape)

        # Adds batch number for consistency and multi image batch support.
        batch_inds = tf.zeros(
            (tf.shape(nms_proposals)[0], 1), dtype=tf.float32
        )
        nms_proposals = tf.concat([batch_inds, nms_proposals], axis=1)

        # Proposals are treated as constants by whatever consumes them.
        pred = {
            'nms_proposals': tf.stop_gradient(nms_proposals),
            'nms_proposals_scores': tf.stop_gradient(nms_proposals_scores),
        }

        if self._debug:
            pred.update({
                'proposals': proposals,
                'scores': scores,
                'proposals_unclipped': proposals_unclipped,
                'top_k_proposals': top_k_proposals,
                'top_k_scores': top_k_scores,
                'all_proposals': all_proposals,
            })

        return pred
Exemple #3
0
    def _build(self, proposals, bbox_pred, cls_prob, im_shape):
        """Generate the final detections from the RCNN predictions.

        Each proposal is assigned its most probable label. Background and
        low probability proposals are dropped, the remaining ones are
        adjusted with the deltas predicted for their label, clipped to the
        image and stripped of boxes without a positive area. NMS is then
        applied per class and the `self._total_max_detections` most probable
        detections of all classes are returned.

        Args:
            proposals: Tensor with the RPN proposals bounding boxes.
                Shape (num_proposals, 4). Where num_proposals is less than
                POST_NMS_TOP_N (We don't know exactly beforehand)
            bbox_pred: Tensor with the RCNN delta predictions for each proposal
                for each class. Shape (num_proposals, 4 * num_classes)
            cls_prob: A softmax probability for each proposal where the idx = 0
                is the background class (which we should ignore).
                Shape (num_proposals, num_classes + 1)
            im_shape: The image shape, used to clip the objects to the image.

        Returns:
            prediction_dict with the following keys:
                raw_objects: The output of `decode` for every non-background
                    proposal over the probability threshold, before clipping
                    and area filtering.
                objects:
                    Shape (final_num_proposals, 4)
                    Where final_num_proposals is unknown before-hand (it
                    depends on NMS). The 4-length Tensor for each corresponds
                    to: (x_min, y_min, x_max, y_max).
                proposal_label:
                    Shape (final_num_proposals,)
                proposal_label_prob:
                    Shape (final_num_proposals,)
                selected_boxes: List with one Tensor per class holding the
                    boxes kept by NMS, in TensorFlow's bounding box order.
                selected_probs: List with one Tensor per class holding the
                    probabilities of `selected_boxes`.
                selected_labels: List with one Tensor per class holding the
                    class id repeated once per box of `selected_boxes`.

        """
        # First we want get the most probable label for each proposal
        # We still have the background on idx 0 so we subtract 1 to the idxs.
        proposal_label = tf.argmax(cls_prob, axis=1) - 1
        # Get the probability for the selected label for each proposal.
        proposal_label_prob = tf.reduce_max(cls_prob, axis=1)

        # We are going to use only the non-background proposals.
        non_background_filter = tf.greater_equal(proposal_label, 0)
        # Filter proposals with less than threshold probability.
        min_prob_filter = tf.greater_equal(proposal_label_prob,
                                           self._min_prob_threshold)
        proposal_filter = tf.logical_and(non_background_filter,
                                         min_prob_filter)

        total_proposals = tf.shape(proposals)[0]

        equal_shapes = tf.assert_equal(
            tf.shape(proposals)[0],
            tf.shape(bbox_pred)[0])
        with tf.control_dependencies([equal_shapes]):
            # Filter all tensors for getting all non-background proposals.
            proposals = tf.boolean_mask(proposals, proposal_filter)
            proposal_label = tf.boolean_mask(proposal_label, proposal_filter)
            proposal_label_prob = tf.boolean_mask(proposal_label_prob,
                                                  proposal_filter)
            bbox_pred = tf.boolean_mask(bbox_pred, proposal_filter)

        filtered_proposals = tf.shape(proposals)[0]

        tf.summary.scalar('background_or_low_prob_proposals',
                          total_proposals - filtered_proposals, ['rcnn'])

        # Create one hot with labels for using it to filter bbox_predictions.
        label_one_hot = tf.one_hot(proposal_label, depth=self._num_classes)
        # Flatten label_one_hot to get
        # (num_non_background_proposals * num_classes, 1) for filtering.
        label_one_hot_flatten = tf.cast(tf.reshape(label_one_hot, [-1]),
                                        tf.bool)
        # Flatten bbox_predictions getting
        # (num_non_background_proposals * num_classes, 4).
        bbox_pred_flatten = tf.reshape(bbox_pred, [-1, 4])

        equal_shapes = tf.assert_equal(
            tf.shape(bbox_pred_flatten)[0],
            tf.shape(label_one_hot_flatten)[0])
        with tf.control_dependencies([equal_shapes]):
            # Control same number of dimensions between bbox and mask.
            # Keeps, for each proposal, only the deltas of its own label.
            bbox_pred = tf.boolean_mask(bbox_pred_flatten,
                                        label_one_hot_flatten)

        # Using the bbox_pred and the proposals we generate the objects.
        raw_objects = decode(proposals, bbox_pred)
        # Clip boxes to image.
        clipped_objects = clip_boxes(raw_objects, im_shape)

        # Filter objects that have an non-valid area.
        # The area is clamped to be non-negative, so the comparison has to be
        # strict: `greater_equal` would always be true and filter nothing.
        (x_min, y_min, x_max, y_max) = tf.unstack(clipped_objects, axis=1)
        object_filter = tf.greater(
            tf.maximum(x_max - x_min, 0.0) * tf.maximum(y_max - y_min, 0.0),
            0.0)

        total_raw_objects = tf.shape(raw_objects)[0]
        objects = tf.boolean_mask(clipped_objects, object_filter)
        proposal_label = tf.boolean_mask(proposal_label, object_filter)
        proposal_label_prob = tf.boolean_mask(proposal_label_prob,
                                              object_filter)

        total_objects = tf.shape(objects)[0]

        # Amount of objects dropped by the area filter (raw - valid, so that
        # the reported value is never negative).
        tf.summary.scalar('invalid_proposals',
                          total_raw_objects - total_objects, ['rcnn'])

        # Fraction of the received proposals that ended up as a valid object.
        tf.summary.scalar(
            'valid_proposals_ratio',
            tf.cast(total_objects, tf.float32) /
            tf.cast(total_proposals, tf.float32), ['rcnn'])

        # We have to use the TensorFlow's bounding box convention to use the
        # included function for NMS.
        # After gathering results we should normalize it back.
        objects_tf = change_order(objects)

        selected_boxes = []
        selected_probs = []
        selected_labels = []
        # For each class we want to filter those objects and apply NMS to them.
        for class_id in range(self._num_classes):
            # Filter objects Tensors with class.
            class_filter = tf.equal(proposal_label, class_id)
            class_objects_tf = tf.boolean_mask(objects_tf, class_filter)
            class_prob = tf.boolean_mask(proposal_label_prob, class_filter)

            # Apply class NMS.
            class_selected_idx = tf.image.non_max_suppression(
                class_objects_tf,
                class_prob,
                self._class_max_detections,
                iou_threshold=self._class_nms_threshold)

            # Using NMS resulting indices, gather values from Tensors.
            class_objects_tf = tf.gather(class_objects_tf, class_selected_idx)
            class_prob = tf.gather(class_prob, class_selected_idx)

            # We append values to a regular list which will later be transform
            # to a proper Tensor.
            selected_boxes.append(class_objects_tf)
            selected_probs.append(class_prob)
            # In the case of the class_id, since it is a loop on classes, we
            # already have a fixed class_id. We use `tf.tile` to create that
            # Tensor with the total number of indices returned by the NMS.
            selected_labels.append(
                tf.tile([class_id], [tf.shape(class_selected_idx)[0]]))

        # We use concat (axis=0) to generate a Tensor where the rows are
        # stacked on top of each other
        objects_tf = tf.concat(selected_boxes, axis=0)
        # Return to the original convention.
        objects = change_order(objects_tf)
        proposal_label = tf.concat(selected_labels, axis=0)
        proposal_label_prob = tf.concat(selected_probs, axis=0)

        # Get topK detections of all classes.
        k = tf.minimum(self._total_max_detections,
                       tf.shape(proposal_label_prob)[0])
        top_k = tf.nn.top_k(proposal_label_prob, k=k)
        top_k_proposal_label_prob = top_k.values
        top_k_objects = tf.gather(objects, top_k.indices)
        top_k_proposal_label = tf.gather(proposal_label, top_k.indices)

        return {
            'raw_objects': raw_objects,
            'objects': top_k_objects,
            'proposal_label': top_k_proposal_label,
            'proposal_label_prob': top_k_proposal_label_prob,
            'selected_boxes': selected_boxes,
            'selected_probs': selected_probs,
            'selected_labels': selected_labels,
        }
Exemple #4
0
    def _build(self, rpn_cls_prob, rpn_bbox_pred, all_anchors, im_shape):
        """Turn the RPN predictions into a scored list of proposals.

        Args:
            rpn_cls_prob: Softmax output for each anchor. Expected shape is
                (total_anchors, 2): the background probability followed by
                the foreground probability.
            rpn_bbox_pred: Regression output for each anchor. Expected shape
                is (total_anchors, 4).
            all_anchors: Anchor bounding boxes. Expected shape is
                (total_anchors, 4), each row being
                (x_min, y_min, x_max, y_max).
            im_shape: Image shape in format (height, width).

        Returns:
            A dict with the keys:
                proposals: The finally selected proposal boxes, four
                    coordinates per row.
                scores: The foreground probability of each entry of
                    `proposals`.
            When `self._debug` is set the intermediate tensors are included
            too (`sorted_top_*`, `unsorted_*`, `all_*` and
            `proposals_unclipped`).
        """
        # The foreground probability is the second column of the softmax; the
        # reshape only guarantees a flat vector.
        fg_scores = tf.reshape(rpn_cls_prob[:, 1], [-1])

        anchors = all_anchors
        bbox_deltas = rpn_bbox_pred

        if self._filter_outside_anchors:
            with tf.name_scope('filter_outside_anchors'):
                left, top, right, bottom = tf.unstack(anchors, axis=1)

                # An anchor is kept when both of its corners lie inside the
                # image.
                starts_inside = tf.logical_and(
                    tf.greater_equal(left, 0), tf.greater_equal(top, 0))
                ends_inside = tf.logical_and(
                    tf.less(right, im_shape[1]), tf.less(bottom, im_shape[0]))
                inside_mask = tf.reshape(
                    tf.logical_and(starts_inside, ends_inside), [-1])

                anchors = tf.boolean_mask(
                    anchors, inside_mask, name='filter_anchors')
                bbox_deltas = tf.boolean_mask(bbox_deltas, inside_mask)
                fg_scores = tf.boolean_mask(fg_scores, inside_mask)

        # Apply the predicted deltas to the anchors.
        decoded = decode(anchors, bbox_deltas)

        # Mask of the proposals reaching the minimum probability.
        confident = tf.greater_equal(fg_scores, self._min_prob_threshold)

        # Mask of the proposals with a strictly positive area.
        x_min, y_min, x_max, y_max = tf.unstack(decoded, axis=1)
        widths = tf.maximum(x_max - x_min, 0.0)
        heights = tf.maximum(y_max - y_min, 0.0)
        has_area = tf.greater(widths * heights, 0.0)

        keep = tf.logical_and(has_area, confident)

        # Apply both masks at once to scores and boxes.
        num_decoded = tf.shape(fg_scores)[0]
        kept_scores = tf.boolean_mask(fg_scores, keep, name='filtered_scores')
        kept_boxes = tf.boolean_mask(
            decoded, keep, name='filtered_proposals')
        if self._debug:
            proposals_unclipped = tf.identity(kept_boxes)

        if not self._clip_after_nms:
            # Clipping was requested before NMS.
            kept_boxes = clip_boxes(kept_boxes, im_shape)

        num_kept = tf.shape(kept_scores)[0]

        valid_ratio = (
            tf.cast(num_kept, tf.float32) / tf.cast(num_decoded, tf.float32))
        tf.summary.scalar('valid_proposals_ratio', valid_ratio, ['rpn'])
        tf.summary.scalar(
            'invalid_proposals', num_decoded - num_kept, ['rpn'])

        # Keep, sorted by score, at most `pre_nms_top_n` proposals.
        k = tf.minimum(self._pre_nms_top_n, tf.shape(kept_scores)[0])
        best = tf.nn.top_k(kept_scores, k=k)
        top_boxes = tf.gather(kept_boxes, best.indices)
        top_scores = best.values

        if not self._apply_nms:
            proposals, scores = top_boxes, top_scores
        else:
            with tf.name_scope('nms'):
                # `tf.image.non_max_suppression` expects TensorFlow's box
                # order, so we convert, run NMS and convert back.
                boxes_tf_order = change_order(top_boxes)
                flat_scores = tf.reshape(top_scores, [-1])
                selected = tf.image.non_max_suppression(
                    boxes_tf_order, flat_scores,
                    self._post_nms_top_n, iou_threshold=self._nms_threshold)

                # NMS returns indices; fetch the matching boxes and scores.
                nms_boxes = tf.gather(
                    boxes_tf_order, selected, name='gather_nms_proposals')
                proposals = change_order(nms_boxes)
                scores = tf.gather(
                    top_scores, selected, name='gather_nms_proposals_scores')

        if self._clip_after_nms:
            # Clipping was requested after NMS.
            proposals = clip_boxes(proposals, im_shape)

        pred = {'proposals': proposals, 'scores': scores}

        if self._debug:
            pred['sorted_top_scores'] = top_scores
            pred['sorted_top_proposals'] = top_boxes
            pred['unsorted_proposals'] = kept_boxes
            pred['unsorted_scores'] = kept_scores
            pred['all_proposals'] = decoded
            pred['all_scores'] = fg_scores
            # Row-aligned with the `unsorted_scores` entry.
            pred['proposals_unclipped'] = proposals_unclipped

        return pred
Exemple #5
0
    def _build(self, rpn_cls_prob, rpn_bbox_pred, all_anchors, im_shape):
        """Build the proposals (and their scores) out of the RPN outputs.

        Args:
            rpn_cls_prob: A Tensor with the per-anchor softmax output; its
                shape should be (total_anchors, 2), i.e. the probability of
                background and the probability of foreground.
            rpn_bbox_pred: A Tensor with the per-anchor regression output;
                its shape should be (total_anchors, 4).
            all_anchors: A Tensor of shape (total_anchors, 4) with the
                anchors as (x_min, y_min, x_max, y_max).
            im_shape: A Tensor with the image shape as (height, width).

        Returns:
            prediction_dict with the following keys:
                proposals: A Tensor with the final selected proposed
                    bounding boxes (four coordinates per proposal).
                scores: A Tensor with the probability of being an object
                    for each of those proposals.
            Several intermediate tensors are added to the dict when
            `self._debug` is enabled.
        """
        # Second softmax column == probability of being foreground.
        objectness = rpn_cls_prob[:, 1]
        objectness = tf.reshape(objectness, [-1])

        if self._filter_outside_anchors:
            with tf.name_scope('filter_outside_anchors'):
                anchor_coords = tf.unstack(all_anchors, axis=1)
                ax_min, ay_min, ax_max, ay_max = anchor_coords

                # Lower bounds first, then upper bounds against the image
                # width (im_shape[1]) and height (im_shape[0]).
                min_corner_ok = tf.logical_and(
                    tf.greater_equal(ax_min, 0),
                    tf.greater_equal(ay_min, 0),
                )
                max_corner_ok = tf.logical_and(
                    tf.less(ax_max, im_shape[1]),
                    tf.less(ay_max, im_shape[0]),
                )
                within_image = tf.logical_and(min_corner_ok, max_corner_ok)
                within_image = tf.reshape(within_image, [-1])

                all_anchors = tf.boolean_mask(
                    all_anchors, within_image, name='filter_anchors')
                rpn_bbox_pred = tf.boolean_mask(rpn_bbox_pred, within_image)
                objectness = tf.boolean_mask(objectness, within_image)

        # Anchors + predicted deltas -> candidate boxes.
        candidates = decode(all_anchors, rpn_bbox_pred)

        # Candidates must reach the minimum probability...
        score_ok = tf.greater_equal(objectness, self._min_prob_threshold)

        # ...and have a strictly positive area.
        (x_min, y_min, x_max, y_max) = tf.unstack(candidates, axis=1)
        clamped_w = tf.maximum(x_max - x_min, 0.0)
        clamped_h = tf.maximum(y_max - y_min, 0.0)
        area_ok = tf.greater(clamped_w * clamped_h, 0.0)

        candidate_mask = tf.logical_and(area_ok, score_ok)

        candidates_total = tf.shape(objectness)[0]
        masked_scores = tf.boolean_mask(
            objectness, candidate_mask, name='filtered_scores')
        masked_boxes = tf.boolean_mask(
            candidates, candidate_mask, name='filtered_proposals')

        if self._debug:
            proposals_unclipped = tf.identity(masked_boxes)

        if not self._clip_after_nms:
            # Clip to the image right away.
            masked_boxes = clip_boxes(masked_boxes, im_shape)

        masked_total = tf.shape(masked_scores)[0]

        # Report how many candidates survived the filtering.
        survived_fraction = (
            tf.cast(masked_total, tf.float32) /
            tf.cast(candidates_total, tf.float32)
        )
        tf.summary.scalar(
            'valid_proposals_ratio', survived_fraction, ['rpn'])
        dropped_total = candidates_total - masked_total
        tf.summary.scalar('invalid_proposals', dropped_total, ['rpn'])

        # Sort by score and cut to the `pre_nms_top_n` best ones.
        k = tf.minimum(self._pre_nms_top_n, tf.shape(masked_scores)[0])
        ranking = tf.nn.top_k(masked_scores, k=k)

        ranked_boxes = tf.gather(masked_boxes, ranking.indices)
        ranked_scores = ranking.values

        if self._apply_nms:
            with tf.name_scope('nms'):
                # NMS works on TensorFlow's bounding box order.
                ranked_boxes_tf = change_order(ranked_boxes)
                nms_scores = tf.reshape(ranked_scores, [-1])
                nms_indices = tf.image.non_max_suppression(
                    ranked_boxes_tf,
                    nms_scores,
                    self._post_nms_top_n,
                    iou_threshold=self._nms_threshold,
                )

                # Pick the surviving boxes and go back to the regular
                # bounding box order.
                surviving_tf = tf.gather(
                    ranked_boxes_tf, nms_indices,
                    name='gather_nms_proposals')
                proposals = change_order(surviving_tf)
                scores = tf.gather(
                    ranked_scores, nms_indices,
                    name='gather_nms_proposals_scores')
        else:
            # Without NMS the ranked candidates are the result.
            proposals = ranked_boxes
            scores = ranked_scores

        if self._clip_after_nms:
            # Clip to the image once the selection is done.
            proposals = clip_boxes(proposals, im_shape)

        pred = dict(proposals=proposals, scores=scores)

        if self._debug:
            debug_tensors = {
                'sorted_top_scores': ranked_scores,
                'sorted_top_proposals': ranked_boxes,
                'unsorted_proposals': masked_boxes,
                'unsorted_scores': masked_scores,
                'all_proposals': candidates,
                'all_scores': objectness,
                # Same rows (and order) as `unsorted_scores`.
                'proposals_unclipped': proposals_unclipped,
            }
            pred.update(debug_tensors)

        return pred
Exemple #6
0
    def _build(self, rpn_cls_prob, rpn_bbox_pred, all_anchors, im_shape):
        """Turn the RPN outputs into a filtered, ranked set of proposals.

        Args:
            rpn_cls_prob: A Tensor with the softmax output for each anchor.
                Its shape should be (total_anchors, 2), holding the
                background and the foreground probability of each anchor.
            rpn_bbox_pred: A Tensor with the regression output for each
                anchor. Its shape should be (total_anchors, 4).
            all_anchors: A Tensor with the anchors bounding boxes of shape
                (total_anchors, 4), having (x_min, y_min, x_max, y_max) for
                each anchor.
            im_shape: A Tensor with the image shape in format (height, width).

        Returns:
            prediction_dict with the following keys:
                proposals: A Tensor with the final selected proposed
                    bounding boxes. Its shape should be
                    (total_proposals, 4).
                scores: A Tensor with the foreground probability of each
                    selected proposal, one value per proposal.
            When `self._debug` is truthy the dict additionally carries the
            intermediate tensors (`sorted_top_scores`,
            `sorted_top_proposals`, `unsorted_proposals`, `unsorted_scores`,
            `all_proposals`, `all_scores` and `proposals_unclipped`).
        """
        # The class probability is a softmax over (background, foreground);
        # the objectness score is therefore its second column.
        all_scores = rpn_cls_prob[:, 1]
        # Flatten defensively (the slice should already be flat).
        all_scores = tf.reshape(all_scores, [-1])

        if self._filter_outside_anchors:
            with tf.name_scope('filter_outside_anchors'):
                # Split (total_anchors, 4) into four per-coordinate vectors.
                anchor_x_min, anchor_y_min, anchor_x_max, anchor_y_max = (
                    tf.unstack(all_anchors, axis=1)
                )

                # im_shape is (height, width): index 1 bounds x and index 0
                # bounds y. An anchor is kept only when both of its corners
                # lie inside the image.
                top_left_inside = tf.logical_and(
                    tf.greater_equal(anchor_x_min, 0),
                    tf.greater_equal(anchor_y_min, 0),
                )
                bottom_right_inside = tf.logical_and(
                    tf.less(anchor_x_max, im_shape[1]),
                    tf.less(anchor_y_max, im_shape[0]),
                )
                inside_image = tf.logical_and(
                    top_left_inside, bottom_right_inside
                )
                inside_image = tf.reshape(inside_image, [-1])

                # Drop the outside anchors together with their regression
                # outputs and scores so the three stay aligned.
                all_anchors = tf.boolean_mask(
                    all_anchors, inside_image, name='filter_anchors'
                )
                rpn_bbox_pred = tf.boolean_mask(rpn_bbox_pred, inside_image)
                all_scores = tf.boolean_mask(all_scores, inside_image)

        # Decode boxes: apply the predicted deltas to the reference anchors.
        all_proposals = decode(all_anchors, rpn_bbox_pred)

        # Mask of proposals whose score reaches the probability threshold.
        score_ok = tf.greater_equal(all_scores, self._min_prob_threshold)

        # Mask of proposals with strictly positive area (x_max > x_min and
        # y_max > y_min).
        x_min, y_min, x_max, y_max = tf.unstack(all_proposals, axis=1)
        box_width = tf.maximum(x_max - x_min, 0.0)
        box_height = tf.maximum(y_max - y_min, 0.0)
        area_ok = tf.greater(box_width * box_height, 0.0)

        keep_mask = tf.logical_and(area_ok, score_ok)

        # Filter proposals and scores with the combined mask.
        all_proposals_total = tf.shape(all_scores)[0]
        unsorted_scores = tf.boolean_mask(
            all_scores, keep_mask, name='filtered_scores'
        )
        unsorted_proposals = tf.boolean_mask(
            all_proposals, keep_mask, name='filtered_proposals'
        )
        if self._debug:
            proposals_unclipped = tf.identity(unsorted_proposals)

        # Clipping happens here, before NMS, unless it was requested to run
        # after NMS instead.
        if not self._clip_after_nms:
            # Clip proposals to the image.
            unsorted_proposals = clip_boxes(unsorted_proposals, im_shape)

        filtered_proposals_total = tf.shape(unsorted_scores)[0]

        valid_ratio = (
            tf.cast(filtered_proposals_total, tf.float32) /
            tf.cast(all_proposals_total, tf.float32)
        )
        tf.summary.scalar('valid_proposals_ratio', valid_ratio, ['rpn'])

        invalid_count = all_proposals_total - filtered_proposals_total
        tf.summary.scalar('invalid_proposals', invalid_count, ['rpn'])

        # Get top `pre_nms_top_n` indices by sorting the proposals by score;
        # k can never exceed the number of surviving proposals.
        k = tf.minimum(self._pre_nms_top_n, tf.shape(unsorted_scores)[0])
        top_k = tf.nn.top_k(unsorted_scores, k=k)

        sorted_top_proposals = tf.gather(unsorted_proposals, top_k.indices)
        sorted_top_scores = top_k.values

        # Without NMS the ranked proposals are the final answer.
        proposals = sorted_top_proposals
        scores = sorted_top_scores

        if self._apply_nms:
            with tf.name_scope('nms'):
                # We reorder the proposals into TensorFlows bounding box order
                # for `tf.image.non_max_supression` compatibility.
                proposals_tf_order = change_order(sorted_top_proposals)
                # Suppress boxes that overlap too much (IoU above the
                # threshold) with an already selected, higher-scored box.
                flat_scores = tf.reshape(sorted_top_scores, [-1])
                selected_indices = tf.image.non_max_suppression(
                    proposals_tf_order,
                    flat_scores,
                    self._post_nms_top_n,
                    iou_threshold=self._nms_threshold
                )

                # Selected_indices is a smaller tensor, we need to extract the
                # proposals and scores using it.
                nms_proposals_tf_order = tf.gather(
                    proposals_tf_order,
                    selected_indices,
                    name='gather_nms_proposals'
                )

                # We switch back again to the regular bbox encoding.
                proposals = change_order(nms_proposals_tf_order)
                scores = tf.gather(
                    sorted_top_scores,
                    selected_indices,
                    name='gather_nms_proposals_scores'
                )

        if self._clip_after_nms:
            # Clip proposals to the image after NMS.
            proposals = clip_boxes(proposals, im_shape)

        pred = {
            'proposals': proposals,
            'scores': scores,
        }

        if self._debug:
            debug_tensors = {
                'sorted_top_scores': sorted_top_scores,
                'sorted_top_proposals': sorted_top_proposals,
                'unsorted_proposals': unsorted_proposals,
                'unsorted_scores': unsorted_scores,
                'all_proposals': all_proposals,
                'all_scores': all_scores,
                # proposals_unclipped has the unsorted_scores scores
                'proposals_unclipped': proposals_unclipped,
            }
            pred.update(debug_tensors)

        return pred
        def build_without_filter(class_objects, cls_prob, cls_label):
            """Run per-label filtering, NMS and a global top-k on boxes.

            Reads `self` from the enclosing scope for the number of classes
            and the NMS / detection limits.

            Args:
                class_objects: Tensor of boxes; it is unstacked along axis 1
                    into (x_min, y_min, x_max, y_max).
                cls_prob: Tensor with one score per box.
                cls_label: Tensor with one label per box; rows are grouped by
                    comparing it against each class id.

            Returns:
                Tuple `(objects, labels, probs)` holding the top-k boxes,
                their class ids and their scores.
            """
            boxes_per_class = []
            probs_per_class = []
            labels_per_class = []

            # Handle one class at a time: pick the rows carrying this label,
            # drop weak or degenerate boxes, then run NMS.
            for class_id in range(self._num_classes):
                # Row indices whose label equals the current class.
                label_idx = tf.reshape(
                    tf.where(tf.equal(class_id, cls_label)), [-1]
                )

                class_boxes = tf.gather(class_objects, label_idx)
                class_scores = tf.gather(cls_prob, label_idx)

                # Fixed score cut-off used to discard trivial padding results.
                score_ok = tf.greater_equal(class_scores, 0.2)

                x_min, y_min, x_max, y_max = tf.unstack(class_boxes, axis=1)
                box_width = tf.maximum(x_max - x_min, 0.0)
                box_height = tf.maximum(y_max - y_min, 0.0)
                area_ok = tf.greater(box_width * box_height, 0.0)

                keep_mask = tf.logical_and(area_ok, score_ok)

                class_boxes = tf.boolean_mask(class_boxes, keep_mask)
                class_scores = tf.boolean_mask(class_scores, keep_mask)

                # We have to use the TensorFlow's bounding box convention to
                # use the included function for NMS.
                class_boxes_tf = change_order(class_boxes)

                # Apply class NMS.
                nms_idx = tf.image.non_max_suppression(
                    class_boxes_tf,
                    class_scores,
                    self._class_max_detections,
                    iou_threshold=self._class_nms_threshold
                )

                # Using NMS resulting indices, gather values from Tensors.
                class_boxes_tf = tf.gather(class_boxes_tf, nms_idx)
                kept_scores = tf.gather(class_scores, nms_idx)

                # Revert to our bbox convention.
                kept_boxes = change_order(class_boxes_tf)

                # Collect per-class results in plain lists; they are turned
                # into Tensors once the loop is done.
                boxes_per_class.append(kept_boxes)
                probs_per_class.append(kept_scores)
                # The class id is constant inside the loop, so `tf.tile`
                # repeats it once per index returned by the NMS.
                labels_per_class.append(
                    tf.tile([class_id], [tf.shape(nms_idx)[0]])
                )

            # Stack the per-class results on top of each other (axis=0).
            objects = tf.concat(boxes_per_class, axis=0)
            proposal_label = tf.concat(labels_per_class, axis=0)
            proposal_label_prob = tf.concat(probs_per_class, axis=0)

            # Get top-k detections of all classes.
            k = tf.minimum(
                self._total_max_detections,
                tf.shape(proposal_label_prob)[0]
            )
            top_k = tf.nn.top_k(proposal_label_prob, k=k)
            best_idx = top_k.indices
            top_k_objects = tf.gather(objects, best_idx)
            top_k_proposal_label = tf.gather(proposal_label, best_idx)

            return (top_k_objects, top_k_proposal_label, top_k.values)
    def _build(self, proposals, bbox_pred, cls_prob, im_shape):
        """Build the final per-class detections from the RCNN outputs.

        Args:
            proposals: Tensor with the RPN proposals bounding boxes.
                Shape (num_proposals, 4). Where num_proposals is less than
                POST_NMS_TOP_N (We don't know exactly beforehand)
            bbox_pred: Tensor with the RCNN delta predictions for each proposal
                for each class. Shape (num_proposals, 4 * num_classes)
            cls_prob: A softmax probability for each proposal where the idx = 0
                is the background class (which we should ignore).
                Shape (num_proposals, num_classes + 1)
            im_shape: Image shape, forwarded to `clip_boxes` so the decoded
                boxes stay inside the image.

        Returns:
            A dict with the following keys:
                objects: Top-k boxes over all classes, each row being
                    (x_min, y_min, x_max, y_max). The number of rows is
                    unknown before-hand (it depends on NMS).
                proposal_label: Class id of each returned box.
                proposal_label_prob: Score of each returned box.
                selected_boxes, selected_probs, selected_labels: Python
                    lists with one Tensor per class holding the post-NMS
                    boxes, scores and labels (before the global top-k).
                without_filter_dict: Result of `self.build_without_filter`
                    for the same inputs.
        """
        with tf.variable_scope("build_without_filter"):
            without_filter_dict = self.build_without_filter(
                proposals, bbox_pred, cls_prob, im_shape
            )

        selected_boxes = []
        selected_probs = []
        selected_labels = []

        # Each class is handled on its own: decode its boxes, drop the ones
        # with low score or empty area, then run NMS.
        for class_id in range(self._num_classes):
            # Column 0 of cls_prob is the background, hence the + 1.
            class_scores = cls_prob[:, class_id + 1]
            # The four deltas of this class inside the flat prediction.
            first_col = 4 * class_id
            class_deltas = bbox_pred[:, first_col:(4 * class_id + 4)]
            decoded_boxes = decode(
                proposals,
                class_deltas,
                variances=self._variances,
            )

            # Clip bboxes so they don't go out of the image.
            clipped_boxes = clip_boxes(decoded_boxes, im_shape)

            # Keep boxes that reach the minimum probability...
            score_ok = tf.greater_equal(
                class_scores, self._min_prob_threshold
            )

            # ...and that have a strictly positive area.
            x_min, y_min, x_max, y_max = tf.unstack(clipped_boxes, axis=1)
            box_width = tf.maximum(x_max - x_min, 0.0)
            box_height = tf.maximum(y_max - y_min, 0.0)
            area_ok = tf.greater(box_width * box_height, 0.0)

            keep_mask = tf.logical_and(area_ok, score_ok)

            kept_boxes = tf.boolean_mask(clipped_boxes, keep_mask)
            kept_scores = tf.boolean_mask(class_scores, keep_mask)

            # We have to use the TensorFlow's bounding box convention to use
            # the included function for NMS.
            kept_boxes_tf = change_order(kept_boxes)

            # Apply class NMS.
            nms_idx = tf.image.non_max_suppression(
                kept_boxes_tf,
                kept_scores,
                self._class_max_detections,
                iou_threshold=self._class_nms_threshold
            )

            # Using NMS resulting indices, gather values from Tensors.
            nms_boxes_tf = tf.gather(kept_boxes_tf, nms_idx)
            nms_scores = tf.gather(kept_scores, nms_idx)

            # Revert to our bbox convention.
            nms_boxes = change_order(nms_boxes_tf)

            # Per-class results go into plain lists which are concatenated
            # into Tensors after the loop.
            selected_boxes.append(nms_boxes)
            selected_probs.append(nms_scores)
            # The class id is fixed inside the loop, so `tf.tile` repeats it
            # once per index returned by the NMS.
            selected_labels.append(
                tf.tile([class_id], [tf.shape(nms_idx)[0]])
            )

        # Stack the per-class rows on top of each other (axis=0).
        objects = tf.concat(selected_boxes, axis=0)
        proposal_label = tf.concat(selected_labels, axis=0)
        proposal_label_prob = tf.concat(selected_probs, axis=0)

        tf.summary.histogram(
            'proposal_cls_scores', proposal_label_prob, ['rcnn']
        )

        # Get top-k detections of all classes.
        k = tf.minimum(
            self._total_max_detections,
            tf.shape(proposal_label_prob)[0]
        )
        top_k = tf.nn.top_k(proposal_label_prob, k=k)
        best_idx = top_k.indices

        prediction = {
            'objects': tf.gather(objects, best_idx),
            'proposal_label': tf.gather(proposal_label, best_idx),
            'proposal_label_prob': top_k.values,
            'selected_boxes': selected_boxes,
            'selected_probs': selected_probs,
            'selected_labels': selected_labels,

            "without_filter_dict": without_filter_dict
        }
        return prediction
    def build_without_filter(self, proposals, bbox_pred, cls_prob, im_shape,
                             min_prob_threshold=0.7, min_area=76654.0):
        """Build per-class detections using fixed, stricter filters.

        The pipeline is: decode the class-specific boxes, clip them to the
        image, keep the boxes whose score is at least `min_prob_threshold`
        and whose area is strictly larger than `min_area`, run per-class
        NMS and finally keep the top-k over all classes.

        Args:
            proposals: Tensor with the proposals bounding boxes, passed to
                `decode` together with the per-class deltas.
            bbox_pred: Tensor with the delta predictions for each proposal
                for each class; columns [4 * c, 4 * c + 4) belong to class c.
            cls_prob: Tensor with the class probabilities for each proposal
                where idx = 0 is the background class (which is ignored).
            im_shape: Image shape, forwarded to `clip_boxes`.
            min_prob_threshold: Minimum class probability a box needs to be
                kept. Defaults to 0.7, the value that used to be hard-coded.
            min_area: A box is kept only when its clipped area is strictly
                greater than this value. Defaults to 76654.0, the value that
                used to be hard-coded.

        Returns:
            A dict with the following keys:
                objects: Top-k boxes over all classes, each row being
                    (x_min, y_min, x_max, y_max).
                proposal_label: Class id of each returned box.
                proposal_label_prob: Score of each returned box.
                selected_boxes, selected_probs, selected_labels: Python
                    lists with one Tensor per class holding the post-NMS
                    boxes, scores and labels (before the global top-k).
        """
        selected_boxes = []
        selected_probs = []
        selected_labels = []

        # For each class, take the proposals with the class-specific
        # predictions (class scores and bbox regression) and filter accordingly
        # (min area, min probability score and NMS).
        for class_id in range(self._num_classes):
            # Apply the class-specific transformations to the proposals to
            # obtain the current class' prediction.
            class_prob = cls_prob[:, class_id + 1]  # 0 is background class.
            class_bboxes = bbox_pred[:, (4 * class_id):(4 * class_id + 4)]
            raw_class_objects = decode(
                proposals,
                class_bboxes,
                variances=self._variances,
            )

            # Clip bboxes so they don't go out of the image.
            class_objects = clip_boxes(raw_class_objects, im_shape)

            # Filter objects based on the probability threshold and on them
            # being larger than the minimum area.
            prob_filter = tf.greater_equal(
                class_prob, min_prob_threshold
            )

            (x_min, y_min, x_max, y_max) = tf.unstack(class_objects, axis=1)

            area_filter = tf.greater(
                tf.maximum(x_max - x_min, 0.0)
                * tf.maximum(y_max - y_min, 0.0),
                min_area
            )

            object_filter = tf.logical_and(area_filter, prob_filter)

            class_objects = tf.boolean_mask(class_objects, object_filter)
            class_prob = tf.boolean_mask(class_prob, object_filter)

            # We have to use the TensorFlow's bounding box convention to use
            # the included function for NMS.
            class_objects_tf = change_order(class_objects)

            # Apply class NMS.
            class_selected_idx = tf.image.non_max_suppression(
                class_objects_tf, class_prob, self._class_max_detections,
                iou_threshold=self._class_nms_threshold
            )

            # Using NMS resulting indices, gather values from Tensors.
            class_objects_tf = tf.gather(class_objects_tf, class_selected_idx)
            class_prob = tf.gather(class_prob, class_selected_idx)

            # Revert to our bbox convention.
            class_objects = change_order(class_objects_tf)

            # We append values to a regular list which will later be
            # transformed to a proper Tensor.
            selected_boxes.append(class_objects)
            selected_probs.append(class_prob)
            # In the case of the class_id, since it is a loop on classes, we
            # already have a fixed class_id. We use `tf.tile` to create that
            # Tensor with the total number of indices returned by the NMS.
            selected_labels.append(
                tf.tile([class_id], [tf.shape(class_selected_idx)[0]])
            )

        # We use concat (axis=0) to generate a Tensor where the rows are
        # stacked on top of each other
        objects = tf.concat(selected_boxes, axis=0)
        proposal_label = tf.concat(selected_labels, axis=0)
        proposal_label_prob = tf.concat(selected_probs, axis=0)

        tf.summary.histogram(
            'proposal_cls_scores', proposal_label_prob, ['rcnn']
        )

        # Get top-k detections of all classes.
        k = tf.minimum(
            self._total_max_detections,
            tf.shape(proposal_label_prob)[0]
        )
        top_k = tf.nn.top_k(proposal_label_prob, k=k)
        top_k_proposal_label_prob = top_k.values
        top_k_objects = tf.gather(objects, top_k.indices)
        top_k_proposal_label = tf.gather(proposal_label, top_k.indices)

        return {
            'objects': top_k_objects,
            'proposal_label': top_k_proposal_label,
            'proposal_label_prob': top_k_proposal_label_prob,
            'selected_boxes': selected_boxes,
            'selected_probs': selected_probs,
            'selected_labels': selected_labels,
        }
Exemple #10
0
    def _build(self, cls_prob, loc_pred, all_anchors, im_shape):
        """Build the final per-class detections from the anchor predictions.

        Args:
            cls_prob: A softmax probability for each anchor where the idx = 0
                is the background class (which we should ignore).
                Shape (total_anchors, num_classes + 1)
            loc_pred: A Tensor with the regression output for each anchor.
                Its shape should be (total_anchors, 4).
            all_anchors: A Tensor with the anchors bounding boxes of shape
                (total_anchors, 4), having (x_min, y_min, x_max, y_max) for
                each anchor.
            im_shape: A Tensor with the image shape in format (height, width).
        Returns:
            prediction_dict with the following keys:
                objects: The detections kept after thresholding, clipping,
                    area filtering, per-class NMS and the global top-k.
                    Its shape is (final_num_proposals, 4), where
                    final_num_proposals is unknown before-hand (it depends
                    on NMS). Each row is (x_min, y_min, x_max, y_max).
                labels: Class id of each detection,
                    shape (final_num_proposals,).
                probs: Score of each detection,
                    shape (final_num_proposals,).
                raw_proposals: The anchors adjusted using loc_pred, before
                    clipping, as computed in the last class iteration.
                anchors: The anchor each detection was decoded from, row
                    aligned with `objects` (for debugging).
        """
        selected_boxes = []
        selected_probs = []
        selected_labels = []
        selected_anchors = []  # For debugging

        # For every class: keep the predictions above the probability
        # threshold, decode and clip their boxes, drop empty boxes and run
        # NMS to obtain the proposals selected for that class.
        for class_id in range(self._num_classes):
            # Get the confidences for this class (+ 1 is to ignore background)
            class_cls_prob = cls_prob[:, class_id + 1]

            # Filter by min_prob_threshold
            min_prob_filter = tf.greater_equal(class_cls_prob,
                                               self._min_prob_threshold)
            class_cls_prob = tf.boolean_mask(class_cls_prob, min_prob_filter)
            class_loc_pred = tf.boolean_mask(loc_pred, min_prob_filter)
            # Keep the anchors aligned with the filtered predictions.
            anchors = tf.boolean_mask(all_anchors, min_prob_filter)

            # Using the loc_pred and the anchors, we generate the proposals.
            raw_proposals = decode(anchors, class_loc_pred, self._variances)

            # Clip boxes to image.
            clipped_proposals = clip_boxes(raw_proposals, im_shape)

            # Filter proposals that have an non-valid area.
            (x_min, y_min, x_max, y_max) = tf.unstack(clipped_proposals,
                                                      axis=1)
            proposal_filter = tf.greater(
                tf.maximum(x_max - x_min, 0.) * tf.maximum(y_max - y_min, 0.),
                0.)
            # Apply the area mask to the boxes, scores and anchors alike so
            # that they stay row aligned.
            class_proposals = tf.boolean_mask(clipped_proposals,
                                              proposal_filter)
            class_cls_prob = tf.boolean_mask(class_cls_prob, proposal_filter)
            proposal_anchors = tf.boolean_mask(anchors, proposal_filter)

            # Log results of filtering non-valid area proposals
            # Total number of anchors.
            total_anchors = tf.shape(all_anchors)[0]
            # Number of proposals with a valid area.
            total_proposals = tf.shape(class_proposals)[0]
            # Number of proposals that passed the probability threshold,
            # before the area check.
            total_raw_proposals = tf.shape(raw_proposals)[0]

            # Proposals discarded by the area check (a non-negative count).
            tf.summary.scalar('invalid_proposals',
                              total_raw_proposals - total_proposals, ['ssd'])
            # Fraction of all the anchors that produced a valid proposal.
            tf.summary.scalar(
                'valid_proposals_ratio',
                tf.cast(total_proposals, tf.float32) /
                tf.cast(total_anchors, tf.float32), ['ssd'])

            # We have to use the TensorFlow's bounding box convention to use
            # the included function for NMS.
            # After gathering results we should normalize it back.
            class_proposal_tf = change_order(class_proposals)

            # Apply class NMS.
            class_selected_idx = tf.image.non_max_suppression(
                class_proposal_tf,
                class_cls_prob,
                self._class_max_detections,
                iou_threshold=self._class_nms_threshold)

            # Using NMS resulting indices, gather values from Tensors.
            class_proposal_tf = tf.gather(class_proposal_tf,
                                          class_selected_idx)
            class_cls_prob = tf.gather(class_cls_prob, class_selected_idx)
            # The anchors must go through the same NMS selection, otherwise
            # the top-k indices computed below would point at anchors that
            # do not belong to the selected boxes.
            class_anchors = tf.gather(proposal_anchors, class_selected_idx)

            # We append values to a regular list which will later be
            # transformed to a proper Tensor.
            selected_boxes.append(class_proposal_tf)
            selected_probs.append(class_cls_prob)

            # In the case of the class_id, since it is a loop on classes, we
            # already have a fixed class_id. We use `tf.tile` to create that
            # Tensor with the total number of indices returned by the NMS.
            selected_labels.append(
                tf.tile([class_id], [tf.shape(class_selected_idx)[0]]))
            selected_anchors.append(class_anchors)

        # We use concat (axis=0) to generate a Tensor where the rows are
        # stacked on top of each other
        # (num_proposals, 4)
        proposals_tf = tf.concat(selected_boxes, axis=0)
        # Return to the original convention.
        proposals = change_order(proposals_tf)
        # (num_proposals, )
        proposal_label = tf.concat(selected_labels, axis=0)
        # (num_proposals, )
        proposal_label_prob = tf.concat(selected_probs, axis=0)
        # (num_proposals, 4)
        proposal_anchors = tf.concat(selected_anchors, axis=0)

        # Get topK detections of all classes.
        k = tf.minimum(self._total_max_detections,
                       tf.shape(proposal_label_prob)[0])

        # All the concatenated tensors share the same row order (class by
        # class), so the indices found on the scores apply to every one.
        top_k = tf.nn.top_k(proposal_label_prob, k=k)

        top_k_proposal_label_prob = top_k.values
        top_k_proposals = tf.gather(proposals, top_k.indices)
        top_k_proposal_label = tf.gather(proposal_label, top_k.indices)
        top_k_proposal_anchors = tf.gather(proposal_anchors, top_k.indices)

        return {
            'objects': top_k_proposals,
            'labels': top_k_proposal_label,
            'probs': top_k_proposal_label_prob,
            # NOTE: this is the tensor left over from the last class
            # iteration, not a concatenation over all the classes.
            'raw_proposals': raw_proposals,
            'anchors': top_k_proposal_anchors,
        }
Exemple #11
0
    def _build(self, proposals, bbox_pred, cls_prob, im_shape):
        """Turn the RCNN outputs into the final set of detections.

        Each proposal is assigned its most probable class. Background and
        low-probability proposals are dropped, the remaining ones are decoded
        with the regression output of their class, clipped to the image and
        filtered by positive area. NMS is then applied class by class and the
        overall top-k detections (by probability) are returned.

        Args:
            proposals: Tensor with the RPN proposals bounding boxes.
                Shape (num_proposals, 4). Where num_proposals is less than
                POST_NMS_TOP_N (We don't know exactly beforehand)
            bbox_pred: Tensor with the RCNN delta predictions for each proposal
                for each class. Shape (num_proposals, 4 * num_classes)
            cls_prob: A softmax probability for each proposal where the idx = 0
                is the background class (which we should ignore).
                Shape (num_proposals, num_classes + 1)
            im_shape: Image shape forwarded to `clip_boxes`.
                NOTE(review): presumably (height, width) -- confirm against
                `clip_boxes`.

        Returns:
            Dictionary with the following keys:
                raw_objects: Output of `decode` for the proposals that passed
                    the background/probability filter, before clipping and
                    area filtering.
                objects: The top-k boxes over all classes after NMS, as
                    (x_min, y_min, x_max, y_max).
                    Shape (final_num_proposals, 4), where final_num_proposals
                    is unknown before-hand (it depends on NMS).
                proposal_label: Zero-based class (background excluded) of
                    each box in `objects`. Shape (final_num_proposals,)
                proposal_label_prob: Probability of each box in `objects`.
                    Shape (final_num_proposals,)
                selected_boxes: Python list with one Tensor per class holding
                    the boxes kept by NMS. These are still in the convention
                    produced by `change_order` (they are not converted back).
                selected_probs: Python list with one Tensor per class holding
                    the probabilities of the boxes kept by NMS.
                selected_labels: Python list with one Tensor per class
                    holding `class_id` repeated once per kept box.
        """
        # First we want get the most probable label for each proposal
        # We still have the background on idx 0 so we subtract 1 to the idxs.
        proposal_label = tf.argmax(cls_prob, axis=1) - 1
        # Get the probability for the selected label for each proposal.
        proposal_label_prob = tf.reduce_max(cls_prob, axis=1)

        # We are going to use only the non-background proposals.
        non_background_filter = tf.greater_equal(proposal_label, 0)
        # Filter proposals with less than threshold probability.
        min_prob_filter = tf.greater_equal(
            proposal_label_prob, self._min_prob_threshold
        )
        proposal_filter = tf.logical_and(
            non_background_filter, min_prob_filter
        )

        total_proposals = tf.shape(proposals)[0]

        equal_shapes = tf.assert_equal(
            tf.shape(proposals)[0], tf.shape(bbox_pred)[0]
        )
        with tf.control_dependencies([equal_shapes]):
            # Filter all tensors for getting all non-background proposals.
            proposals = tf.boolean_mask(
                proposals, proposal_filter)
            proposal_label = tf.boolean_mask(
                proposal_label, proposal_filter)
            proposal_label_prob = tf.boolean_mask(
                proposal_label_prob, proposal_filter)
            bbox_pred = tf.boolean_mask(
                bbox_pred, proposal_filter)

        filtered_proposals = tf.shape(proposals)[0]

        tf.summary.scalar(
            'background_or_low_prob_proposals',
            total_proposals - filtered_proposals,
            ['rcnn']
        )

        # Create one hot with labels for using it to filter bbox_predictions.
        label_one_hot = tf.one_hot(proposal_label, depth=self._num_classes)
        # Flatten label_one_hot to get
        # (num_non_background_proposals * num_classes, 1) for filtering.
        label_one_hot_flatten = tf.cast(
            tf.reshape(label_one_hot, [-1]), tf.bool
        )
        # Flatten bbox_predictions getting
        # (num_non_background_proposals * num_classes, 4).
        bbox_pred_flatten = tf.reshape(bbox_pred, [-1, 4])

        equal_shapes = tf.assert_equal(
            tf.shape(bbox_pred_flatten)[0], tf.shape(label_one_hot_flatten)[0]
        )
        with tf.control_dependencies([equal_shapes]):
            # Keep, for every proposal, only the 4 deltas that belong to its
            # selected class.
            bbox_pred = tf.boolean_mask(
                bbox_pred_flatten, label_one_hot_flatten)

        # Using the bbox_pred and the proposals we generate the objects.
        raw_objects = decode(proposals, bbox_pred)
        # Clip boxes to image.
        clipped_objects = clip_boxes(raw_objects, im_shape)

        # Filter objects that have a non-valid (empty) area. The comparison
        # has to be strict: both factors are clamped to be >= 0, so a
        # `greater_equal` test would accept every box.
        (x_min, y_min, x_max, y_max) = tf.unstack(clipped_objects, axis=1)
        object_filter = tf.greater(
            tf.maximum(x_max - x_min, 0.0) * tf.maximum(y_max - y_min, 0.0),
            0.0
        )

        total_raw_objects = tf.shape(raw_objects)[0]
        objects = tf.boolean_mask(
            clipped_objects, object_filter)
        proposal_label = tf.boolean_mask(
            proposal_label, object_filter)
        proposal_label_prob = tf.boolean_mask(
            proposal_label_prob, object_filter)

        total_objects = tf.shape(objects)[0]

        # Number of decoded boxes discarded by the area filter.
        tf.summary.scalar(
            'invalid_proposals',
            total_raw_objects - total_objects, ['rcnn']
        )

        # Fraction of the incoming proposals that are still alive at this
        # point. The denominator is floored at 1 so that an empty input
        # yields 0 instead of NaN.
        valid_proposals_ratio = (
            tf.cast(total_objects, tf.float32) /
            tf.cast(tf.maximum(total_proposals, 1), tf.float32)
        )

        tf.summary.scalar(
            'valid_proposals_ratio', valid_proposals_ratio, ['rcnn']
        )

        # We have to use the TensorFlow's bounding box convention to use the
        # included function for NMS.
        # After gathering results we should normalize it back.
        objects_tf = change_order(objects)

        selected_boxes = []
        selected_probs = []
        selected_labels = []
        # For each class we want to filter those objects and apply NMS to them.
        for class_id in range(self._num_classes):
            # Filter objects Tensors with class.
            class_filter = tf.equal(proposal_label, class_id)
            class_objects_tf = tf.boolean_mask(objects_tf, class_filter)
            class_prob = tf.boolean_mask(proposal_label_prob, class_filter)

            # Apply class NMS.
            class_selected_idx = tf.image.non_max_suppression(
                class_objects_tf, class_prob, self._class_max_detections,
                iou_threshold=self._class_nms_threshold
            )

            # Using NMS resulting indices, gather values from Tensors.
            class_objects_tf = tf.gather(class_objects_tf, class_selected_idx)
            class_prob = tf.gather(class_prob, class_selected_idx)

            # We append values to a regular list which will later be
            # transformed to a proper Tensor.
            selected_boxes.append(class_objects_tf)
            selected_probs.append(class_prob)
            # In the case of the class_id, since it is a loop on classes, we
            # already have a fixed class_id. We use `tf.tile` to create that
            # Tensor with the total number of indices returned by the NMS.
            selected_labels.append(
                tf.tile([class_id], [tf.shape(class_selected_idx)[0]])
            )

        # We use concat (axis=0) to generate a Tensor where the rows are
        # stacked on top of each other
        objects_tf = tf.concat(selected_boxes, axis=0)
        # Return to the original convention.
        objects = change_order(objects_tf)
        proposal_label = tf.concat(selected_labels, axis=0)
        proposal_label_prob = tf.concat(selected_probs, axis=0)

        # Get topK detections of all classes. `k` is capped by the number of
        # available detections because `tf.nn.top_k` requires k <= size.
        k = tf.minimum(
            self._total_max_detections,
            tf.shape(proposal_label_prob)[0]
        )
        top_k = tf.nn.top_k(proposal_label_prob, k=k)
        top_k_proposal_label_prob = top_k.values
        top_k_objects = tf.gather(objects, top_k.indices)
        top_k_proposal_label = tf.gather(proposal_label, top_k.indices)

        return {
            'raw_objects': raw_objects,
            'objects': top_k_objects,
            'proposal_label': top_k_proposal_label,
            'proposal_label_prob': top_k_proposal_label_prob,
            'selected_boxes': selected_boxes,
            'selected_probs': selected_probs,
            'selected_labels': selected_labels,
        }
Exemple #12
0
    def _build(self, proposals, bbox_pred, cls_prob, im_shape):
        """Build the final detections from the RPN proposals and RCNN outputs.

        Every foreground class is processed independently: the proposals are
        decoded with that class' regression output, clipped to the image,
        filtered by minimum probability and positive area, and reduced with
        NMS. The survivors of all classes are concatenated and the top-k by
        probability are returned.

        Args:
            proposals: (RPN output) Tensor with the RPN proposals bounding
                boxes. Shape (num_proposals, 4). Where num_proposals is less
                than POST_NMS_TOP_N (We don't know exactly beforehand)
            bbox_pred: (RCNN output) Tensor with the RCNN delta predictions
                (offsets and scalings) for each proposal for each class.
                Shape (num_proposals, 4 * num_classes)
            cls_prob: (RCNN output) A softmax probability for each proposal
                where the idx = 0 is the background class (which we should
                ignore). Shape (num_proposals, num_classes + 1)
            im_shape: Image shape forwarded to `clip_boxes`.
                NOTE(review): presumably (height, width) -- confirm against
                `clip_boxes`.

        Returns:
            Dictionary with the following keys:
                objects: Coordinates of the boxes that were finally kept.
                    Shape (final_num_proposals, 4)
                    Where final_num_proposals is unknown before-hand (it
                    depends on NMS). The 4-length Tensor for each corresponds
                    to: (x_min, y_min, x_max, y_max).
                proposal_label: Class id of each kept box.
                    Shape (final_num_proposals,)
                proposal_label_prob: Probability of each kept box.
                    Shape (final_num_proposals,)
                selected_boxes: Python list with one Tensor per class holding
                    the boxes kept by NMS for that class.
                selected_probs: Python list with one Tensor per class holding
                    the probabilities of those boxes.
                selected_labels: Python list with one Tensor per class
                    holding `class_id` repeated once per kept box.
        """
        boxes_per_class = []
        probs_per_class = []
        labels_per_class = []

        # Each class gets its own pass: class-specific scores and regression
        # output, followed by the area / probability filters and NMS.
        for class_id in range(self._num_classes):
            # Column 0 of cls_prob is the background class, hence the +1.
            scores = cls_prob[:, class_id + 1]
            # The 4 deltas of this class sit in consecutive columns.
            first_col = 4 * class_id
            deltas = bbox_pred[:, first_col:first_col + 4]

            # Apply the class deltas to the proposals (the reference boxes)
            # to obtain this class' predicted boxes.
            decoded = decode(
                proposals,
                deltas,
                variances=self._variances,
            )

            # Clipping trims the boxes to the image; it does not remove any.
            clipped = clip_boxes(decoded, im_shape)

            # Keep only boxes that reach the minimum probability ...
            passes_prob = tf.greater_equal(
                scores, self._min_prob_threshold)

            # ... and whose area is strictly positive. Width and height are
            # clamped at zero first so two negative extents cannot multiply
            # into a positive "area".
            x_min, y_min, x_max, y_max = tf.unstack(clipped, axis=1)
            width = tf.maximum(x_max - x_min, 0.0)
            height = tf.maximum(y_max - y_min, 0.0)
            has_area = tf.greater(width * height, 0.0)

            # Both conditions must hold.
            keep = tf.logical_and(has_area, passes_prob)
            kept_boxes = tf.boolean_mask(clipped, keep)
            kept_scores = tf.boolean_mask(scores, keep)

            # `tf.image.non_max_suppression` needs TensorFlow's bounding box
            # convention, so convert before calling it.
            kept_boxes_tf = change_order(kept_boxes)

            # NMS returns the indices of the boxes to keep for this class; at
            # most `self._class_max_detections` of them.
            nms_idx = tf.image.non_max_suppression(
                kept_boxes_tf,
                kept_scores,
                self._class_max_detections,
                iou_threshold=self._class_nms_threshold,
            )

            # Pick the NMS survivors and convert back to our convention.
            nms_boxes_tf = tf.gather(kept_boxes_tf, nms_idx)
            nms_scores = tf.gather(kept_scores, nms_idx)
            nms_boxes = change_order(nms_boxes_tf)

            # Collect this class' results in plain lists; they are turned
            # into Tensors after the loop.
            boxes_per_class.append(nms_boxes)
            probs_per_class.append(nms_scores)

            # The label is constant inside this iteration, so repeat
            # `class_id` once per surviving box to line up with the scores.
            num_kept = tf.shape(nms_idx)[0]
            labels_per_class.append(tf.tile([class_id], [num_kept]))

        # Stack the per-class results on top of each other (axis=0).
        objects = tf.concat(boxes_per_class, axis=0)
        proposal_label = tf.concat(labels_per_class, axis=0)
        proposal_label_prob = tf.concat(probs_per_class, axis=0)

        tf.summary.histogram(
            'proposal_cls_scores', proposal_label_prob, ['rcnn'])

        # Take the k most probable detections across all classes; k is capped
        # by the number of detections actually available.
        num_candidates = tf.shape(proposal_label_prob)[0]
        k = tf.minimum(self._total_max_detections, num_candidates)
        best = tf.nn.top_k(proposal_label_prob, k=k)
        best_probs = best.values
        best_objects = tf.gather(objects, best.indices)
        best_labels = tf.gather(proposal_label, best.indices)

        return {
            'objects': best_objects,
            'proposal_label': best_labels,
            'proposal_label_prob': best_probs,
            'selected_boxes': boxes_per_class,
            'selected_probs': probs_per_class,
            'selected_labels': labels_per_class,
        }