def _get_proposals_single(self,
                              rpn_probs,
                              rpn_deltas,
                              mlvl_anchors,
                              img_shape,
                              with_probs,
                              training=True):
        """
        Calculate proposals per image
        Args:
        Returns:
        """
        if training:
            num_pre_nms = self.num_pre_nms_train
            proposal_count = self.num_post_nms_train
        else:
            num_pre_nms = self.num_pre_nms_test
            proposal_count = self.num_post_nms_test

        level_ids = []
        mlvl_scores = []
        mlvl_deltas = []
        mlvl_valid_anchors = []

        mlvl_proposals = []
        num_levels = len(rpn_probs)
        for idx in range(num_levels):
            level_probs = tf.reshape(rpn_probs[idx],
                                     [-1, 2])  # H, W, probs -> H * W, probs
            level_scores = level_probs[:, 1]
            level_deltas = tf.reshape(rpn_deltas[idx], [-1, 4])
            level_anchors = mlvl_anchors[idx]
            pre_nms_limit = tf.math.minimum(num_pre_nms,
                                            tf.shape(level_anchors)[0])
            ix = tf.nn.top_k(level_scores, pre_nms_limit, sorted=False).indices
            level_scores = tf.gather(level_scores, ix)
            level_deltas = tf.gather(level_deltas, ix)
            level_anchors = tf.gather(level_anchors, ix)
            mlvl_scores.append(level_scores)
            mlvl_deltas.append(level_deltas)
            mlvl_valid_anchors.append(level_anchors)
            level_ids.append(tf.fill([
                tf.shape(level_scores)[0],
            ], idx))
        scores = tf.concat(mlvl_scores, axis=0)
        anchors = tf.concat(mlvl_valid_anchors, axis=0)
        deltas = tf.concat(mlvl_deltas, axis=0)

        # get refined anchors
        proposals = transforms.delta2bbox(anchors, deltas, self.target_means,
                                          self.target_stds)
        # Clip to valid area
        window = tf.stack([0., 0., img_shape[0], img_shape[1]])
        proposals = transforms.bbox_clip(proposals, window)
        ids = tf.concat(level_ids, axis=0)
        # NMS is applied per level, independently of the other levels
        keep = self.batched_nms(proposals, scores, ids, proposal_count,
                                self.nms_threshold)
        proposals = tf.gather(proposals, keep)
        return tf.stop_gradient(proposals)
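
The `batched_nms` call above runs NMS per level independently. Below is a minimal sketch of how such a helper could be implemented, assuming the common coordinate-offset trick (boxes from different levels are shifted apart so a single NMS call cannot suppress across levels); the free-function form and exact behavior are assumptions, not the original implementation.

import tensorflow as tf

def batched_nms(boxes, scores, ids, max_output_size, iou_threshold):
    # Shift each box by its level id times (max coordinate + 1) so that boxes
    # from different levels can never overlap; a single NMS call then behaves
    # like independent per-level NMS.
    max_coordinate = tf.reduce_max(boxes)
    offsets = tf.expand_dims(tf.cast(ids, boxes.dtype), 1) * (max_coordinate + 1.0)
    boxes_for_nms = boxes + offsets
    keep = tf.image.non_max_suppression(boxes_for_nms, scores,
                                        max_output_size=max_output_size,
                                        iou_threshold=iou_threshold)
    return keep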
Example #2
 def call(self, inputs, training=True):
     '''
     Args
     ---
          proposals_list: list of Tensors of shape [num_proposals, (ymin, xmin, ymax, xmax)].
              num_proposals = num_levels * proposals per level, where the levels are FPN levels.
              The length of the list is the batch size.
          gt_boxes: Tensor of shape [batch_size, num_gt_boxes, 4]
          gt_class_ids: Tensor of shape [batch_size, num_gt_boxes]
          img_metas: Tensor of shape [batch_size, 11]
         rcnn_feature_maps: List of outputs from the FPN
     '''
     if training:
         proposals_list, rcnn_feature_maps, gt_boxes, \
         gt_class_ids, img_metas = inputs
     else:
         proposals_list, rcnn_feature_maps, img_metas = inputs
     batch_size = img_metas.shape[0]
     loss_dict = {}
     for i in range(self.num_stages):
         if i == 0:
             rois_list = proposals_list
         if training:
             rois_list, rcnn_target_matches, rcnn_target_deltas, inside_weights, \
                 outside_weights = self.bbox_targets[i].build_targets( \
                 rois_list, gt_boxes, gt_class_ids, img_metas)    
         pooled_regions_list = self.bbox_roi_extractor(
             (rois_list, rcnn_feature_maps, img_metas), training=training)
         rcnn_class_logits, rcnn_probs, rcnn_deltas = self.bbox_heads[i](pooled_regions_list, training=training)
         if training:
             loss_dict['rcnn_class_loss_stage_{}'.format(i)] = losses.rcnn_class_loss(rcnn_class_logits, 
                                                                                      rcnn_target_matches) * self.stage_loss_weights[i]
     
             loss_dict['rcnn_box_loss_stage_{}'.format(i)] = losses.rcnn_bbox_loss(rcnn_deltas,
                                                                                   rcnn_target_deltas, 
                                                                                   inside_weights, 
                                                                                   outside_weights) * self.stage_loss_weights[i]
          roi_shapes = [tf.shape(rois)[0] for rois in rois_list]
          refinements = tf.split(rcnn_deltas, roi_shapes)
          new_rois = []
          if i < (self.num_stages - 1):
             for j in range(batch_size):
                 new_rois.append(tf.stop_gradient(transforms.delta2bbox(rois_list[j], refinements[j],
                                                target_means=self.bbox_heads[i].target_means, \
                                                target_stds=self.bbox_heads[i].target_stds)))
             rois_list = new_rois
     if training:
         return loss_dict
     else:
          detections_list = self.bbox_heads[-1].get_bboxes(rcnn_probs,
                                                           rcnn_deltas,
                                                           rois_list,
                                                           img_metas)
          # NOTE: only the detections for the first image in the batch are returned.
          detections_dict = {
              'bboxes': detections_list[0][0],
              'labels': detections_list[0][1],
              'scores': detections_list[0][2]
          }
         return detections_dict
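
The training branch above relies on `losses.rcnn_class_loss` and `losses.rcnn_bbox_loss`. A minimal sketch of what these could look like, assuming the usual Fast R-CNN conventions (sparse softmax cross-entropy for the class branch, smooth L1 weighted by inside/outside weights for the box branch); the actual shapes and reductions in `losses` may differ.

import tensorflow as tf

def rcnn_class_loss(class_logits, target_matches):
    # target_matches: integer class ids per RoI (0 = background).
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=tf.cast(target_matches, tf.int32), logits=class_logits)
    return tf.reduce_mean(loss)

def rcnn_bbox_loss(deltas, target_deltas, inside_weights, outside_weights, sigma=1.0):
    # Smooth L1 loss; inside_weights zero out background RoIs and
    # outside_weights handle the normalization, as in Fast R-CNN.
    sigma2 = sigma ** 2
    diff = inside_weights * (deltas - target_deltas)
    abs_diff = tf.abs(diff)
    quadratic = tf.cast(abs_diff < (1.0 / sigma2), deltas.dtype)
    loss = (0.5 * sigma2 * tf.square(diff) * quadratic
            + (abs_diff - 0.5 / sigma2) * (1.0 - quadratic))
    num_rois = tf.cast(tf.shape(deltas)[0], deltas.dtype)
    return tf.reduce_sum(outside_weights * loss) / num_rois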
    def _get_bboxes_single(self, rcnn_probs, rcnn_deltas, rois, img_shape):
        '''
        Args
        ---
            rcnn_probs: [num_rois, num_classes]
            rcnn_deltas: [num_rois, num_classes, (dy, dx, log(dh), log(dw))]
            rois: [num_rois, (y1, x1, y2, x2)]
            img_shape: np.ndarray. [2]. (img_height, img_width)       
        '''
        H = img_shape[0] 
        W = img_shape[1] 
        
        res_scores = tf.TensorArray(tf.float32, size=0, dynamic_size=True, infer_shape=True)
        res_bboxes = tf.TensorArray(tf.float32, size=0, dynamic_size=True, infer_shape=True)
        res_cls = tf.TensorArray(tf.int32, size=0, dynamic_size=True, infer_shape=True)
        for cls_id in range(1, self.num_classes):
            inds = tf.where(rcnn_probs[:, cls_id] > self.min_confidence)[:, 0]
            cls_score = tf.gather(rcnn_probs[:, cls_id], inds)
            rcnn_deltas = tf.reshape(rcnn_deltas, [-1, self.num_classes, 4])
            final_bboxes = transforms.delta2bbox(tf.gather(rois, inds),
                                                tf.gather(rcnn_deltas[:, cls_id, :], inds),
                                                self.target_means, self.target_stds)
            window = tf.stack([tf.constant(0., H.dtype), tf.constant(0., W.dtype), H, W])
            final_bboxes = transforms.bbox_clip(final_bboxes, window)
            cls_score = tf.cast(cls_score, final_bboxes.dtype)
            #keep = tf.image.non_max_suppression(final_bboxes, cls_score,
            #                                    self.max_instances,
            #                                    iou_threshold=self.nms_threshold)
            keep, selected_cls_scores, _ = tf.raw_ops.NonMaxSuppressionV5(
                boxes=final_bboxes, scores=cls_score,
                max_output_size=self.max_instances,
                iou_threshold=self.nms_threshold,
                score_threshold=0.0,
                soft_nms_sigma=self.soft_nms_sigma)
            pad_size = self.max_instances - tf.size(keep)
            padded_scores = tf.pad(selected_cls_scores, paddings=[[0, pad_size]], constant_values=0.0)
            res_scores = res_scores.write(cls_id-1, padded_scores)#.mark_used()
            padded_bboxes = tf.pad(tf.gather(final_bboxes, keep), paddings=[[0, pad_size], [0, 0]], constant_values=0.0)
            res_bboxes = res_bboxes.write(cls_id-1, padded_bboxes)#.mark_used()
            padded_cls = tf.pad(tf.ones_like(keep, dtype=tf.int32) * cls_id, paddings=[[0, pad_size]], constant_values=-1)
            res_cls = res_cls.write(cls_id-1, padded_cls)#.mark_used()

        res_scores = res_scores.stack()
        res_bboxes = res_bboxes.stack()
        res_cls = res_cls.stack()

        scores_after_nms = tf.reshape(res_scores, [-1])
        bboxes_after_nms = tf.reshape(res_bboxes, [-1, 4])
        cls_after_nms = tf.reshape(res_cls, [-1])
 
        _, final_idx = tf.nn.top_k(scores_after_nms,
                                   k=tf.minimum(self.max_instances, tf.size(scores_after_nms)),
                                   sorted=False)
 
        return (tf.gather(bboxes_after_nms, final_idx),
                tf.gather(cls_after_nms, final_idx),
                tf.gather(scores_after_nms, final_idx))
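
All of the snippets here lean on `transforms.delta2bbox` and `transforms.bbox_clip`. A minimal sketch of these helpers, assuming the (dy, dx, log(dh), log(dw)) parameterization described in the docstrings and boxes stored as (y1, x1, y2, x2); the real implementation may differ in details such as clamping the size deltas.

import tensorflow as tf

def delta2bbox(rois, deltas, target_means=(0., 0., 0., 0.),
               target_stds=(0.1, 0.1, 0.2, 0.2)):
    # Undo the normalization applied to the regression targets during training.
    deltas = deltas * tf.constant(target_stds, deltas.dtype) \
             + tf.constant(target_means, deltas.dtype)
    dy, dx, dh, dw = tf.unstack(deltas, axis=1)
    # Reference boxes in center/size form (assumed to be float tensors).
    height = rois[:, 2] - rois[:, 0]
    width = rois[:, 3] - rois[:, 1]
    ctr_y = rois[:, 0] + 0.5 * height
    ctr_x = rois[:, 1] + 0.5 * width
    # Apply the predicted offsets and log-scale factors.
    ctr_y = ctr_y + dy * height
    ctr_x = ctr_x + dx * width
    height = height * tf.exp(dh)
    width = width * tf.exp(dw)
    return tf.stack([ctr_y - 0.5 * height, ctr_x - 0.5 * width,
                     ctr_y + 0.5 * height, ctr_x + 0.5 * width], axis=1)

def bbox_clip(boxes, window):
    # window: (y1, x1, y2, x2) describing the valid image area.
    y1 = tf.clip_by_value(boxes[:, 0], window[0], window[2])
    x1 = tf.clip_by_value(boxes[:, 1], window[1], window[3])
    y2 = tf.clip_by_value(boxes[:, 2], window[0], window[2])
    x2 = tf.clip_by_value(boxes[:, 3], window[1], window[3])
    return tf.stack([y1, x1, y2, x2], axis=1)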
Example #4
 def _get_proposals_single(self,
                           scores_list,
                           deltas_list,
                           anchors_list,
                           img_shape,
                           with_probs,
                           training=True):
     """
      Transform the network outputs for a single image into proposals.
     """
     assert len(scores_list) == len(deltas_list) == len(anchors_list)
     level_ids = []
     mlvl_deltas = []
     mlvl_scores = []
     mlvl_anchors = []
     mlvl_proposals = []
     num_levels = len(scores_list)
     for idx in range(num_levels):
          # sigmoid in float32 gives per-class probabilities for this level
          probs = tf.keras.layers.Activation(
              tf.nn.sigmoid, dtype=tf.float32)(scores_list[idx])
         deltas = deltas_list[idx]
         anchors = anchors_list[idx]
         probs = tf.reshape(probs, [-1, self.num_classes])
         deltas = tf.reshape(deltas, [-1, 4])
         pre_nms_limit = tf.math.minimum(self.num_pre_nms,
                                         tf.shape(anchors)[0])
         max_probs = tf.reduce_max(probs, axis=1)
         ix = tf.nn.top_k(
             max_probs,
             k=pre_nms_limit).indices  # top k for each level (as per paper)
         level_anchors = tf.gather(anchors, ix)
         level_deltas = tf.gather(deltas, ix)
         level_scores = tf.gather(probs, ix)  # these contain max_probs
         mlvl_deltas.append(level_deltas)
         mlvl_scores.append(level_scores)
         mlvl_anchors.append(level_anchors)
     scores = tf.concat(mlvl_scores, axis=0)
     anchors = tf.concat(mlvl_anchors, axis=0)
     deltas = tf.concat(mlvl_deltas, axis=0)
     proposals = transforms.delta2bbox(anchors, deltas, self.target_means,
                                       self.target_stds)
     # Clip to valid area
     window = tf.stack([0., 0., img_shape[0], img_shape[1]])
     proposals = transforms.bbox_clip(proposals, window)
     return self.batched_nms(proposals, scores, self.max_instances,
                             self.nms_threshold)
 def _get_proposals_single(self,
                           rpn_probs,
                           rpn_deltas,
                           anchors,
                           valid_flags,
                           img_shape,
                           with_probs,
                           training=True):
     '''
     Calculate proposals.
     
     Args
     ---
         rpn_probs: [num_anchors]
         rpn_deltas: [num_anchors, (dy, dx, log(dh), log(dw))]
         anchors: [num_anchors, (y1, x1, y2, x2)] anchors defined in 
             pixel coordinates.
         valid_flags: [num_anchors]
         img_shape: np.ndarray. [2]. (img_height, img_width)
         with_probs: bool.
     
      Returns
      ---
          proposals: [num_proposals, (y1, x1, y2, x2)] in pixel
              coordinates; if with_probs is True, a score column is
              appended, giving [num_proposals, (y1, x1, y2, x2, score)].
      '''
     H = img_shape[0]
     W = img_shape[1]
     # filter invalid anchors, int => bool
     valid_flags = tf.cast(valid_flags, tf.bool)
     rpn_probs = tf.boolean_mask(rpn_probs, valid_flags)
     rpn_deltas = tf.boolean_mask(rpn_deltas, valid_flags)
     anchors = tf.boolean_mask(anchors, valid_flags)
      # Keep only the highest-scoring anchors before NMS to reduce computation
     if training:
         num_pre_nms = self.num_pre_nms_train
     else:
         num_pre_nms = self.num_pre_nms_test
     pre_nms_limit = tf.math.minimum(num_pre_nms, tf.shape(anchors)[0])
     ix = tf.nn.top_k(rpn_probs, pre_nms_limit, sorted=False).indices
     rpn_probs = tf.gather(rpn_probs, ix)
     rpn_deltas = tf.gather(rpn_deltas, ix)
     anchors = tf.gather(anchors, ix)
     # Get refined anchors
     proposals = transforms.delta2bbox(anchors, rpn_deltas,
                                       self.target_means, self.target_stds)
     # Clip to valid area
     window = tf.stack([0., 0., H, W])
     proposals = transforms.bbox_clip(proposals, window)
     if training:
         proposal_count = self.num_post_nms_train
     else:
         proposal_count = self.num_post_nms_test
     rpn_probs = tf.cast(rpn_probs, proposals.dtype)
     # indices = tf.image.non_max_suppression(proposals, rpn_probs,
     #                                             max_output_size=proposal_count,
     #                                             iou_threshold=self.nms_threshold)
     indices = tf.raw_ops.NonMaxSuppressionV2(
         boxes=proposals,
         scores=rpn_probs,
         max_output_size=proposal_count,
         iou_threshold=self.nms_threshold)
     proposals = tf.stop_gradient(tf.gather(proposals, indices))
     if with_probs:
         proposal_probs = tf.expand_dims(tf.gather(rpn_probs, indices),
                                         axis=1)
         proposals = tf.concat([proposals, proposal_probs], axis=1)
     return proposals
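
For context, here is a sketch of how a per-image function like the one above is typically mapped over the batch; the wrapper name, argument layout, and the assumption that anchors are shared across images are illustrative, not taken from the original code.

def get_proposals(self, rpn_probs, rpn_deltas, anchors, valid_flags,
                  img_shapes, with_probs=False, training=True):
    # Assumed wrapper: iterate over the (statically known) batch dimension
    # and collect one proposal tensor per image.
    proposals_list = []
    for i in range(rpn_probs.shape[0]):
        proposals_list.append(
            self._get_proposals_single(rpn_probs[i], rpn_deltas[i],
                                       anchors, valid_flags[i],
                                       img_shapes[i], with_probs, training))
    return proposals_list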