Example #1
File: rcnn.py  Project: wjfwzzc/Models
    def forward(self, fpn_fms, rcnn_rois, im_info=None, gt_boxes=None):
        rcnn_rois, labels, bbox_targets = self.get_ground_truth(
            rcnn_rois, im_info, gt_boxes)

        fpn_fms = [fpn_fms[x] for x in self.in_features]
        pool_features = layers.roi_pool(
            fpn_fms,
            rcnn_rois,
            self.stride,
            self.pooling_size,
            self.pooling_method,
        )
        flatten_feature = F.flatten(pool_features, start_axis=1)
        roi_feature = F.relu(self.fc1(flatten_feature))
        roi_feature = F.relu(self.fc2(roi_feature))
        pred_logits = self.pred_cls(roi_feature)
        pred_offsets = self.pred_delta(roi_feature)

        if self.training:
            # loss for rcnn classification
            loss_rcnn_cls = F.loss.cross_entropy(pred_logits, labels, axis=1)
            # loss for rcnn regression
            pred_offsets = pred_offsets.reshape(-1, self.cfg.num_classes, 4)
            num_samples = labels.shape[0]
            fg_mask = labels > 0
            loss_rcnn_bbox = layers.smooth_l1_loss(
                pred_offsets[fg_mask, labels[fg_mask] - 1],
                bbox_targets[fg_mask],
                self.cfg.rcnn_smooth_l1_beta,
            ).sum() / F.maximum(num_samples, mge.tensor(1))

            loss_dict = {
                "loss_rcnn_cls": loss_rcnn_cls,
                "loss_rcnn_bbox": loss_rcnn_bbox,
            }
            return loss_dict
        else:
            # slice off index 0 (the background class), keeping foreground scores only
            pred_scores = F.softmax(pred_logits, axis=1)[:, 1:]
            pred_offsets = pred_offsets.reshape(-1, 4)
            target_shape = (rcnn_rois.shape[0], self.cfg.num_classes, 4)
            # rois (N, 4) -> (N, 1, 4) -> (N, num_classes, 4) -> (N * num_classes, 4)
            base_rois = F.broadcast_to(
                F.expand_dims(rcnn_rois[:, 1:5], axis=1),
                target_shape).reshape(-1, 4)
            pred_bbox = self.box_coder.decode(base_rois, pred_offsets)
            return pred_bbox, pred_scores
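At inference the per-class offsets are decoded back onto their base RoIs by self.box_coder. For reference only, the sketch below restates the standard Faster R-CNN delta decoding in NumPy; it assumes plain (dx, dy, dw, dh) offsets without the target normalization the repo's box coder may additionally apply.

import numpy as np

def decode_deltas(rois, deltas):
    # rois: (N, 4) boxes as (x1, y1, x2, y2); deltas: (N, 4) as (dx, dy, dw, dh)
    widths = rois[:, 2] - rois[:, 0]
    heights = rois[:, 3] - rois[:, 1]
    ctr_x = rois[:, 0] + 0.5 * widths
    ctr_y = rois[:, 1] + 0.5 * heights

    pred_ctr_x = deltas[:, 0] * widths + ctr_x
    pred_ctr_y = deltas[:, 1] * heights + ctr_y
    pred_w = np.exp(deltas[:, 2]) * widths
    pred_h = np.exp(deltas[:, 3]) * heights

    return np.stack([
        pred_ctr_x - 0.5 * pred_w,
        pred_ctr_y - 0.5 * pred_h,
        pred_ctr_x + 0.5 * pred_w,
        pred_ctr_y + 0.5 * pred_h,
    ], axis=1)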
Example #2
    def forward(self, image, im_info, gt_boxes=None):
        image = self.preprocess_image(image)
        features = self.backbone(image)
        features = [features[f] for f in self.in_features]

        box_logits, box_offsets = self.head(features)

        box_logits_list = [
            _.transpose(0, 2, 3, 1).reshape(image.shape[0], -1,
                                            self.cfg.num_classes)
            for _ in box_logits
        ]
        box_offsets_list = [
            _.transpose(0, 2, 3, 1).reshape(image.shape[0], -1, 4)
            for _ in box_offsets
        ]

        anchors_list = self.anchor_generator(features)

        all_level_box_logits = F.concat(box_logits_list, axis=1)
        all_level_box_offsets = F.concat(box_offsets_list, axis=1)
        all_level_anchors = F.concat(anchors_list, axis=0)

        if self.training:
            gt_labels, gt_offsets = self.get_ground_truth(
                all_level_anchors,
                gt_boxes,
                im_info[:, 4].astype(np.int32),
            )

            all_level_box_logits = all_level_box_logits.reshape(
                -1, self.cfg.num_classes)
            all_level_box_offsets = all_level_box_offsets.reshape(-1, 4)

            gt_labels = gt_labels.flatten()
            gt_offsets = gt_offsets.reshape(-1, 4)

            valid_mask = gt_labels >= 0
            fg_mask = gt_labels > 0
            num_fg = fg_mask.sum()

            gt_targets = F.zeros_like(all_level_box_logits)
            gt_targets[fg_mask, gt_labels[fg_mask] - 1] = 1

            loss_cls = layers.sigmoid_focal_loss(
                all_level_box_logits[valid_mask],
                gt_targets[valid_mask],
                alpha=self.cfg.focal_loss_alpha,
                gamma=self.cfg.focal_loss_gamma,
            ).sum() / F.maximum(num_fg, 1)

            loss_bbox = layers.smooth_l1_loss(
                all_level_box_offsets[fg_mask],
                gt_offsets[fg_mask],
                beta=self.cfg.smooth_l1_beta,
            ).sum() / F.maximum(num_fg, 1) * self.cfg.loss_bbox_weight

            total = loss_cls + loss_bbox
            loss_dict = {
                "total_loss": total,
                "loss_cls": loss_cls,
                "loss_bbox": loss_bbox,
            }
            self.cfg.losses_keys = list(loss_dict.keys())
            return loss_dict
        else:
            # multi-batch testing is not supported yet
            assert image.shape[0] == 1

            pred_boxes = self.box_coder.decode(all_level_anchors,
                                               all_level_box_offsets[0])
            pred_boxes = pred_boxes.reshape(-1, 4)

            scale_w = im_info[0, 1] / im_info[0, 3]
            scale_h = im_info[0, 0] / im_info[0, 2]
            pred_boxes = pred_boxes / F.concat(
                [scale_w, scale_h, scale_w, scale_h], axis=0)
            clipped_boxes = layers.get_clipped_boxes(pred_boxes,
                                                     im_info[0, 2:4]).reshape(
                                                         -1, 4)
            pred_score = F.sigmoid(all_level_box_logits)[0]
            return pred_score, clipped_boxes
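layers.sigmoid_focal_loss above is the classification loss of this single-stage head; it down-weights well-classified anchors so the many easy negatives do not dominate training. As a point of reference only, a minimal NumPy restatement of the standard focal-loss formula (not the repo's implementation) is:

import numpy as np

def sigmoid_focal_loss_np(logits, targets, alpha=0.25, gamma=2.0):
    # targets: 0/1 one-hot matrix with the same shape as logits
    p = 1.0 / (1.0 + np.exp(-logits))
    pt = np.where(targets == 1, p, 1.0 - p)
    alpha_t = np.where(targets == 1, alpha, 1.0 - alpha)
    return -alpha_t * (1.0 - pt) ** gamma * np.log(np.clip(pt, 1e-12, 1.0))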
Example #3
    def get_losses(self, anchors, pred_logits, pred_offsets, gt_boxes,
                   im_info):
        # pylint: disable=too-many-statements
        def positive_bag_loss(logits, axis=1):
            weight = 1.0 / (1.0 - logits)
            weight /= weight.sum(axis=axis, keepdims=True)
            bag_prob = (weight * logits).sum(axis=axis)
            return -layers.safelog(bag_prob)

        def negative_bag_loss(logits, gamma):
            return (logits**gamma) * (-layers.safelog(1.0 - logits))

        pred_scores = F.sigmoid(pred_logits)
        box_prob_list = []
        positive_losses = []
        clamp_eps = 1e-7
        bucket_size = self.cfg.bucket_size

        for bid in range(im_info.shape[0]):
            boxes_info = gt_boxes[bid, :im_info[bid, 4].astype("int32")]
            # id 0 is reserved for the background class, so subtract 1 to get foreground labels
            labels = boxes_info[:, 4].astype("int32") - 1

            pred_box = self.box_coder.decode(anchors,
                                             pred_offsets[bid]).detach()
            overlaps = layers.get_iou(boxes_info[:, :4], pred_box).detach()
            thresh1 = self.cfg.box_iou_threshold
            thresh2 = F.clip(overlaps.max(axis=1, keepdims=True),
                             lower=thresh1 + clamp_eps,
                             upper=1.0)
            gt_pred_prob = F.clip((overlaps - thresh1) / (thresh2 - thresh1),
                                  lower=0,
                                  upper=1.0)

            image_boxes_prob = F.zeros(pred_logits.shape[1:]).detach()
            # guarantee that nonzero_idx is not empty
            if gt_pred_prob.max() > clamp_eps:
                _, nonzero_idx = F.cond_take(gt_pred_prob != 0, gt_pred_prob)
                # nonzero_idx indexes the flattened (num_gt, num_anchors) matrix, so recover 2-D indices
                num_anchors = gt_pred_prob.shape[1]
                anchors_idx = nonzero_idx % num_anchors
                gt_idx = nonzero_idx // num_anchors
                image_boxes_prob[anchors_idx,
                                 labels[gt_idx]] = gt_pred_prob[gt_idx,
                                                                anchors_idx]

            box_prob_list.append(image_boxes_prob)

            # construct bags for objects
            match_quality_matrix = layers.get_iou(boxes_info[:, :4],
                                                  anchors).detach()
            num_gt = match_quality_matrix.shape[0]
            _, matched_idx = F.topk(
                match_quality_matrix,
                k=bucket_size,
                descending=True,
                no_sort=True,
            )

            matched_idx = matched_idx.detach()
            matched_idx_flatten = matched_idx.reshape(-1)
            gather_idx = labels.reshape(-1, 1)
            gather_idx = F.broadcast_to(gather_idx, (num_gt, bucket_size))

            gather_src = pred_scores[bid, matched_idx_flatten]
            gather_src = gather_src.reshape(num_gt, bucket_size, -1)
            matched_score = F.indexing_one_hot(gather_src, gather_idx, axis=2)

            topk_anchors = anchors[matched_idx_flatten]
            boxes_broad_cast = F.broadcast_to(
                F.expand_dims(boxes_info[:, :4], axis=1),
                (num_gt, bucket_size, 4)).reshape(-1, 4)

            matched_offsets = self.box_coder.encode(topk_anchors,
                                                    boxes_broad_cast)

            reg_loss = layers.smooth_l1_loss(
                pred_offsets[bid, matched_idx_flatten],
                matched_offsets,
                beta=self.cfg.smooth_l1_beta).sum(
                    axis=-1) * self.cfg.reg_loss_weight
            matched_reg_scores = F.exp(-reg_loss)

            positive_losses.append(
                positive_bag_loss(matched_score *
                                  matched_reg_scores.reshape(-1, bucket_size),
                                  axis=1))

        num_foreground = im_info[:, 4].sum()
        pos_loss = F.concat(positive_losses).sum() / F.maximum(
            1.0, num_foreground)
        box_probs = F.stack(box_prob_list, axis=0)

        neg_loss = negative_bag_loss(
            pred_scores *
            (1 - box_probs), self.cfg.focal_loss_gamma).sum() / F.maximum(
                1.0, num_foreground * bucket_size)

        alpha = self.cfg.focal_loss_alpha
        pos_loss = pos_loss * alpha
        neg_loss = neg_loss * (1 - alpha)
        loss_dict = {
            "total_loss": pos_loss + neg_loss,
            "pos_loss": pos_loss,
            "neg_loss": neg_loss,
        }
        return loss_dict
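The two inner helpers implement FreeAnchor-style bag losses: each ground-truth box gets a bag of its bucket_size best-matching anchors, the positive loss is a weighted (mean-max) aggregation over that bag, and the negative loss is a focal-style penalty on anchor scores scaled by (1 - box_probs). A NumPy restatement of the two formulas, for reference only:

import numpy as np

def positive_bag_loss_np(probs, eps=1e-12):
    # probs: (num_gt, bucket_size) matching scores of one image's anchor bags
    weight = 1.0 / np.clip(1.0 - probs, eps, None)
    weight /= weight.sum(axis=1, keepdims=True)
    bag_prob = (weight * probs).sum(axis=1)
    return -np.log(np.clip(bag_prob, eps, None))

def negative_bag_loss_np(probs, gamma=2.0, eps=1e-12):
    # probs: per-anchor, per-class scores already scaled by (1 - box_probs)
    return probs ** gamma * -np.log(np.clip(1.0 - probs, eps, None))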
Example #4
    def forward(self, features, im_info, boxes=None):
        # select the feature maps used by the RPN head
        features = [features[x] for x in self.in_features]

        # get anchors
        anchors_list = self.anchor_generator(features)

        pred_cls_logit_list = []
        pred_bbox_offset_list = []
        for x in features:
            t = F.relu(self.rpn_conv(x))
            scores = self.rpn_cls_score(t)
            pred_cls_logit_list.append(
                scores.reshape(
                    scores.shape[0],
                    self.num_cell_anchors,
                    scores.shape[2],
                    scores.shape[3],
                ))
            bbox_offsets = self.rpn_bbox_offsets(t)
            pred_bbox_offset_list.append(
                bbox_offsets.reshape(
                    bbox_offsets.shape[0],
                    self.num_cell_anchors,
                    4,
                    bbox_offsets.shape[2],
                    bbox_offsets.shape[3],
                ))
        # get rois from the predictions
        rpn_rois = self.find_top_rpn_proposals(pred_cls_logit_list,
                                               pred_bbox_offset_list,
                                               anchors_list, im_info)

        if self.training:
            rpn_labels, rpn_offsets = self.get_ground_truth(
                anchors_list, boxes, im_info[:, 4].astype(np.int32))
            pred_cls_logits, pred_bbox_offsets = self.merge_rpn_score_box(
                pred_cls_logit_list, pred_bbox_offset_list)

            fg_mask = rpn_labels > 0
            valid_mask = rpn_labels >= 0
            num_valid = valid_mask.sum()

            # rpn classification loss
            loss_rpn_cls = F.loss.binary_cross_entropy(
                pred_cls_logits[valid_mask], rpn_labels[valid_mask])

            # rpn regression loss
            loss_rpn_bbox = layers.smooth_l1_loss(
                pred_bbox_offsets[fg_mask],
                rpn_offsets[fg_mask],
                self.cfg.rpn_smooth_l1_beta,
            ).sum() / F.maximum(num_valid, 1)

            loss_dict = {
                "loss_rpn_cls": loss_rpn_cls,
                "loss_rpn_bbox": loss_rpn_bbox
            }
            return rpn_rois, loss_dict
        else:
            return rpn_rois
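The RPN losses only count anchors whose label is not -1: label 1 marks foreground (used by both losses), 0 background (classification only), -1 ignored. A tiny NumPy illustration of the masking and the clamped normalizer, assuming that labeling convention:

import numpy as np

labels = np.array([1, 0, -1, 1, 0])   # example anchor labels: 1 fg, 0 bg, -1 ignore
fg_mask = labels > 0                  # anchors used for the regression loss
valid_mask = labels >= 0              # anchors used for the classification loss
num_valid = max(valid_mask.sum(), 1)  # normalizer clamped to avoid dividing by zero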