def find_top_rpn_proposals(self, rpn_cls_score_list, rpn_bbox_offset_list,
                           anchors_list, im_info):
    """Select the RPN proposals that are handed to the RCNN head as RoIs.

    For every image in the batch, each level's offsets are decoded against
    that level's anchors and the ``prev_nms_top_n`` best-scoring proposals of
    the level are kept. The survivors of all levels are then clipped with
    ``layers.get_clipped_boxes``, filtered with ``layers.filter_boxes`` and
    passed through ``layers.batched_nms`` (grouped by level) with a budget of
    ``post_nms_top_n`` boxes.

    Args:
        rpn_cls_score_list: one score tensor per level, indexed by image
            first; the per-image slice is 3-D (see the ``transpose(1, 2, 0)``).
        rpn_bbox_offset_list: one offset tensor per level, indexed by image
            first; the per-image slice is 4-D and is flattened to ``(-1, 4)``.
        anchors_list: one anchor tensor per level, forwarded unchanged to
            ``self.box_coder.decode``.
        im_info: per-image info; ``im_info.shape[0]`` is used as the batch
            size and ``im_info[bid]`` is forwarded to the clipping helper.

    Returns:
        A detached tensor with one row per kept proposal, laid out as
        ``[batch_id, x1, y1, x2, y2]`` and concatenated over all images.
    """
    if self.training:
        prev_nms_top_n = self.cfg.train_prev_nms_top_n
        post_nms_top_n = self.cfg.train_post_nms_top_n
    else:
        prev_nms_top_n = self.cfg.test_prev_nms_top_n
        post_nms_top_n = self.cfg.test_post_nms_top_n

    return_rois = []

    for bid in range(im_info.shape[0]):
        batch_proposal_list = []
        batch_score_list = []
        batch_level_list = []
        for level, (rpn_cls_score, rpn_bbox_offset, anchors) in enumerate(
                zip(rpn_cls_score_list, rpn_bbox_offset_list, anchors_list)):
            # get proposals and scores
            offsets = rpn_bbox_offset[bid].transpose(2, 3, 0, 1).reshape(-1, 4)
            proposals = self.box_coder.decode(anchors, offsets)

            scores = rpn_cls_score[bid].transpose(1, 2, 0).flatten()
            # detach() returns a new tensor instead of modifying in place, so
            # the result has to be rebound for the call to have any effect.
            scores = scores.detach()

            # prev nms top n
            # NOTE(review): assumes every level provides at least
            # prev_nms_top_n candidates -- confirm F.topk behavior otherwise.
            scores, order = F.topk(scores, descending=True, k=prev_nms_top_n)
            proposals = proposals[order, :]

            batch_proposal_list.append(proposals)
            batch_score_list.append(scores)
            # remember the originating level so that NMS is applied per level
            batch_level_list.append(F.full_like(scores, level))

        # gather proposals, scores, level
        proposals = F.concat(batch_proposal_list, axis=0)
        scores = F.concat(batch_score_list, axis=0)
        levels = F.concat(batch_level_list, axis=0)

        proposals = layers.get_clipped_boxes(proposals, im_info[bid])

        # filter invalid proposals and apply total level nms
        keep_mask = layers.filter_boxes(proposals)
        _, keep_inds = F.cond_take(keep_mask == 1, keep_mask)
        proposals = proposals[keep_inds, :]
        scores = scores[keep_inds]
        levels = levels[keep_inds]
        nms_keep_inds = layers.batched_nms(
            proposals, scores, levels,
            self.cfg.rpn_nms_threshold, post_nms_top_n)

        # generate rois to rcnn head, rois shape (N, 5),
        # info [batch_id, x1, y1, x2, y2]
        # Only the box coordinates are emitted, so the scores are not
        # attached to the proposals before indexing.
        kept_boxes = proposals[nms_keep_inds][:, :4]
        batch_inds = F.full((kept_boxes.shape[0], 1), bid)
        return_rois.append(F.concat([batch_inds, kept_boxes], axis=1))

    return_rois = F.concat(return_rois, axis=0)
    return return_rois.detach()
def inference(self, features, im_info):
    """Run the RPN and RCNN heads and return rescaled, clipped detections.

    Only row 0 of ``im_info`` is read, i.e. a single image is processed.

    Args:
        features: feature maps forwarded to ``self.rpn`` and ``self.rcnn``.
        im_info: per-image info tensor. Columns 0/1 are divided by columns
            2/3 to obtain the height/width scale; presumably these are the
            resized and the original image sizes -- verify against caller.

    Returns:
        ``(pred_score, boxes)`` where ``boxes`` has shape
        ``(-1, self.cfg.num_classes, 4)``.
    """
    proposals = self.rpn(features, im_info)
    raw_boxes, pred_score = self.rcnn(features, proposals)

    # Per-coordinate divisor laid out as (w, h, w, h).
    ratio_w = im_info[0, 1] / im_info[0, 3]
    ratio_h = im_info[0, 0] / im_info[0, 2]
    divisor = F.concat([ratio_w, ratio_h, ratio_w, ratio_h], axis=0)

    rescaled = raw_boxes.reshape(-1, 4) / divisor
    clipped = layers.get_clipped_boxes(rescaled, im_info[0, 2:4])
    return pred_score, clipped.reshape(-1, self.cfg.num_classes, 4)
def forward(self, image, im_info, gt_boxes=None):
    """Return the loss dict in training mode, or detections for one image.

    Args:
        image: input batch; it is run through ``self.preprocess_image`` first.
        im_info: per-image info tensor. In inference, columns 0/1 of row 0
            are divided by columns 2/3 to obtain the height/width scale
            (presumably resized vs. original size -- verify against caller).
        gt_boxes: ground truth, only forwarded to ``self.get_losses`` in
            training mode.

    Returns:
        Training: the dict produced by ``self.get_losses``; its keys are also
        stored in ``self.cfg.losses_keys``.
        Inference: ``(scores, boxes)`` for image 0, where ``scores`` is the
        sigmoid of the logits and ``boxes`` has shape ``(-1, 4)``.
    """
    image = self.preprocess_image(image)
    backbone_out = self.backbone(image)
    features = [backbone_out[name] for name in self.in_features]

    box_logits, box_offsets = self.head(features)
    num_images = image.shape[0]

    def merge_levels(level_maps, channels):
        # Move axis 1 to the end, flatten everything between the batch and
        # channel axes, then join all levels along axis 1.
        flattened = [
            fmap.transpose(0, 2, 3, 1).reshape(num_images, -1, channels)
            for fmap in level_maps
        ]
        return F.concat(flattened, axis=1)

    anchors_list = self.anchor_generator(features)

    logits = merge_levels(box_logits, self.cfg.num_classes)
    offsets = merge_levels(box_offsets, 4)
    anchors = F.concat(anchors_list, axis=0)

    if not self.training:
        # currently not support multi-batch testing
        assert image.shape[0] == 1

        boxes = self.box_coder.decode(anchors, offsets[0])
        boxes = boxes.reshape(-1, 4)

        # Per-coordinate divisor laid out as (w, h, w, h).
        ratio_w = im_info[0, 1] / im_info[0, 3]
        ratio_h = im_info[0, 0] / im_info[0, 2]
        divisor = F.concat([ratio_w, ratio_h, ratio_w, ratio_h], axis=0)
        boxes = boxes / divisor

        clipped_box = layers.get_clipped_boxes(
            boxes, im_info[0, 2:4]).reshape(-1, 4)
        scores = F.sigmoid(logits)
        return scores[0], clipped_box

    losses = self.get_losses(anchors, logits, offsets, gt_boxes, im_info)
    # Expose the loss names on the config object.
    self.cfg.losses_keys = list(losses.keys())
    return losses
def forward(self, image, im_info, gt_boxes=None):
    """Return classification/box/centerness losses, or detections for one image.

    Args:
        image: input batch; it is run through ``self.preprocess_image`` first.
        im_info: per-image info tensor. Column 4 is cast to int32 and passed
            to ``self.get_ground_truth`` (presumably the number of ground
            truth boxes per image -- verify against caller). In inference,
            columns 0/1 of row 0 are divided by columns 2/3 to obtain the
            height/width scale.
        gt_boxes: ground truth, only used in training mode.

    Returns:
        Training: dict with ``total_loss``, ``loss_cls``, ``loss_bbox`` and
        ``loss_ctr``; the keys are also stored in ``self.cfg.losses_keys``.
        Inference: ``(pred_score, boxes)`` for image 0, where ``pred_score``
        is ``sqrt(sigmoid(logits) * sigmoid(centerness))`` and ``boxes`` has
        shape ``(-1, 4)``.
    """
    image = self.preprocess_image(image)
    backbone_out = self.backbone(image)
    features = [backbone_out[name] for name in self.in_features]

    box_logits, box_offsets, box_ctrness = self.head(features)
    num_images = image.shape[0]

    def merge_levels(level_maps, channels):
        # Move axis 1 to the end, flatten everything between the batch and
        # channel axes, then join all levels along axis 1.
        flattened = [
            fmap.transpose(0, 2, 3, 1).reshape(num_images, -1, channels)
            for fmap in level_maps
        ]
        return F.concat(flattened, axis=1)

    anchors_list = self.anchor_generator(features)

    logits = merge_levels(box_logits, self.cfg.num_classes)
    offsets = merge_levels(box_offsets, 4)
    ctrness = merge_levels(box_ctrness, 1)

    if not self.training:
        # currently not support multi-batch testing
        assert image.shape[0] == 1

        all_level_anchors = F.concat(anchors_list, axis=0)
        boxes = self.point_coder.decode(all_level_anchors, offsets[0])
        boxes = boxes.reshape(-1, 4)

        # Per-coordinate divisor laid out as (w, h, w, h).
        ratio_w = im_info[0, 1] / im_info[0, 3]
        ratio_h = im_info[0, 0] / im_info[0, 2]
        divisor = F.concat([ratio_w, ratio_h, ratio_w, ratio_h], axis=0)
        boxes = boxes / divisor

        clipped_boxes = layers.get_clipped_boxes(
            boxes, im_info[0, 2:4]).reshape(-1, 4)
        pred_score = F.sqrt(F.sigmoid(logits) * F.sigmoid(ctrness))[0]
        return pred_score, clipped_boxes

    gt_labels, gt_offsets, gt_ctrness = self.get_ground_truth(
        anchors_list, gt_boxes, im_info[:, 4].astype(np.int32),
    )

    # Collapse the batch dimension so that every row is one location.
    logits = logits.reshape(-1, self.cfg.num_classes)
    offsets = offsets.reshape(-1, 4)
    ctrness = ctrness.flatten()

    gt_labels = gt_labels.flatten()
    gt_offsets = gt_offsets.reshape(-1, 4)
    gt_ctrness = gt_ctrness.flatten()

    # Labels < 0 are left out of the classification loss; labels > 0 are
    # foreground and map to class column ``label - 1``.
    valid_mask = gt_labels >= 0
    fg_mask = gt_labels > 0
    fg_ctrness = gt_ctrness[fg_mask]

    # add detach() to avoid syncing across ranks in backward
    num_fg = layers.all_reduce_mean(fg_mask.sum()).detach()
    sum_ctr = layers.all_reduce_mean(fg_ctrness.sum()).detach()

    gt_targets = F.zeros_like(logits)
    gt_targets[fg_mask, gt_labels[fg_mask] - 1] = 1

    cls_terms = layers.sigmoid_focal_loss(
        logits[valid_mask],
        gt_targets[valid_mask],
        alpha=self.cfg.focal_loss_alpha,
        gamma=self.cfg.focal_loss_gamma,
    )
    loss_cls = cls_terms.sum() / F.maximum(num_fg, 1)

    # The IoU loss of every foreground location is weighted by its target
    # centerness and normalized by the summed centerness.
    iou_terms = layers.iou_loss(
        offsets[fg_mask],
        gt_offsets[fg_mask],
        box_mode="ltrb",
        loss_type=self.cfg.iou_loss_type,
    )
    loss_bbox = (
        (iou_terms * fg_ctrness).sum()
        / F.maximum(sum_ctr, 1e-5)
        * self.cfg.loss_bbox_weight
    )

    ctr_terms = layers.binary_cross_entropy(ctrness[fg_mask], fg_ctrness)
    loss_ctr = ctr_terms.sum() / F.maximum(num_fg, 1)

    loss_dict = {
        "total_loss": loss_cls + loss_bbox + loss_ctr,
        "loss_cls": loss_cls,
        "loss_bbox": loss_bbox,
        "loss_ctr": loss_ctr,
    }
    # Expose the loss names on the config object.
    self.cfg.losses_keys = list(loss_dict.keys())
    return loss_dict
def forward(self, image, im_info, gt_boxes=None):
    """Return focal/smooth-L1 losses in training, or detections for one image.

    Args:
        image: input batch; it is run through ``self.preprocess_image`` first.
        im_info: per-image info tensor. Column 4 is cast to int32 and passed
            to ``self.get_ground_truth`` (presumably the number of ground
            truth boxes per image -- verify against caller). In inference,
            columns 0/1 of row 0 are divided by columns 2/3 to obtain the
            height/width scale.
        gt_boxes: ground truth, only used in training mode.

    Returns:
        Training: dict with ``total_loss``, ``loss_cls`` and ``loss_bbox``;
        the keys are also stored in ``self.cfg.losses_keys``.
        Inference: ``(pred_score, boxes)`` for image 0, where ``pred_score``
        is the sigmoid of the logits and ``boxes`` has shape ``(-1, 4)``.
    """
    image = self.preprocess_image(image)
    backbone_out = self.backbone(image)
    features = [backbone_out[name] for name in self.in_features]

    box_logits, box_offsets = self.head(features)
    num_images = image.shape[0]

    def merge_levels(level_maps, channels):
        # Move axis 1 to the end, flatten everything between the batch and
        # channel axes, then join all levels along axis 1.
        flattened = [
            fmap.transpose(0, 2, 3, 1).reshape(num_images, -1, channels)
            for fmap in level_maps
        ]
        return F.concat(flattened, axis=1)

    anchors_list = self.anchor_generator(features)

    logits = merge_levels(box_logits, self.cfg.num_classes)
    offsets = merge_levels(box_offsets, 4)
    anchors = F.concat(anchors_list, axis=0)

    if not self.training:
        # currently not support multi-batch testing
        assert image.shape[0] == 1

        boxes = self.box_coder.decode(anchors, offsets[0])
        boxes = boxes.reshape(-1, 4)

        # Per-coordinate divisor laid out as (w, h, w, h).
        ratio_w = im_info[0, 1] / im_info[0, 3]
        ratio_h = im_info[0, 0] / im_info[0, 2]
        divisor = F.concat([ratio_w, ratio_h, ratio_w, ratio_h], axis=0)
        boxes = boxes / divisor

        clipped_boxes = layers.get_clipped_boxes(
            boxes, im_info[0, 2:4]).reshape(-1, 4)
        pred_score = F.sigmoid(logits)[0]
        return pred_score, clipped_boxes

    gt_labels, gt_offsets = self.get_ground_truth(
        anchors, gt_boxes, im_info[:, 4].astype(np.int32),
    )

    # Collapse the batch dimension so that every row is one anchor.
    logits = logits.reshape(-1, self.cfg.num_classes)
    offsets = offsets.reshape(-1, 4)
    gt_labels = gt_labels.flatten()
    gt_offsets = gt_offsets.reshape(-1, 4)

    # Labels < 0 are left out of the classification loss; labels > 0 are
    # foreground and map to class column ``label - 1``.
    valid_mask = gt_labels >= 0
    fg_mask = gt_labels > 0
    num_fg = fg_mask.sum()

    gt_targets = F.zeros_like(logits)
    gt_targets[fg_mask, gt_labels[fg_mask] - 1] = 1

    cls_terms = layers.sigmoid_focal_loss(
        logits[valid_mask],
        gt_targets[valid_mask],
        alpha=self.cfg.focal_loss_alpha,
        gamma=self.cfg.focal_loss_gamma,
    )
    loss_cls = cls_terms.sum() / F.maximum(num_fg, 1)

    reg_terms = layers.smooth_l1_loss(
        offsets[fg_mask],
        gt_offsets[fg_mask],
        beta=self.cfg.smooth_l1_beta,
    )
    loss_bbox = (
        reg_terms.sum() / F.maximum(num_fg, 1) * self.cfg.loss_bbox_weight
    )

    loss_dict = {
        "total_loss": loss_cls + loss_bbox,
        "loss_cls": loss_cls,
        "loss_bbox": loss_bbox,
    }
    # Expose the loss names on the config object.
    self.cfg.losses_keys = list(loss_dict.keys())
    return loss_dict