def roi_pool(
    rpn_fms, rois, stride, pool_shape, pooler_type="roi_align",
):
    """Pool a fixed-size feature for every RoI from its assigned pyramid level.

    Each RoI is assigned the level ``floor(4 + log2(sqrt(area) / 224))``,
    clamped to ``[log2(stride[0]), log2(stride[-1])]``, and pooled from the
    matching entry of ``rpn_fms`` with scale ``1 / stride[level]``.

    Args:
        rpn_fms: sequence of feature maps, one per entry of ``stride``.
        rois: 2-D tensor; columns 1..4 are read as box corners (the area is
            ``(c3 - c1) * (c4 - c2)``). Column 0 is presumably the batch
            index -- confirm against the caller.
        stride: sequence of strides, one per feature map.
        pool_shape: output shape forwarded to the pooling operator.
        pooler_type: ``"roi_pool"`` or ``"roi_align"``.

    Returns:
        Pooled features, one row per input RoI, in the input RoI order.

    Raises:
        ValueError: if ``pooler_type`` is not a supported pooler. Previously
            this fell through both branches and failed on an unbound local.
    """
    if pooler_type not in ("roi_pool", "roi_align"):
        raise ValueError(
            "unsupported pooler_type: {!r}".format(pooler_type)
        )

    rois = rois.detach()
    assert len(stride) == len(rpn_fms)
    canonical_level = 4
    canonical_box_size = 224
    min_level = int(math.log2(stride[0]))
    max_level = int(math.log2(stride[-1]))

    num_fms = len(rpn_fms)
    box_area = (rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2])
    assigned_level = F.floor(
        canonical_level
        + F.log(F.sqrt(box_area) / canonical_box_size) / np.log(2)
    ).astype("int32")
    assigned_level = F.minimum(assigned_level, max_level)
    assigned_level = F.maximum(assigned_level, min_level)
    # Shift so that level 0 corresponds to rpn_fms[0].
    assigned_level = assigned_level - min_level

    # Avoid empty assignment: append one dummy RoI per level so every level
    # receives at least one box. The dummies are stripped before returning.
    assigned_level = F.concat(
        [
            assigned_level,
            F.arange(num_fms, dtype="int32", device=assigned_level.device),
        ],
    )
    rois = F.concat([rois, F.zeros((num_fms, rois.shape[-1]))])

    pool_list, inds_list = [], []
    for i in range(num_fms):
        _, inds = F.cond_take(assigned_level == i, assigned_level)
        level_rois = rois[inds]

        if pooler_type == "roi_pool":
            pool_fm = F.nn.roi_pooling(
                rpn_fms[i], level_rois, pool_shape,
                mode="max", scale=1.0 / stride[i],
            )
        else:  # "roi_align", guaranteed by the validation above
            pool_fm = F.nn.roi_align(
                rpn_fms[i],
                level_rois,
                pool_shape,
                mode="average",
                spatial_scale=1.0 / stride[i],
                sample_points=2,
                aligned=True,
            )
        pool_list.append(pool_fm)
        inds_list.append(inds)

    # Pooled rows are grouped by level; argsort of the gathered indices
    # restores the original RoI order, with the dummy rows at the end.
    fm_order = F.argsort(F.concat(inds_list, axis=0))
    pool_feature = F.concat(pool_list, axis=0)
    pool_feature = pool_feature[fm_order][:-num_fms]

    return pool_feature
def test_condtake():
    """Check F.cond_take against NumPy boolean-mask selection.

    The returned values must equal ``data[mask]`` and the returned indices
    must equal the flat positions where the mask is true.
    """
    data = np.array([[1, 2, 3], [4, 5, 6]])
    mask = np.array([[True, False, True], [False, True, True]])
    data_tensor = tensor(data)
    mask_tensor = tensor(mask)

    taken, positions = F.cond_take(mask_tensor, data_tensor)

    expected_positions = np.where(mask.reshape(-1))[0]
    np.testing.assert_equal(taken.numpy(), data[mask])
    np.testing.assert_equal(positions.numpy(), expected_positions)
def find_top_rpn_proposals(self, rpn_cls_score_list, rpn_bbox_offset_list,
                           anchors_list, im_info):
    """Select the top RPN proposals of every image via per-level top-k and NMS.

    For each image and each level the offsets are decoded against the
    anchors and the ``prev_nms_top_n`` highest scoring proposals are kept.
    The per-level results are concatenated, clipped with
    ``layers.get_clipped_boxes``, filtered with ``layers.filter_boxes`` and
    passed through ``layers.batched_nms`` (levels act as the batch ids).

    Args:
        rpn_cls_score_list: per-level score tensors, indexed by image first.
        rpn_bbox_offset_list: per-level offset tensors, indexed by image first.
        anchors_list: per-level anchors passed to ``self.box_coder.decode``.
        im_info: per-image info; its first dimension is the batch size.

    Returns:
        A detached tensor whose rows are the image index followed by the
        first four columns of the kept proposals.
    """
    prev_nms_top_n = (self.cfg.train_prev_nms_top_n
                      if self.training else self.cfg.test_prev_nms_top_n)
    post_nms_top_n = (self.cfg.train_post_nms_top_n
                      if self.training else self.cfg.test_post_nms_top_n)

    return_rois = []

    for bid in range(im_info.shape[0]):
        batch_proposal_list = []
        batch_score_list = []
        batch_level_list = []
        per_level = zip(rpn_cls_score_list, rpn_bbox_offset_list, anchors_list)
        for level, (rpn_cls_score, rpn_bbox_offset, anchors) in enumerate(per_level):
            # get proposals and scores
            offsets = rpn_bbox_offset[bid].transpose(2, 3, 0, 1).reshape(-1, 4)
            proposals = self.box_coder.decode(anchors, offsets)

            scores = rpn_cls_score[bid].transpose(1, 2, 0).flatten()
            # detach() returns a new tensor; the original statement threw
            # the result away, so the scores were never actually detached.
            scores = scores.detach()
            # prev nms top n
            scores, order = F.topk(scores, descending=True, k=prev_nms_top_n)
            proposals = proposals[order, :]

            batch_proposal_list.append(proposals)
            batch_score_list.append(scores)
            batch_level_list.append(F.full_like(scores, level))

        # gather proposals, scores, level
        proposals = F.concat(batch_proposal_list, axis=0)
        scores = F.concat(batch_score_list, axis=0)
        levels = F.concat(batch_level_list, axis=0)

        proposals = layers.get_clipped_boxes(proposals, im_info[bid])
        # filter invalid proposals and apply total level nms
        keep_mask = layers.filter_boxes(proposals)
        _, keep_inds = F.cond_take(keep_mask == 1, keep_mask)
        proposals = proposals[keep_inds, :]
        scores = scores[keep_inds]
        levels = levels[keep_inds]
        nms_keep_inds = layers.batched_nms(
            proposals, scores, levels, self.cfg.rpn_nms_threshold, post_nms_top_n)

        # generate rois to rcnn head: the score column is appended here but
        # only the first four columns are forwarded below.
        rois = F.concat([proposals, scores.reshape(-1, 1)], axis=1)
        rois = rois[nms_keep_inds]
        batch_inds = F.full((rois.shape[0], 1), bid)
        batch_rois = F.concat([batch_inds, rois[:, :4]], axis=1)
        return_rois.append(batch_rois)

    return_rois = F.concat(return_rois, axis=0)
    return return_rois.detach()
def get_ground_truth(self, rpn_rois, im_info, gt_boxes):
    """Sample RoIs per image and build their class labels and box targets.

    Outside training the proposals are returned untouched together with two
    ``None`` placeholders. In training, for every image the proposals are
    merged with the ground-truth boxes, matched by IoU, split into
    foreground/background, sampled with ``layers.sample_mask_from_labels``
    and encoded with ``self.box_coder``.

    Returns:
        ``(rois, labels, bbox_targets)``, each concatenated over the batch
        and detached.
    """
    if not self.training:
        return rpn_rois, None, None

    cfg = self.cfg
    sampled_rois, sampled_labels, sampled_targets = [], [], []

    # Handle the proposals and ground truth of one image at a time.
    for image_id in range(gt_boxes.shape[0]):
        gt_count = im_info[image_id, 4].astype("int32")
        image_gt = gt_boxes[image_id, :gt_count, :]
        id_column = F.full((image_gt.shape[0], 1), image_id)
        gt_as_rois = F.concat([id_column, image_gt[:, :4]], axis=1)
        in_image = rpn_rois[:, 0] == image_id
        # candidates : [batch_id, x1, y1, x2, y2]
        candidates = F.concat([rpn_rois[in_image], gt_as_rois])

        iou = layers.get_iou(candidates[:, 1:5], image_gt)
        best_iou = iou.max(axis=1)
        best_gt = F.argmax(iou, axis=1).astype("int32")
        labels = image_gt[best_gt, 4]

        # ---------------- fg/bg split for each roi ---------------#
        is_fg = (best_iou >= cfg.fg_threshold) & (labels >= 0)
        above_low = best_iou >= cfg.bg_threshold_low
        below_high = best_iou < cfg.bg_threshold_high
        is_bg = above_low & below_high

        fg_quota = int(cfg.num_rois * cfg.fg_ratio)
        fg_sampled = layers.sample_mask_from_labels(is_fg, fg_quota, 1)
        bg_quota = int(cfg.num_rois - fg_sampled.sum())
        bg_sampled = layers.sample_mask_from_labels(is_bg, bg_quota, 1)

        # Labels of everything that was not sampled as foreground become 0.
        labels = labels * fg_sampled

        keep_mask = fg_sampled + bg_sampled
        _, keep = F.cond_take(keep_mask == 1, keep_mask)
        # Cap the number of kept rois to avoid exceeding memory.
        limit = min(cfg.num_rois, keep.shape[0])
        keep = keep[:limit]

        labels = labels[keep].astype("int32")
        rois = candidates[keep]
        matched_gt = image_gt[best_gt[keep], :4]
        targets = self.box_coder.encode(rois[:, 1:5], matched_gt)
        targets = targets.reshape(-1, 4)

        sampled_rois.append(rois)
        sampled_labels.append(labels)
        sampled_targets.append(targets)

    all_rois = F.concat(sampled_rois, axis=0).detach()
    all_labels = F.concat(sampled_labels, axis=0).detach()
    all_targets = F.concat(sampled_targets, axis=0).detach()
    return all_rois, all_labels, all_targets
def roi_pool(
    rpn_fms, rois, stride, pool_shape, roi_type="roi_align",
):
    """Pool a fixed-size feature for every RoI from its assigned pyramid level.

    Each RoI is assigned the level ``floor(4 + log2(sqrt(area) / 224))``,
    clamped to ``[log2(stride[0]), log2(stride[-1])]``, and pooled from the
    matching entry of ``rpn_fms`` with scale ``1 / stride[level]``.

    Args:
        rpn_fms: sequence of feature maps, one per entry of ``stride``.
        rois: 2-D tensor; columns 1..4 are read as box corners (the area is
            ``(c3 - c1) * (c4 - c2)``).
        stride: sequence of strides, one per feature map.
        pool_shape: output shape forwarded to the pooling operator.
        roi_type: ``"roi_pool"`` or ``"roi_align"``.

    Returns:
        Pooled features, one row per input RoI, in the input RoI order.

    Raises:
        ValueError: if ``roi_type`` is not a supported pooler. Previously
            this fell through both branches and failed on an unbound local.
    """
    if roi_type not in ("roi_pool", "roi_align"):
        raise ValueError("unsupported roi_type: {!r}".format(roi_type))

    assert len(stride) == len(rpn_fms)
    canonical_level = 4
    canonical_box_size = 224
    min_level = math.log2(stride[0])
    max_level = math.log2(stride[-1])

    num_fms = len(rpn_fms)
    box_area = (rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2])
    level_assignments = F.floor(
        canonical_level
        + F.log(box_area.sqrt() / canonical_box_size) / np.log(2)
    )
    level_assignments = F.minimum(level_assignments, max_level)
    level_assignments = F.maximum(level_assignments, min_level)
    # Shift so that level 0 corresponds to rpn_fms[0].
    level_assignments = level_assignments - min_level

    # Avoid empty assignment: append one dummy RoI per level so every level
    # receives at least one box. The dummies are stripped before returning.
    level_assignments = F.concat(
        [level_assignments, mge.tensor(np.arange(num_fms, dtype=np.int32))],
    )
    rois = F.concat([rois, mge.zeros((num_fms, rois.shapeof(-1)))])

    pool_list, inds_list = [], []
    for i in range(num_fms):
        mask = level_assignments == i
        _, inds = F.cond_take(mask == 1, mask)
        level_rois = rois.ai[inds]
        if roi_type == "roi_pool":
            pool_fm = F.roi_pooling(
                rpn_fms[i], level_rois, pool_shape,
                mode="max", scale=1.0 / stride[i],
            )
        else:  # "roi_align", guaranteed by the validation above
            pool_fm = F.roi_align(
                rpn_fms[i],
                level_rois,
                pool_shape,
                mode="average",
                spatial_scale=1.0 / stride[i],
                sample_points=2,
                aligned=True,
            )
        pool_list.append(pool_fm)
        inds_list.append(inds)

    # Pooled rows are grouped by level; the argsort indices of the gathered
    # positions restore the original RoI order, dummy rows last.
    fm_order = F.concat(inds_list, axis=0)
    fm_order = F.argsort(fm_order.reshape(1, -1))[1].reshape(-1)
    pool_feature = F.concat(pool_list, axis=0)
    pool_feature = pool_feature.ai[fm_order][:-num_fms]

    return pool_feature
def fpn_anchor_target_opr_core_impl(gt_boxes, im_info, anchors,
                                    allow_low_quality_matches=True):
    """Assign a label and a regression target to every anchor of one image.

    Args:
        gt_boxes: ground-truth rows; the first four columns are passed to the
            overlap operator and column 4 is compared with
            ``config.ignore_label``.
        im_info: per-image info; entry 5 is used as the number of boxes read
            from ``gt_boxes``.
        anchors: anchor boxes matched against the ground truth.
        allow_low_quality_matches: when true, the anchor with the highest
            overlap for each ground-truth box is forced to match that box.

    Returns:
        ``(labels, bbox_targets)``. Labels are ``1`` for anchors whose
        overlap reaches ``config.rpn_positive_overlap``, ``0`` for anchors
        below ``config.rpn_negative_overlap`` and ``ignore_label`` otherwise.
    """
    ignore_label = config.ignore_label
    # get the gt boxes
    gtboxes = gt_boxes[:im_info[5].astype(np.int32)]
    ignore_mask = F.equal(gtboxes[:, 4], ignore_label)

    # find the valid gtboxes
    _, index = F.cond_take(1 - ignore_mask > 0, ignore_mask)
    valid_gt_boxes = gtboxes[index.astype(np.int32)]

    # compute the iou matrix
    overlaps = box_overlap_opr(anchors, valid_gt_boxes[:, :4])
    # match the dtboxes
    a_shp0 = anchors.shape[0]
    argmax_overlaps = F.argmax(overlaps, axis=1)
    max_overlaps = F.nn.indexing_one_hot(
        overlaps, argmax_overlaps.astype(np.int32), 1)

    # Start from "ignore" everywhere, then zero out the negatives.
    labels = F.ones(a_shp0).astype(np.int32) * ignore_label
    labels = labels * (max_overlaps >= config.rpn_negative_overlap).astype(
        np.float32)

    # set positive ones
    fg_mask = (max_overlaps >= config.rpn_positive_overlap)

    if allow_low_quality_matches:
        # match the max gt
        gt_argmax_overlaps = F.argmax(overlaps, axis=0).astype(np.int32)

        max_overlaps[gt_argmax_overlaps] = 1.
        # One entry per ground-truth column of the overlap matrix; reading
        # the shape avoids a separate max-reduction used only for its length.
        m = overlaps.shape[1]
        argmax_overlaps[gt_argmax_overlaps] = F.linspace(0, m - 1, m).astype(
            np.int32)
        fg_mask = (max_overlaps >= config.rpn_positive_overlap)

    labels[fg_mask] = 1

    # compute the bbox targets
    bbox_targets = bbox_transform_opr(
        anchors, valid_gt_boxes[argmax_overlaps, :4])
    if config.rpn_bbox_normalize_targets:
        std_opr = mge.tensor(config.bbox_normalize_stds[None, :]).to(
            anchors.device)
        mean_opr = mge.tensor(config.bbox_normalize_means[None, :]).to(
            anchors.device)
        # (t - mean) / std, written as t / std - mean / std.
        minus_opr = mean_opr / std_opr
        bbox_targets = bbox_targets / std_opr - minus_opr
    return labels, bbox_targets
def test_condtake(is_varnode):
    """Check F.cond_take against NumPy boolean-mask selection.

    When ``is_varnode`` is true the inputs are created through a ``Network``
    instance, otherwise ``None`` is passed to ``make_tensor``.
    """
    network = Network() if is_varnode else None

    data = np.array([[1, 2, 3], [4, 5, 6]]).astype("float32")
    mask = np.array([[True, False, True], [False, True, True]])
    data_tensor = make_tensor(data, network)
    mask_tensor = make_tensor(mask, network)

    taken, positions = F.cond_take(mask_tensor, data_tensor)

    expected_positions = np.where(mask.reshape(-1))[0]
    np.testing.assert_equal(taken.numpy(), data[mask])
    np.testing.assert_equal(positions.numpy(), expected_positions)
def forward(self, fpn_fms, rcnn_rois, gt_boxes=None, im_info=None):
    """Run the cascaded sub-networks, feeding each stage's boxes to the next.

    After every stage, slice ``[:, 1]`` of the stage output is regrouped per
    image into ``[image_id, first four columns]`` rows and used as the RoIs
    of the following stage.

    Returns:
        In training, a dict with the losses of all stages merged by
        ``dict.update``. Otherwise ``prob[:, :, :5]`` of the last stage.
    """

    def regroup_by_image(stage_rois, num_images):
        # Rebuild the roi tensor image by image; column 5 of the stage
        # output is compared with the image index.
        per_image = []
        for image_id in range(num_images):
            belongs = F.equal(stage_rois[:, 5], image_id)
            _, rows = F.cond_take(belongs > 0, belongs)
            id_column = image_id * F.ones([belongs.sum(), 1])
            per_image.append(
                F.concat([id_column, stage_rois[rows, :4]], axis=1))
        return F.concat(per_image, axis=0)

    if not self.training:
        for stage, _ in enumerate(self.iou_thrs):
            prob = self.subnets[stage](fpn_fms, rcnn_rois)
            # Inference regroups for a single image only.
            rcnn_rois = regroup_by_image(prob[:, 1], 1)
        return prob[:, :, :5]

    loss = {}
    for stage, _ in enumerate(self.iou_thrs):
        stage_loss, prob = self.subnets[stage](
            fpn_fms, rcnn_rois, gt_boxes, im_info)
        rcnn_rois = regroup_by_image(prob[:, 1], config.batch_per_gpu)
        loss.update(stage_loss)
    return loss
def test_dump_cond_take():
    """Dump a traced cond_take, rebuild its outputs via Net, and re-run it.

    The modified graph must return the same values and indices as NumPy
    boolean-mask selection on the input.
    """
    inp = Tensor([1.0, 2.0])

    @trace(symbolic=True, capture_as_const=True)
    def fwd(a):
        return F.cond_take(a > 1, a)

    fwd(inp)

    # Serialize the traced function into an in-memory buffer.
    dumped = io.BytesIO()
    fwd.dump(
        dumped,
        arg_names=["a"],
        output_names=["o1", "o2"],
        optimize_for_inference=False,
    )
    dumped.seek(0)

    # Replace the dumped outputs with a freshly built cond_take.
    net = Net.load(dumped)
    input_var = net.input_vars[0]
    val, idx = F.cond_take(input_var > 1, input_var)
    net.remove_output(*net.output_vars)
    val.name = "value"
    idx.name = "index"
    net.add_output(val, idx)

    rebuilt = io.BytesIO()
    net.dump(rebuilt)
    rebuilt.seek(0)

    out = GraphInference(rebuilt).run(inp.numpy())

    data = inp.numpy()
    mask = inp.numpy() > 1
    np.testing.assert_equal(out["index"], np.where(mask.reshape(-1))[0])
    np.testing.assert_equal(out["value"], data[mask])
def per_level_gt(self, gt_boxes, im_info, anchors,
                 allow_low_quality_matches=True):
    """Assign a label and a regression target to every anchor.

    Labels start as ``self.cfg.ignore_label``, are multiplied by the
    ``>= rpn_negative_overlap`` mask, and are set to one where the
    (possibly adjusted) overlap reaches ``rpn_positive_overlap``.

    Returns:
        ``(labels, bbox_targets)``.
    """
    cfg = self.cfg
    ignore_label = cfg.ignore_label
    # Only the first im_info[4] rows are treated as valid boxes.
    valid_gt = gt_boxes[:im_info[4], :]

    # IoU between every anchor and every valid box.
    iou = layers.get_iou(anchors, valid_gt[:, :4])

    num_anchors = anchors.shape[0]
    best_iou = F.max(iou, axis=1)
    best_gt = F.argmax(iou, axis=1)

    # everything is "ignore" first, then the negatives are zeroed
    labels = mge.ones(num_anchors).astype("int32") * ignore_label
    labels = labels * (best_iou >= cfg.rpn_negative_overlap)

    positive = best_iou >= cfg.rpn_positive_overlap
    one = mge.tensor(1.0)

    if allow_low_quality_matches:
        # Force the best anchor of every gt box to match that box.
        best_anchor_per_gt = F.argmax(iou, axis=0)
        gt_count = valid_gt.shapeof(0)
        gt_ids = F.linspace(0, gt_count - 1, gt_count).astype("int32")

        best_gt = best_gt.set_ai(gt_ids)[best_anchor_per_gt]
        forced_iou = one.broadcast(gt_count)
        best_iou = best_iou.set_ai(forced_iou)[best_anchor_per_gt]
        positive = best_iou >= cfg.rpn_positive_overlap

    # mark the positives
    _, positive_rows = F.cond_take(positive == 1, positive)
    positive_value = one.broadcast(positive_rows.shapeof(0))
    labels = labels.set_ai(positive_value)[positive_rows]

    # regression targets against the matched boxes
    matched_boxes = valid_gt.ai[best_gt, :4]
    bbox_targets = self.box_coder.encode(anchors, matched_boxes)
    return labels, bbox_targets
def fwd(mask, x):
    """Return the values and indices produced by ``F.cond_take(mask, x)``."""
    values, indices = F.cond_take(mask, x)
    return values, indices
def mask_to_inds(mask):
    """Return the int32 indices that ``F.cond_take`` reports for ``mask``."""
    # The mask doubles as the data argument; only the indices are needed.
    taken = F.cond_take(mask, mask)
    positions = taken[1]
    return positions.astype(np.int32)
def fwd(a):
    """Apply ``F.cond_take`` to ``a`` using the condition ``a > 1``."""
    above_one = a > 1
    return F.cond_take(above_one, a)
def roi_pool(rpn_fms, rois, stride, pool_shape, roi_type='roi_align',
             labels=None, bbox_targets=None):
    """Pool RoI features per pyramid level and reorder the targets to match.

    Each RoI is assigned the level ``floor(4 + log2(sqrt(area) / 224))``,
    clamped to ``[log2(stride[0]), log2(stride[-1])]``. The pooled features
    are returned grouped by level, and ``rois`` / ``labels`` /
    ``bbox_targets`` are permuted into that same order.

    Args:
        rpn_fms: sequence of feature maps, one per entry of ``stride``.
        rois: 2-D tensor; columns 1..4 are read as box corners.
        stride: sequence of strides, one per feature map.
        pool_shape: output shape forwarded to the pooling operator.
        roi_type: ``'roi_pool'`` or ``'roi_align'``.
        labels: optional per-RoI labels (2-D; the last dimension is read).
        bbox_targets: optional per-RoI regression targets; required whenever
            ``labels`` is given.

    Returns:
        ``(pool_feature, rois, labels, bbox_targets)``. The last two are
        detached, or ``None`` when ``labels`` was not supplied.

    Raises:
        ValueError: if ``roi_type`` is unknown, or if ``labels`` is given
            without ``bbox_targets``. Both cases previously crashed later
            with an unrelated error.
    """
    if roi_type not in ('roi_pool', 'roi_align'):
        raise ValueError('unsupported roi_type: {!r}'.format(roi_type))
    if labels is not None and bbox_targets is None:
        raise ValueError('bbox_targets must be provided together with labels')
    # bbox_targets without labels is still accepted and ignored, as before.
    has_targets = labels is not None

    assert len(stride) == len(rpn_fms)
    canonical_level = 4
    canonical_box_size = 224
    min_level = math.log2(stride[0])
    max_level = math.log2(stride[-1])

    num_fms = len(rpn_fms)
    box_sizes = F.sqrt((rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2]))
    level_assignments = F.floor(
        canonical_level + F.log(box_sizes / canonical_box_size) / np.log(2))
    level_assignments = F.minimum(level_assignments, max_level)
    level_assignments = F.maximum(level_assignments, min_level)
    level_assignments = level_assignments - min_level

    # One dummy RoI per level is appended so that no level is empty;
    # available_masks marks the real rows (1) versus the dummies (0).
    available_masks = F.concat(
        [F.ones(level_assignments.shape[0]), F.zeros(num_fms)], axis=0)
    level_assignments = F.concat(
        [level_assignments, mge.tensor(np.arange(num_fms, dtype=np.int32))],
        axis=0)
    rois = F.concat([rois, F.zeros((num_fms, rois.shape[-1]))], axis=0)
    if has_targets:
        labels = F.concat(
            [labels, F.ones((num_fms, labels.shape[-1]))], axis=0)
        bbox_targets = F.concat(
            [bbox_targets, F.zeros((num_fms, bbox_targets.shape[-1]))],
            axis=0)

    pool_list, inds_list = [], []
    for i in range(num_fms):
        mask = F.equal(level_assignments, i)
        _, inds = F.cond_take(mask > 0, mask)
        rois_fm = rois[inds.astype(np.int32)]
        if roi_type == 'roi_pool':
            pool_fm = F.nn.roi_pooling(
                rpn_fms[i], rois_fm, pool_shape,
                mode='max', scale=1.0 / stride[i])
        else:  # 'roi_align', guaranteed by the validation above
            pool_fm = F.nn.roi_align(
                rpn_fms[i], rois_fm, pool_shape, mode='average',
                spatial_scale=1.0 / stride[i], sample_points=2, aligned=True)
        pool_list.append(pool_fm)
        inds_list.append(inds)

    # fm_order[k] is the original row of the k-th pooled feature.
    fm_order = F.concat(inds_list, axis=0)
    pool_feature = F.concat(pool_list, axis=0)

    # Drop the dummy rows while keeping the level-grouped order.
    ordered_available_masks = available_masks[fm_order]
    _, available_inds = F.cond_take(
        ordered_available_masks > 0, ordered_available_masks)
    available_inds = available_inds.astype(np.int32)
    pool_feature = pool_feature[available_inds]
    rois = rois[fm_order, :][available_inds]

    if not has_targets:
        return pool_feature, rois, None, None

    labels = labels[fm_order][available_inds]
    bbox_targets = bbox_targets[fm_order][available_inds]
    return pool_feature, rois, labels.detach(), bbox_targets.detach()
def get_losses(self, anchors, pred_logits, pred_offsets, gt_boxes, im_info):
    # pylint: disable=too-many-statements
    """Compute the positive and negative bag losses over a batch.

    For each image, the ``bucket_size`` anchors with the highest IoU to each
    ground-truth box form that box's bag. The positive loss combines the
    classification score and ``exp(-regression loss)`` of the bag members;
    the negative loss down-weights predictions by the per-anchor box
    probability derived from the decoded predictions.

    Args:
        anchors: anchors shared by all images.
        pred_logits: classification logits, indexed by image first.
        pred_offsets: regression offsets, indexed by image first.
        gt_boxes: ground-truth rows; columns 0..3 are the box and column 4
            the class id (shifted by -1 before use).
        im_info: per-image info; column 4 is used as the number of boxes.

    Returns:
        dict with ``"total_loss"``, ``"pos_loss"`` and ``"neg_loss"``.
    """

    def positive_bag_loss(logits, axis=1):
        weight = 1.0 / (1.0 - logits)
        weight /= weight.sum(axis=axis, keepdims=True)
        # Reduce over the same axis that was used for normalization; this
        # used to be hard-coded to 1, silently ignoring the argument.
        bag_prob = (weight * logits).sum(axis=axis)
        return -layers.safelog(bag_prob)

    def negative_bag_loss(logits, gamma):
        return (logits**gamma) * (-layers.safelog(1.0 - logits))

    pred_scores = F.sigmoid(pred_logits)
    box_prob_list = []
    positive_losses = []
    clamp_eps = 1e-7
    bucket_size = self.cfg.bucket_size

    for bid in range(im_info.shape[0]):
        boxes_info = gt_boxes[bid, :im_info[bid, 4].astype("int32")]
        # id 0 is used for background classes, so -1 first
        labels = boxes_info[:, 4].astype("int32") - 1

        pred_box = self.box_coder.decode(anchors, pred_offsets[bid]).detach()
        overlaps = layers.get_iou(boxes_info[:, :4], pred_box).detach()
        # Rescale IoU linearly from [thresh1, per-gt max] to [0, 1].
        thresh1 = self.cfg.box_iou_threshold
        thresh2 = F.clip(
            overlaps.max(axis=1, keepdims=True),
            lower=thresh1 + clamp_eps, upper=1.0,
        )
        gt_pred_prob = F.clip(
            (overlaps - thresh1) / (thresh2 - thresh1), lower=0, upper=1.0)

        image_boxes_prob = F.zeros(pred_logits.shape[1:]).detach()
        # guarantee that nonzero_idx is not empty
        if gt_pred_prob.max() > clamp_eps:
            _, nonzero_idx = F.cond_take(gt_pred_prob != 0, gt_pred_prob)
            # since nonzeros is only 1 dim, use num_anchor to get real indices
            num_anchors = gt_pred_prob.shape[1]
            anchors_idx = nonzero_idx % num_anchors
            gt_idx = nonzero_idx // num_anchors
            image_boxes_prob[anchors_idx, labels[gt_idx]] = (
                gt_pred_prob[gt_idx, anchors_idx])

        box_prob_list.append(image_boxes_prob)

        # construct bags for objects
        match_quality_matrix = layers.get_iou(
            boxes_info[:, :4], anchors).detach()
        num_gt = match_quality_matrix.shape[0]
        _, matched_idx = F.topk(
            match_quality_matrix,
            k=bucket_size,
            descending=True,
            no_sort=True,
        )

        matched_idx = matched_idx.detach()
        matched_idx_flatten = matched_idx.reshape(-1)

        # Pick, for every bag member, the score of its box's class.
        gather_idx = labels.reshape(-1, 1)
        gather_idx = F.broadcast_to(gather_idx, (num_gt, bucket_size))

        gather_src = pred_scores[bid, matched_idx_flatten]
        gather_src = gather_src.reshape(num_gt, bucket_size, -1)
        matched_score = F.indexing_one_hot(gather_src, gather_idx, axis=2)

        topk_anchors = anchors[matched_idx_flatten]
        boxes_broad_cast = F.broadcast_to(
            F.expand_dims(boxes_info[:, :4], axis=1),
            (num_gt, bucket_size, 4),
        ).reshape(-1, 4)

        matched_offsets = self.box_coder.encode(topk_anchors, boxes_broad_cast)

        reg_loss = layers.smooth_l1_loss(
            pred_offsets[bid, matched_idx_flatten],
            matched_offsets,
            beta=self.cfg.smooth_l1_beta,
        ).sum(axis=-1) * self.cfg.reg_loss_weight
        matched_reg_scores = F.exp(-reg_loss)

        positive_losses.append(
            positive_bag_loss(
                matched_score * matched_reg_scores.reshape(-1, bucket_size),
                axis=1,
            )
        )

    num_foreground = im_info[:, 4].sum()
    pos_loss = F.concat(positive_losses).sum() / F.maximum(1.0, num_foreground)
    box_probs = F.stack(box_prob_list, axis=0)

    neg_loss = negative_bag_loss(
        pred_scores * (1 - box_probs), self.cfg.focal_loss_gamma
    ).sum() / F.maximum(1.0, num_foreground * bucket_size)

    alpha = self.cfg.focal_loss_alpha
    pos_loss = pos_loss * alpha
    neg_loss = neg_loss * (1 - alpha)
    loss_dict = {
        "total_loss": pos_loss + neg_loss,
        "pos_loss": pos_loss,
        "neg_loss": neg_loss,
    }
    return loss_dict
def find_top_rpn_proposals(is_train, rpn_bbox_offsets_list, rpn_cls_prob_list,
                           all_anchors_list, im_info):
    """Decode, filter and NMS the RPN outputs of every image in the batch.

    Args:
        is_train: selects the train or test value of ``prev_nms_top_n``.
        rpn_bbox_offsets_list: per-level offset tensors, indexed by image.
        rpn_cls_prob_list: per-level two-class score tensors, indexed by
            image; the batch size is read from the first entry.
        all_anchors_list: per-level anchors.
        im_info: per-image info; column 2 scales the minimum box size.

    Returns:
        ``(rois, probs)``. RoI rows are the image index followed by the
        first four proposal columns. With a batch of one, the tensors of
        that image are returned directly; otherwise they are concatenated.
        NMS output is capped at 2000 boxes per image.
    """
    prev_nms_top_n = config.train_prev_nms_top_n \
        if is_train else config.test_prev_nms_top_n
    nms_threshold = config.rpn_nms_threshold
    box_min_size = config.rpn_min_box_size
    bbox_normalize_targets = config.rpn_bbox_normalize_targets
    bbox_normalize_means = config.bbox_normalize_means
    bbox_normalize_stds = config.bbox_normalize_stds
    list_size = len(rpn_bbox_offsets_list)

    if bbox_normalize_targets:
        # Loop-invariant de-normalization constants.
        std_opr = tensor(bbox_normalize_stds[None, :])
        mean_opr = tensor(bbox_normalize_means[None, :])

    return_rois, return_probs = [], []
    batch_per_gpu = rpn_cls_prob_list[0].shape[0]
    for bid in range(batch_per_gpu):
        batch_proposals_list = []
        batch_probs_list = []
        for level in range(list_size):
            # get proposals and probs
            offsets = rpn_bbox_offsets_list[level][bid] \
                .transpose(1, 2, 0).reshape(-1, 4)
            if bbox_normalize_targets:
                # Undo target normalization before decoding. The original
                # code referenced an undefined `pred_offsets` here and never
                # fed the result into the decoding step.
                offsets = offsets * std_opr + mean_opr
            all_anchors = all_anchors_list[level]
            proposals = bbox_transform_inv_opr(all_anchors, offsets)
            if config.anchor_within_border:
                proposals = clip_boxes_opr(proposals, im_info[bid, :])
            probs = rpn_cls_prob_list[level][bid] \
                .transpose(1, 2, 0).reshape(-1, 2)
            probs = F.softmax(probs)[:, 1]
            # gather the proposals and probs
            batch_proposals_list.append(proposals)
            batch_probs_list.append(probs)

        batch_proposals = F.concat(batch_proposals_list, axis=0)
        batch_probs = F.concat(batch_probs_list, axis=0)

        # filter the boxes with small size.
        wh = batch_proposals[:, 2:4] - batch_proposals[:, :2] + 1
        thresh = box_min_size * im_info[bid, 2]
        keep_mask = F.prod((wh >= thresh), axis=1)
        # If nothing survives, keep everything so later ops see a non-empty
        # tensor.
        keep_mask = keep_mask + F.equal(keep_mask.sum(), 0)
        keep_mask, inds = F.cond_take(keep_mask > 0, keep_mask)

        inds = inds.astype(np.int32)
        batch_proposals, batch_probs = batch_proposals[inds], batch_probs[inds]

        # prev_nms_top_n
        num_proposals = F.minimum(prev_nms_top_n, batch_proposals.shape[0])
        idx = F.argsort(batch_probs, descending=True)
        topk_idx = idx[:num_proposals].reshape(-1)
        batch_proposals = batch_proposals[topk_idx].detach()
        batch_probs = batch_probs[topk_idx].detach()

        # For each image, run a total-level NMS, and choose topk results.
        keep_inds = nms(batch_proposals, batch_probs, nms_threshold,
                        max_output=2000)
        batch_rois, batch_probs = (batch_proposals[keep_inds],
                                   batch_probs[keep_inds])

        # cons the rois
        batch_inds = F.ones((batch_rois.shape[0], 1)) * bid
        batch_rois = F.concat([batch_inds, batch_rois[:, :4]], axis=1)
        return_rois.append(batch_rois)
        return_probs.append(batch_probs)

    if batch_per_gpu == 1:
        return batch_rois, batch_probs

    concated_rois = F.concat(return_rois, axis=0)
    concated_probs = F.concat(return_probs, axis=0)
    return concated_rois, concated_probs
def fn(mask, data):
    """Forward ``mask`` and ``data`` to ``F.cond_take`` and return its result."""
    result = F.cond_take(mask, data)
    return result
def fpn_roi_target(rpn_rois, im_info, gt_boxes,
                   fg_threshold=config.fg_threshold, top_k=1):
    """Sample RoIs per image and build labels and box targets for the head.

    For each image the proposals are merged with the ground-truth boxes,
    matched against normal and ignore overlaps (keeping the ``top_k`` best
    matches per RoI), sampled with ``_bernoulli_sample_masks`` and encoded
    with ``bbox_transform_opr``.

    Returns:
        ``(rois, labels, bbox_targets)``, detached. When
        ``config.batch_per_gpu == 1`` the tensors of the last processed
        image are returned; otherwise the per-image results concatenated.
    """
    rois_per_image, labels_per_image, targets_per_image = [], [], []
    num_images = im_info.shape[0]
    sampling = True

    for image_id in range(num_images):
        gt_count = im_info[image_id, 5].astype(np.int32)
        gt_boxes_perimg = gt_boxes[image_id, :gt_count, :]
        dummy_gt = F.ones([1, gt_boxes_perimg.shape[1]])

        id_column = F.ones((gt_boxes_perimg.shape[0], 1)) * image_id
        gt_rois = F.concat([id_column, gt_boxes_perimg[:, :4]], axis=1)
        in_image = F.equal(rpn_rois[:, 0], image_id) > 0
        _, roi_rows = F.cond_take(in_image, in_image)

        if sampling:
            all_rois = F.concat([rpn_rois[roi_rows], gt_rois], axis=0)
        else:
            all_rois = rpn_rois[roi_rows]

        # A dummy row is appended to the ground truth before matching.
        gt_boxes_perimg = F.concat([gt_boxes_perimg, dummy_gt], axis=0)
        overlaps_normal, overlaps_ignore = box_overlap_ignore_opr(
            all_rois[:, 1:5], gt_boxes_perimg)

        # Sort both overlap matrices in descending order per RoI.
        normal_order = F.argsort(overlaps_normal, descending=True)
        overlaps_normal = F.gather(overlaps_normal, 1, normal_order)
        ignore_order = F.argsort(overlaps_ignore, descending=True)
        overlaps_ignore = F.gather(overlaps_ignore, 1, ignore_order)

        # best top_k overlaps and their ground-truth indices
        max_overlaps_normal = overlaps_normal[:, :top_k].flatten()
        gt_assignment_normal = normal_order[:, :top_k].flatten()
        max_overlaps_ignore = overlaps_ignore[:, :top_k].flatten()
        gt_assignment_ignore = ignore_order[:, :top_k].flatten()

        # Prefer the ignore match when the normal match is below the
        # foreground threshold and the ignore overlap is larger.
        below_fg = (max_overlaps_normal < fg_threshold).astype(np.float32)
        ignore_wins = (max_overlaps_ignore > max_overlaps_normal).astype(
            np.float32)
        ignore_assign_mask = below_fg * ignore_wins

        max_overlaps = (
            max_overlaps_normal * (1 - ignore_assign_mask).astype(np.float32)
            + max_overlaps_ignore * ignore_assign_mask
        )
        gt_assignment = (
            gt_assignment_normal * (1 - ignore_assign_mask)
            + gt_assignment_ignore * ignore_assign_mask
        )
        gt_assignment = gt_assignment.astype(np.int32)

        labels = gt_boxes_perimg[gt_assignment, 4]
        fg_mask = (max_overlaps >= fg_threshold).astype(np.float32) * (
            1 - F.equal(labels, config.ignore_label))
        under_high = (max_overlaps < config.bg_threshold_high).astype(
            np.float32)
        over_low = (max_overlaps >= config.bg_threshold_low).astype(
            np.float32)
        bg_mask = under_high * over_low

        fg_mask = fg_mask.reshape(-1, top_k)
        bg_mask = bg_mask.reshape(-1, top_k)

        pos_max = config.num_rois * config.fg_ratio
        if sampling:
            fg_inds_mask = _bernoulli_sample_masks(fg_mask[:, 0], pos_max, 1)
        else:
            fg_inds_mask = F.equal(fg_mask[:, 0], 0)
        neg_max = config.num_rois - fg_inds_mask.sum()
        if sampling:
            bg_inds_mask = _bernoulli_sample_masks(bg_mask[:, 0], neg_max, 1)
        else:
            bg_inds_mask = F.equal(bg_mask[:, 0], 0)

        labels = labels * fg_mask.reshape(-1)

        keep_mask = fg_inds_mask + bg_inds_mask
        # If nothing was sampled, keep every row instead of an empty set.
        keep_mask = keep_mask + F.equal(keep_mask.sum(), 0)
        _, keep_inds = F.cond_take(keep_mask > 0, keep_mask)

        labels = labels.reshape(-1, top_k)[keep_inds]
        kept_assignment = gt_assignment.reshape(-1, top_k)[keep_inds]
        gt_assignment = kept_assignment.reshape(-1).astype(np.int32)
        target_boxes = gt_boxes_perimg[gt_assignment, :4]

        rois = all_rois[keep_inds]
        # Repeat every kept RoI top_k times to pair it with each target.
        n, c = rois.shape[0], rois.shape[1]
        target_rois = F.broadcast_to(
            F.expand_dims(rois, 1), (n, top_k, c)).reshape(-1, c)
        bbox_targets = bbox_transform_opr(
            target_rois[:, 1:5], target_boxes[:, :4])

        if config.rcnn_bbox_normalize_targets:
            std_opr = mge.tensor(config.bbox_normalize_stds[None, :]).to(
                rois.device)
            mean_opr = mge.tensor(config.bbox_normalize_means[None, :]).to(
                rois.device)
            minus_opr = mean_opr / std_opr
            bbox_targets = bbox_targets / std_opr - minus_opr

        bbox_targets = bbox_targets.reshape(-1, top_k * 4)
        rois_per_image.append(rois)
        labels_per_image.append(labels)
        targets_per_image.append(bbox_targets)

    if config.batch_per_gpu == 1:
        rois = rois.detach()
        labels = labels.detach()
        bbox_targets = bbox_targets.detach()
        return rois, labels, bbox_targets

    return_rois = F.concat(rois_per_image, axis=0)
    return_labels = F.concat(labels_per_image, axis=0)
    return_bbox_targets = F.concat(targets_per_image, axis=0)

    return_rois = return_rois.detach()
    return_labels = return_labels.detach()
    return_bbox_targets = return_bbox_targets.detach()
    return return_rois, return_labels, return_bbox_targets
def get_ground_truth(self, rpn_rois, im_info, gt_boxes):
    """Sample RoIs per image and build their class labels and box targets.

    Outside training the proposals are returned untouched together with two
    ``None`` placeholders. In training, proposals and ground-truth boxes are
    merged per image, matched via normal and ignore overlaps, sampled with
    ``self._bernoulli_sample_masks`` and encoded with ``self.box_coder``.

    Returns:
        ``(rois, labels, bbox_targets)``, concatenated over the batch and
        wrapped in ``F.zero_grad``.
    """
    if not self.training:
        return rpn_rois, None, None

    cfg = self.cfg
    rois_per_image, labels_per_image, targets_per_image = [], [], []

    for image_id in range(cfg.batch_per_gpu):
        gt_count = im_info[image_id, 4]
        image_gt = gt_boxes[image_id, :gt_count, :]
        id_column = mge.ones((image_gt.shapeof(0), 1)) * image_id
        gt_as_rois = F.concat([id_column, image_gt[:, :4]], axis=1)
        in_image = rpn_rois[:, 0] == image_id
        _, roi_rows = F.cond_take(in_image == 1, in_image)
        # candidates : [batch_id, x1, y1, x2, y2]
        candidates = F.concat([rpn_rois.ai[roi_rows], gt_as_rois])

        iou_normal, iou_ignore = layers.get_iou(
            candidates[:, 1:5], image_gt, return_ignore=True,
        )

        best_normal = iou_normal.max(axis=1)
        best_normal_gt = F.argmax(iou_normal, axis=1)
        best_ignore = iou_ignore.max(axis=1)
        best_ignore_gt = F.argmax(iou_ignore, axis=1)

        # Prefer the ignore match when the normal match is below the
        # foreground threshold and the ignore overlap is larger.
        use_ignore = (best_normal < cfg.fg_threshold) * (
            best_ignore > best_normal
        )
        best_iou = (
            best_normal * (1 - use_ignore) + best_ignore * use_ignore
        )
        best_gt = (
            best_normal_gt * (1 - use_ignore) + best_ignore_gt * use_ignore
        )
        best_gt = best_gt.astype("int32")
        labels = image_gt.ai[best_gt, 4]

        # ---------------- fg/bg split for each roi ---------------#
        is_fg = (best_iou >= cfg.fg_threshold) * (
            labels != cfg.ignore_label
        )
        is_bg = (best_iou < cfg.bg_threshold_high) * (
            best_iou >= cfg.bg_threshold_low
        )

        fg_quota = cfg.num_rois * cfg.fg_ratio
        fg_sampled = self._bernoulli_sample_masks(is_fg, fg_quota, 1)
        bg_quota = cfg.num_rois - fg_sampled.sum()
        bg_sampled = self._bernoulli_sample_masks(is_bg, bg_quota, 1)

        # Labels of everything not sampled as foreground become 0.
        labels = labels * fg_sampled

        keep_mask = fg_sampled + bg_sampled
        _, keep = F.cond_take(keep_mask == 1, keep_mask)
        # Cap the number of kept rois to avoid exceeding memory.
        limit = F.minimum(cfg.num_rois, keep.shapeof(0))
        keep = keep[:limit]

        labels = labels.ai[keep].astype("int32")
        rois = candidates.ai[keep]
        matched_gt = image_gt.ai[best_gt.ai[keep], :4]
        targets = self.box_coder.encode(rois[:, 1:5], matched_gt)
        targets = targets.reshape(-1, 4)

        rois_per_image.append(rois)
        labels_per_image.append(labels)
        targets_per_image.append(targets)

    all_rois = F.zero_grad(F.concat(rois_per_image, axis=0))
    all_labels = F.zero_grad(F.concat(labels_per_image, axis=0))
    all_targets = F.zero_grad(F.concat(targets_per_image, axis=0))
    return all_rois, all_labels, all_targets
def find_top_rpn_proposals(
    self, rpn_bbox_offsets_list, rpn_cls_prob_list, all_anchors_list, im_info
):
    """Select the top RPN proposals of every image via per-level top-k and NMS.

    Per level, offsets are decoded against the anchors and the highest
    scoring ``prev_nms_top_n`` proposals are kept. The merged proposals are
    clipped, filtered, sorted by score and passed to ``batched_nms`` with
    the level as batch id.

    Returns:
        ``F.zero_grad`` of the concatenated rows, each made of the image
        index followed by the first four proposal columns.
    """
    cfg = self.cfg
    if self.training:
        prev_nms_top_n = cfg.train_prev_nms_top_n
        post_nms_top_n = cfg.train_post_nms_top_n
        num_images = cfg.batch_per_gpu
    else:
        prev_nms_top_n = cfg.test_prev_nms_top_n
        post_nms_top_n = cfg.test_post_nms_top_n
        num_images = 1
    nms_threshold = cfg.rpn_nms_threshold

    rois_per_image = []
    for image_id in range(num_images):
        level_proposals, level_scores, level_ids = [], [], []
        for level, level_offsets in enumerate(rpn_bbox_offsets_list):
            # decode this level's proposals
            offsets = level_offsets[image_id].dimshuffle(2, 3, 0, 1)
            offsets = offsets.reshape(-1, 4)
            boxes = self.box_coder.decode(all_anchors_list[level], offsets)

            probs = rpn_cls_prob_list[level][image_id, 1]
            probs = probs.dimshuffle(1, 2, 0).reshape(1, -1)
            # keep the best prev_nms_top_n entries of the level
            probs, order = F.argsort(probs, descending=True)
            top_n = F.minimum(probs.shapeof(1), prev_nms_top_n)
            probs = probs.reshape(-1)[:top_n]
            order = order.reshape(-1)[:top_n]
            boxes = boxes.ai[order, :]

            level_proposals.append(boxes)
            level_scores.append(probs)
            level_ids.append(mge.ones(probs.shapeof(0)) * level)

        proposals = F.concat(level_proposals, axis=0)
        scores = F.concat(level_scores, axis=0)
        level = F.concat(level_ids, axis=0)

        proposals = layers.get_clipped_box(proposals, im_info[image_id, :])
        # drop the boxes rejected by filter_boxes
        keep_mask = layers.filter_boxes(proposals)
        _, kept = F.cond_take(keep_mask == 1, keep_mask)
        proposals = proposals.ai[kept, :]
        scores = scores.ai[kept]
        level = level.ai[kept]

        # order everything by descending score before NMS
        scores, order = F.argsort(scores.reshape(1, -1), descending=True)
        order = order.reshape(-1)
        proposals = proposals.ai[order, :]
        level = level.ai[order]

        # NMS over all levels at once
        scored_boxes = F.concat([proposals, scores.reshape(-1, 1)], axis=1)
        nms_kept = batched_nms(
            proposals, scores, level, nms_threshold, post_nms_top_n)
        scored_boxes = scored_boxes.ai[nms_kept]

        # rows: image index followed by the four box columns
        id_column = mge.ones((scored_boxes.shapeof(0), 1)) * image_id
        rois_per_image.append(
            F.concat([id_column, scored_boxes[:, :4]], axis=1))

    return F.zero_grad(F.concat(rois_per_image, axis=0))
def _anchor_double_target(gt_boxes, im_info, all_anchors):
    """Build two label/target slots per anchor with a per-box IoU threshold.

    A per-ground-truth threshold is computed as the mean plus standard
    deviation of the IoUs of the (up to 16) closest anchors on each of five
    levels, floored at 0.2. Anchors must also have their centre inside the
    area returned by ``_compute_pos_area``. The two best candidates per
    anchor are returned.

    Returns:
        ``(labels, bbox_targets, labels_cat)``, each with two consecutive
        entries per anchor.
    """
    gt_boxes = gt_boxes.detach()
    im_info = im_info.detach()
    all_anchors = all_anchors.detach()

    gt_boxes = gt_boxes[:im_info[5].astype(np.int32), :]
    # Append a row of -1 so the ground truth is never empty.
    dummy = -F.ones([1, gt_boxes.shape[1]]).to(gt_boxes.device)
    gt_boxes = F.concat([gt_boxes, dummy], axis=0)
    # Rows whose column 4 is negative are masked out.
    valid_mask = 1 - (gt_boxes[:, 4] < 0).astype(np.float32)

    anchor_centers = _compute_center(all_anchors)
    gtboxes_centers = _compute_center(gt_boxes)
    gtboxes_centers = gtboxes_centers * F.expand_dims(valid_mask, axis=1)

    N, K = all_anchors.shape[0], gt_boxes.shape[0]
    an_centers = F.expand_dims(anchor_centers, axis=1)
    gt_centers = F.expand_dims(gtboxes_centers, axis=0)

    # Euclidean distance between every anchor centre and every gt centre.
    center_delta = F.abs(an_centers - gt_centers)
    distance = F.sqrt(F.pow(center_delta, 2).sum(axis=2))

    start = 0
    end = 5
    overlaps = box_overlap_opr(all_anchors[:, :4], gt_boxes[:, :4])
    overlaps *= F.expand_dims(valid_mask, axis=0)

    default_num = 16
    level_ious = []
    for level in range(start, end):
        # Column 4 of the anchors is compared with the level number.
        _, rows = F.cond_take(all_anchors[:, 4] == level, all_anchors[:, 4])

        dist_by_gt = distance[rows, :].transpose(1, 0)
        iou_by_gt = overlaps[rows, :].transpose(1, 0)
        nearest = F.argsort(dist_by_gt, descending=False)
        take = min(nearest.shape[1], default_num)
        picked = F.gather(iou_by_gt, 1, nearest[:, :take]).transpose(1, 0)
        level_ious.append(picked)

    ious = F.concat(level_ious, axis=0)
    mean_var = F.mean(ious, axis=0)
    std_var = F.std(ious, 0)
    iou_thresh_per_gt = F.maximum(mean_var + std_var, 0.2)

    # limits the anchor centers in the gtboxes
    N, K = all_anchors.shape[0], gt_boxes.shape[0]
    anchor_points = an_centers
    pos_area = _compute_pos_area(gt_boxes, 0.3)
    pos_area = F.broadcast_to(
        F.expand_dims(pos_area, axis=0), (N, K, pos_area.shape[-1]))

    left = anchor_points[:, :, 0] - pos_area[:, :, 0]
    right = pos_area[:, :, 2] - anchor_points[:, :, 0]
    top = anchor_points[:, :, 1] - pos_area[:, :, 1]
    bottom = pos_area[:, :, 3] - anchor_points[:, :, 1]

    is_in_gt = F.stack([left, right, top, bottom], axis=2)
    is_in_gt = is_in_gt.min(axis=2) > 0.1
    passes_thresh = overlaps >= F.expand_dims(iou_thresh_per_gt, axis=0)
    valid_mask = passes_thresh * is_in_gt.astype(np.float32)
    ious = overlaps * valid_mask

    # NOTE(review): the second positional argument 1 is presumably the
    # `descending` flag -- confirm against the installed MegEngine version.
    sorted_index = F.argsort(ious, 1)
    sorted_overlaps = F.gather(ious, 1, sorted_index)
    max_overlaps = sorted_overlaps[:, :2].flatten()
    argmax_overlaps = sorted_index[:, :2].flatten()

    n, c = all_anchors.shape
    device = all_anchors.device
    labels = -F.ones(2 * n).to(device)
    positive_mask = (max_overlaps >= 0.2).to(device).astype(np.float32)
    negative_mask = (max_overlaps < 0.2).to(device).astype(np.float32)
    labels = positive_mask + labels * (1 - positive_mask) * (1 - negative_mask)

    bbox_targets = gt_boxes[argmax_overlaps, :4]
    # Duplicate every anchor so it lines up with its two candidates.
    all_anchors = F.broadcast_to(
        F.expand_dims(all_anchors, axis=1), (n, 2, c)).reshape(-1, c)
    bbox_targets = bbox_transform_opr(all_anchors[:, :4], bbox_targets)

    labels_cat = gt_boxes[argmax_overlaps, 4]
    is_unlabelled = F.equal(labels, -1).astype(np.float32)
    labels_cat = labels_cat * (
        1 - F.equal(labels, -1).astype(np.float32)) - is_unlabelled

    return labels, bbox_targets, labels_cat