def roi_pool(rpn_fms, rois, stride, pool_shape, roi_type='roi_align',
             labels=None, bbox_targets=None):
    """Pool every ROI from the feature-pyramid level that matches its size.

    Each ROI is assigned to a level with
    ``floor(4 + log2(sqrt(w * h) / 224))``, clamped to
    ``[log2(stride[0]), log2(stride[-1])]`` and shifted so that it indexes
    ``rpn_fms``.  Pooling then runs once per level and the results are
    concatenated.

    Args:
        rpn_fms: sequence of feature maps, one per pyramid level.
        rois: 2-D tensor; columns 1..4 are read as the box corners used for
            the size computation (column 0 is presumably the batch index --
            TODO confirm against the caller).
        stride: sequence of strides, one per entry of ``rpn_fms``.  The
            clamping assumes ``stride[0]`` is the smallest and ``stride[-1]``
            the largest.
        pool_shape: output shape forwarded to the pooling operator.
        roi_type: ``'roi_align'`` (default) or ``'roi_pool'``.
        labels: optional per-ROI labels, reordered alongside ``rois``.
        bbox_targets: optional per-ROI regression targets; read only when
            ``labels`` is given.

    Returns:
        ``(pool_feature, rois, labels, bbox_targets)``.  The rows are grouped
        by pyramid level, not in the input order.  ``labels`` and
        ``bbox_targets`` are wrapped in ``F.zero_grad`` when ``labels`` was
        supplied and are ``None`` otherwise.

    Raises:
        ValueError: if ``roi_type`` is not one of the two supported modes.
    """
    # Reject an unknown pooling mode before any work is done.  Without this
    # check neither branch in the loop below binds `pool_fm`, and the call
    # fails with an UnboundLocalError instead of a meaningful message.
    if roi_type not in ('roi_pool', 'roi_align'):
        raise ValueError(
            "roi_type must be 'roi_pool' or 'roi_align', got {!r}".format(roi_type))
    assert len(stride) == len(rpn_fms)

    canonical_level = 4
    canonical_box_size = 224
    min_level = math.log2(stride[0])
    max_level = math.log2(stride[-1])
    num_fms = len(rpn_fms)

    box_sizes = F.sqrt((rois[:, 3] - rois[:, 1]) * (rois[:, 4] - rois[:, 2]))
    level_assignments = F.floor(
        canonical_level + F.log(box_sizes / canonical_box_size) / np.log(2)
    )
    level_assignments = F.minimum(level_assignments, max_level)
    level_assignments = F.maximum(level_assignments, min_level)
    level_assignments = level_assignments - min_level

    # One all-zero dummy ROI is appended per level (assigned to levels
    # 0..num_fms-1), presumably so that no level is pooled with an empty ROI
    # set.  `available_masks` is 1 for real ROIs and 0 for the dummies, which
    # are filtered out again after pooling.
    available_masks = F.concat(
        [mge.ones(level_assignments.shapeof()[0]), mge.zeros(num_fms)], axis=0)
    level_assignments = F.concat(
        [level_assignments, mge.tensor(np.arange(num_fms, dtype=np.int32))], axis=0)
    rois = F.concat([rois, mge.zeros((num_fms, rois.shapeof()[-1]))], axis=0)
    if labels is not None:
        labels = F.concat(
            [labels, mge.ones((num_fms, labels.shapeof()[-1]))], axis=0)
        bbox_targets = F.concat(
            [bbox_targets, mge.zeros((num_fms, bbox_targets.shapeof()[-1]))], axis=0)

    pool_list, inds_list = [], []
    for level, (fm, fm_stride) in enumerate(zip(rpn_fms, stride)):
        inds = mask_to_inds(level_assignments == level)
        rois_fm = rois.ai[inds]
        if roi_type == 'roi_pool':
            pool_fm = F.roi_pooling(
                fm, rois_fm, pool_shape, mode='max', scale=1.0/fm_stride)
        else:  # 'roi_align', guaranteed by the validation above
            pool_fm = F.roi_align(
                fm, rois_fm, pool_shape, mode='average',
                spatial_scale=1.0/fm_stride, sample_points=2, aligned=True)
        pool_list.append(pool_fm)
        inds_list.append(inds)

    # `fm_order` maps each pooled row back to its row in the padded `rois`.
    fm_order = F.concat(inds_list, axis=0)
    pool_feature = F.concat(pool_list, axis=0)
    ordered_available_masks = available_masks.ai[fm_order]
    available_inds = mask_to_inds(ordered_available_masks)
    pool_feature = pool_feature.ai[available_inds]
    rois = rois.ai[fm_order, :].ai[available_inds, :]
    if labels is not None:
        labels = labels.ai[fm_order].ai[available_inds]
        bbox_targets = bbox_targets.ai[fm_order, :].ai[available_inds, :]
        return pool_feature, rois, F.zero_grad(labels), F.zero_grad(bbox_targets)
    return pool_feature, rois, None, None
def fpn_anchor_target_opr_core_impl(gt_boxes, im_info, anchors, allow_low_quality_matches=True):
    """Compute a label and a box-regression target for every anchor.

    Args:
        gt_boxes: 2-D tensor of ground-truth rows; the first four columns are
            used as the box and the last column is tested with ``> 0`` to
            decide whether the row is kept.
        im_info: indexable; ``im_info[5]`` is the number of leading rows of
            ``gt_boxes`` that are considered.
        anchors: 2-D tensor of anchors; ``anchors.shape[0]`` is their count.
        allow_low_quality_matches: when true, the anchor with the highest
            overlap for each kept ground-truth box is forced to match it.

    Returns:
        ``(labels, bbox_targets)``.  ``labels`` starts as
        ``config.ignore_label`` for every anchor, becomes 0 where the best
        overlap is below ``config.rpn_negative_overlap`` and 1 for foreground
        anchors.  ``bbox_targets`` comes from ``bbox_transform_opr`` and is
        normalised when ``config.rpn_bbox_normalize_targets`` is set.
    """
    ignore_label = config.ignore_label

    # Restrict to the first im_info[5] rows, then keep rows whose last column is > 0.
    candidate_gt = gt_boxes[:im_info[5], :]
    kept_rows = mask_to_inds(candidate_gt[:, -1] > 0)
    kept_gt = candidate_gt.ai[kept_rows]

    # Overlap matrix; axis 1 is reduced per anchor and axis 0 per gt box below.
    iou = box_overlap_opr(anchors, kept_gt[:, :4])

    num_anchors = anchors.shape[0]
    best_iou = F.max(iou, axis=1)
    best_gt = F.argmax(iou, axis=1)

    # Everything starts as "ignore"; multiplying by the mask zeroes the
    # anchors whose best overlap is below the negative threshold.
    labels = mge.ones(num_anchors).astype(np.int32) * ignore_label
    labels = labels * (best_iou >= config.rpn_negative_overlap)

    positive_mask = (best_iou >= config.rpn_positive_overlap)
    one = mge.tensor(1.0)
    if allow_low_quality_matches:
        # Not consumed below; kept so the set of issued ops is unchanged.
        gt_max_overlaps = F.max(iou, axis=0)
        top_anchor_per_gt = F.argmax(iou, axis=0)
        num_gt = kept_gt.shapeof()[0]
        gt_ids = F.linspace(0, num_gt - 1, num_gt).astype(np.int32)
        # set_ai(value)[index] is assumed to yield the tensor with the indexed
        # entries replaced -- TODO confirm.  The top anchor of each gt box is
        # pointed at that box and its overlap is overwritten with 1.0.
        best_gt = best_gt.set_ai(gt_ids)[top_anchor_per_gt]
        best_iou = best_iou.set_ai(one.broadcast(num_gt))[top_anchor_per_gt]
        positive_mask = (best_iou >= config.rpn_positive_overlap)

    # Foreground anchors get label 1.
    positive_inds = mask_to_inds(positive_mask)
    labels = labels.set_ai(one.broadcast(positive_inds.shapeof()))[positive_inds]

    # Regression targets towards the matched gt box of every anchor.
    matched_boxes = kept_gt.ai[best_gt, :4]
    bbox_targets = bbox_transform_opr(anchors, matched_boxes)
    if config.rpn_bbox_normalize_targets:
        stds = mge.tensor(config.bbox_normalize_stds[None, :])
        means = mge.tensor(config.bbox_normalize_means[None, :])
        shift = means / stds
        bbox_targets = bbox_targets / stds - shift
    return labels, bbox_targets
def forward(self, features, im_info, boxes=None):
    """Run the RPN head over all feature maps; add the RPN losses in training.

    Args:
        features: sequence of feature maps.  The first map is given anchor
            stride ``2 ** (len(features) + 1)`` and each following map half
            the stride of the previous one.
        im_info: passed through to proposal selection and target assignment.
        boxes: ground-truth boxes, read only when ``self.training`` is true.

    Returns:
        ``rpn_rois`` in evaluation mode; ``(rpn_rois, loss_dict)`` in training
        mode, where ``loss_dict`` has the keys ``'loss_rpn_cls'`` and
        ``'loss_rpn_loc'``.
    """
    # Shared conv + ReLU, followed by the score and box-offset heads.
    cls_scores_per_level = []
    bbox_offsets_per_level = []
    for feature in features:
        hidden = F.relu(self.rpn_conv(feature))
        cls_scores_per_level.append(self.rpn_cls_score(hidden))
        bbox_offsets_per_level.append(self.rpn_bbox_offsets(hidden))

    # Anchors for each map; the stride halves from one map to the next.
    num_levels = len(features)
    anchors_per_level = [
        self.anchors_generator(feature, 2 ** (num_levels + 1 - level))
        for level, feature in enumerate(features)
    ]

    # Pick the proposals from the raw predictions.
    rpn_rois, _rpn_probs = find_top_rpn_proposals(
        self.training, bbox_offsets_per_level, cls_scores_per_level,
        anchors_per_level, im_info)

    if not self.training:
        return rpn_rois

    rpn_labels, rpn_bbox_targets = fpn_anchor_target(
        boxes, im_info, anchors_per_level)
    pred_cls_score, pred_bbox_offsets = fpn_rpn_reshape(
        cls_scores_per_level, bbox_offsets_per_level)

    # Classification loss only over anchors whose label is >= 0.
    valid_masks = rpn_labels >= 0
    valid_inds = mask_to_inds(valid_masks)
    valid_scores = pred_cls_score.ai[valid_inds]
    valid_labels = rpn_labels.ai[valid_inds]
    objectness_loss = softmax_loss(valid_scores, valid_labels)

    # Regression loss is masked so that only anchors with label > 0 contribute.
    pos_masks = rpn_labels > 0
    localization_loss = smooth_l1_loss(
        pred_bbox_offsets, rpn_bbox_targets, config.rpn_smooth_l1_beta)
    localization_loss = localization_loss * pos_masks

    # Both losses are divided by the number of valid anchors.
    normalizer = 1.0 / (valid_masks.sum())
    loss_dict = {
        'loss_rpn_cls': objectness_loss.sum() * normalizer,
        'loss_rpn_loc': localization_loss.sum() * normalizer,
    }
    return rpn_rois, loss_dict
def cascade_roi_target(rpn_rois, im_info, gt_boxes, pos_threshold=0.5, top_k=1):
    """Build per-ROI labels and box-regression targets for each image.

    For each of the ``config.batch_per_gpu`` images, the image's proposals
    plus its ground-truth boxes form the ROI set.  Every ROI is matched to
    its ``top_k`` highest-overlap ground-truth boxes.  No sampling is
    applied: all ROIs are returned.

    Args:
        rpn_rois: 2-D tensor; column 0 is compared with the image index and
            columns 1..4 are used as the box.
        im_info: ``im_info[bid, 5]`` is the number of leading ground-truth
            rows considered for image ``bid``.
        gt_boxes: 3-D tensor indexed as ``[image, row, column]``; columns
            0..3 are the box and column 4 is read as the label.
        pos_threshold: accepted but not read by this function.
            NOTE(review): the foreground test uses ``config.fg_threshold``
            instead -- confirm whether this parameter was meant to be used.
        top_k: number of best-overlapping ground-truth boxes kept per ROI.

    Returns:
        ``(rois, labels, bbox_targets)``, each wrapped in ``F.zero_grad``.
        ``labels`` is reshaped to ``(-1, top_k)`` and ``bbox_targets`` to
        ``(-1, top_k * 4)``.  With more than one image the per-image results
        are concatenated along axis 0.
    """
    rois_per_image = []
    labels_per_image = []
    targets_per_image = []

    for bid in range(config.batch_per_gpu):
        gt_boxes_perimg = gt_boxes[bid, :im_info[bid, 5], :]

        # Ground-truth boxes are appended to the proposals as extra ROIs,
        # prefixed with a column holding this image's index.
        batch_inds = mge.ones((gt_boxes_perimg.shapeof()[0], 1)) * bid
        gt_rois = F.concat([batch_inds, gt_boxes_perimg[:, :4]], axis=1)
        image_roi_inds = mask_to_inds(rpn_rois[:, 0] == bid)
        all_rois = F.concat([rpn_rois.ai[image_roi_inds], gt_rois], axis=0)

        # Two overlap matrices, each sorted per ROI in descending order.
        iou_normal, iou_ignore = box_overlap_ignore_opr(
            all_rois[:, 1:5], gt_boxes_perimg)
        iou_normal, order_normal = F.argsort(iou_normal, descending=True)
        iou_ignore, order_ignore = F.argsort(iou_ignore, descending=True)

        # Best top_k overlaps and the gt indices they belong to.
        max_overlaps_normal = iou_normal[:, :top_k].reshape(-1)
        gt_assignment_normal = order_normal[:, :top_k].reshape(-1)
        max_overlaps_ignore = iou_ignore[:, :top_k].reshape(-1)
        gt_assignment_ignore = order_ignore[:, :top_k].reshape(-1)

        # Use the "ignore" match where the normal match is below the
        # foreground threshold and the ignore overlap is larger.
        use_ignore = (max_overlaps_normal < config.fg_threshold) * (
            max_overlaps_ignore > max_overlaps_normal)
        max_overlaps = (max_overlaps_normal * (1 - use_ignore)
                        + max_overlaps_ignore * use_ignore)
        gt_assignment = (gt_assignment_normal * (1 - use_ignore)
                         + gt_assignment_ignore * use_ignore)
        gt_assignment = gt_assignment.astype(np.int32)

        labels = gt_boxes_perimg.ai[gt_assignment, 4]
        fg_mask = (max_overlaps >= config.fg_threshold) * (
            1 - F.equal(labels, config.ignore_label))
        # bg_mask is computed but not consumed below.
        bg_mask = (max_overlaps < config.bg_threshold_high) * (
            max_overlaps >= config.bg_threshold_low)
        fg_mask = fg_mask.reshape(-1, top_k)
        bg_mask = bg_mask.reshape(-1, top_k)

        # Non-foreground matches get label 0.
        labels = labels * fg_mask.reshape(-1)
        labels = labels.reshape(-1, top_k)

        gt_assignment = gt_assignment.reshape(-1, top_k).reshape(-1)
        target_boxes = gt_boxes_perimg.ai[gt_assignment, :4]

        # Repeat every ROI top_k times so it lines up with target_boxes.
        roi_width = all_rois.shapeof()[-1]
        target_shape = (all_rois.shapeof()[0], top_k, roi_width)
        target_rois = F.add_axis(all_rois, 1).broadcast(target_shape).reshape(
            -1, all_rois.shapeof()[-1])

        bbox_targets = bbox_transform_opr(target_rois[:, 1:5], target_boxes)
        if config.rcnn_bbox_normalize_targets:
            stds = mge.tensor(config.bbox_normalize_stds[None, :])
            means = mge.tensor(config.bbox_normalize_means[None, :])
            shift = means / stds
            bbox_targets = bbox_targets / stds - shift
        bbox_targets = bbox_targets.reshape(-1, top_k * 4)

        rois_per_image.append(all_rois)
        labels_per_image.append(labels)
        targets_per_image.append(bbox_targets)

    if config.batch_per_gpu == 1:
        # Single image: return its tensors directly, without concatenation.
        return F.zero_grad(all_rois), F.zero_grad(labels), F.zero_grad(bbox_targets)

    merged_rois = F.concat(rois_per_image, axis=0)
    merged_labels = F.concat(labels_per_image, axis=0)
    merged_targets = F.concat(targets_per_image, axis=0)
    return F.zero_grad(merged_rois), F.zero_grad(merged_labels), F.zero_grad(merged_targets)