def refine_detections(rois, probs, deltas, batch_ixs, cf): """ Refine classified proposals, filter overlaps and return final detections. :param rois: (n_proposals, 2 * dim) normalized boxes as proposed by RPN. n_proposals = batch_size * POST_NMS_ROIS :param probs: (n_proposals, n_classes) softmax probabilities for all rois as predicted by mrcnn classifier. :param deltas: (n_proposals, n_classes, 2 * dim) box refinement deltas as predicted by mrcnn bbox regressor. :param batch_ixs: (n_proposals) batch element assignemnt info for re-allocation. :return: result: (n_final_detections, (y1, x1, y2, x2, (z1), (z2), batch_ix, pred_class_id, pred_score)) """ # class IDs per ROI. Since scores of all classes are of interest (not just max class), all are kept at this point. class_ids = [] fg_classes = cf.head_classes - 1 # repeat vectors to fill in predictions for all foreground classes. for ii in range(1, fg_classes + 1): class_ids += [ii] * rois.shape[0] class_ids = torch.from_numpy(np.array(class_ids)).cuda() rois = rois.repeat(fg_classes, 1) probs = probs.repeat(fg_classes, 1) deltas = deltas.repeat(fg_classes, 1, 1) batch_ixs = batch_ixs.repeat(fg_classes) # get class-specific scores and bounding box deltas idx = torch.arange(class_ids.size()[0]).long().cuda() class_scores = probs[idx, class_ids] deltas_specific = deltas[idx, class_ids] batch_ixs = batch_ixs[idx] # apply bounding box deltas. re-scale to image coordinates. std_dev = torch.from_numpy(np.reshape(cf.rpn_bbox_std_dev, [1, cf.dim * 2])).float().cuda() scale = torch.from_numpy(cf.scale).float().cuda() refined_rois = mutils.apply_box_deltas_2D(rois, deltas_specific * std_dev) * scale if cf.dim == 2 else \ mutils.apply_box_deltas_3D(rois, deltas_specific * std_dev) * scale # round and cast to int since we're deadling with pixels now refined_rois = mutils.clip_to_window(cf.window, refined_rois) refined_rois = torch.round(refined_rois) # filter out low confidence boxes keep = idx keep_bool = (class_scores >= cf.model_min_confidence) if 0 not in torch.nonzero(keep_bool).size(): score_keep = torch.nonzero(keep_bool)[:, 0] pre_nms_class_ids = class_ids[score_keep] pre_nms_rois = refined_rois[score_keep] pre_nms_scores = class_scores[score_keep] pre_nms_batch_ixs = batch_ixs[score_keep] for j, b in enumerate(mutils.unique1d(pre_nms_batch_ixs)): bixs = torch.nonzero(pre_nms_batch_ixs == b)[:, 0] bix_class_ids = pre_nms_class_ids[bixs] bix_rois = pre_nms_rois[bixs] bix_scores = pre_nms_scores[bixs] for i, class_id in enumerate(mutils.unique1d(bix_class_ids)): ixs = torch.nonzero(bix_class_ids == class_id)[:, 0] # nms expects boxes sorted by score. ix_rois = bix_rois[ixs] ix_scores = bix_scores[ixs] ix_scores, order = ix_scores.sort(descending=True) ix_rois = ix_rois[order, :] if cf.dim == 2: class_keep = nms_2D(torch.cat((ix_rois, ix_scores.unsqueeze(1)), dim=1), cf.detection_nms_threshold) else: class_keep = nms_3D(torch.cat((ix_rois, ix_scores.unsqueeze(1)), dim=1), cf.detection_nms_threshold) # map indices back. class_keep = keep[score_keep[bixs[ixs[order[class_keep]]]]] # merge indices over classes for current batch element b_keep = class_keep if i == 0 else mutils.unique1d(torch.cat((b_keep, class_keep))) # only keep top-k boxes of current batch-element top_ids = class_scores[b_keep].sort(descending=True)[1][:cf.model_max_instances_per_batch_element] b_keep = b_keep[top_ids] # merge indices over batch elements. batch_keep = b_keep if j == 0 else mutils.unique1d(torch.cat((batch_keep, b_keep))) keep = batch_keep else: keep = torch.tensor([0]).long().cuda() # arrange output result = torch.cat((refined_rois[keep], batch_ixs[keep].unsqueeze(1), class_ids[keep].unsqueeze(1).float(), class_scores[keep].unsqueeze(1)), dim=1) return result
def refine_detections(anchors, probs, deltas, regressions, batch_ixs, cf): """Refine classified proposals, filter overlaps and return final detections. n_proposals here is typically a very large number: batch_size * n_anchors. This function is hence optimized on trimming down n_proposals. :param anchors: (n_anchors, 2 * dim) :param probs: (n_proposals, n_classes) softmax probabilities for all rois as predicted by classifier head. :param deltas: (n_proposals, n_classes, 2 * dim) box refinement deltas as predicted by bbox regressor head. :param regressions: (n_proposals, n_classes, n_rg_feats) :param batch_ixs: (n_proposals) batch element assignemnt info for re-allocation. :return: result: (n_final_detections, (y1, x1, y2, x2, (z1), (z2), batch_ix, pred_class_id, pred_score, pred_regr)) """ anchors = anchors.repeat(len(np.unique(batch_ixs)), 1) #flatten foreground probabilities, sort and trim down to highest confidences by pre_nms limit. fg_probs = probs[:, 1:].contiguous() flat_probs, flat_probs_order = fg_probs.view(-1).sort(descending=True) keep_ix = flat_probs_order[:cf.pre_nms_limit] # reshape indices to 2D index array with shape like fg_probs. keep_arr = torch.cat(((keep_ix / fg_probs.shape[1]).unsqueeze(1), (keep_ix % fg_probs.shape[1]).unsqueeze(1)), 1) pre_nms_scores = flat_probs[:cf.pre_nms_limit] pre_nms_class_ids = keep_arr[:, 1] + 1 # add background again. pre_nms_batch_ixs = batch_ixs[keep_arr[:, 0]] pre_nms_anchors = anchors[keep_arr[:, 0]] pre_nms_deltas = deltas[keep_arr[:, 0]] pre_nms_regressions = regressions[keep_arr[:, 0]] keep = torch.arange(pre_nms_scores.size()[0]).long().cuda() # apply bounding box deltas. re-scale to image coordinates. std_dev = torch.from_numpy(np.reshape(cf.rpn_bbox_std_dev, [1, cf.dim * 2])).float().cuda() scale = torch.from_numpy(cf.scale).float().cuda() refined_rois = mutils.apply_box_deltas_2D(pre_nms_anchors / scale, pre_nms_deltas * std_dev) * scale \ if cf.dim == 2 else mutils.apply_box_deltas_3D(pre_nms_anchors / scale, pre_nms_deltas * std_dev) * scale # round and cast to int since we're deadling with pixels now refined_rois = mutils.clip_to_window(cf.window, refined_rois) pre_nms_rois = torch.round(refined_rois) for j, b in enumerate(mutils.unique1d(pre_nms_batch_ixs)): bixs = torch.nonzero(pre_nms_batch_ixs == b)[:, 0] bix_class_ids = pre_nms_class_ids[bixs] bix_rois = pre_nms_rois[bixs] bix_scores = pre_nms_scores[bixs] for i, class_id in enumerate(mutils.unique1d(bix_class_ids)): ixs = torch.nonzero(bix_class_ids == class_id)[:, 0] # nms expects boxes sorted by score. ix_rois = bix_rois[ixs] ix_scores = bix_scores[ixs] ix_scores, order = ix_scores.sort(descending=True) ix_rois = ix_rois[order, :] ix_scores = ix_scores if cf.dim == 2: class_keep = nms_2D( torch.cat((ix_rois, ix_scores.unsqueeze(1)), dim=1), cf.detection_nms_threshold) else: class_keep = nms_3D( torch.cat((ix_rois, ix_scores.unsqueeze(1)), dim=1), cf.detection_nms_threshold) # map indices back. class_keep = keep[bixs[ixs[order[class_keep]]]] # merge indices over classes for current batch element b_keep = class_keep if i == 0 else mutils.unique1d( torch.cat((b_keep, class_keep))) # only keep top-k boxes of current batch-element. top_ids = pre_nms_scores[b_keep].sort( descending=True)[1][:cf.model_max_instances_per_batch_element] b_keep = b_keep[top_ids] # merge indices over batch elements. batch_keep = b_keep if j == 0 else mutils.unique1d( torch.cat((batch_keep, b_keep))) keep = batch_keep # arrange output. result = torch.cat( (pre_nms_rois[keep], pre_nms_batch_ixs[keep].unsqueeze(1).float(), pre_nms_class_ids[keep].unsqueeze(1).float(), pre_nms_scores[keep].unsqueeze(1), pre_nms_regressions[keep]), dim=1) return result
def proposal_layer(rpn_pred_probs, rpn_pred_deltas, proposal_count, anchors, cf): """ Receives anchor scores and selects a subset to pass as proposals to the second stage. Filtering is done based on anchor scores and non-max suppression to remove overlaps. It also applies bounding box refinment detals to anchors. :param rpn_pred_probs: (b, n_anchors, 2) :param rpn_pred_deltas: (b, n_anchors, (y, x, (z), log(h), log(w), (log(d)))) :return: batch_normalized_boxes: Proposals in normalized coordinates (b, proposal_count, (y1, x1, y2, x2, (z1), (z2))) :return: batch_out_proposals: Box coords + RPN foreground scores for monitoring/plotting (b, proposal_count, (y1, x1, y2, x2, (z1), (z2), score)) """ batch_scores = rpn_pred_probs[:, :, 1] batch_deltas = rpn_pred_deltas batch_anchors = anchors batch_normalized_boxes = [] batch_out_proposals = [] # loop over batch dimension. for ix in range(batch_scores.shape[0]): scores = batch_scores[ix] deltas = batch_deltas[ix] anchors = batch_anchors.clone() # norm deltas std_dev = torch.from_numpy(cf.rpn_bbox_std_dev[None]).float().cuda() deltas = deltas * std_dev # improve performance by trimming to top anchors by score # and doing the rest on the smaller subset. pre_nms_limit = min(cf.pre_nms_limit, anchors.size()[0]) scores, order = scores.sort(descending=True) order = order[:pre_nms_limit] scores = scores[:pre_nms_limit] deltas = deltas[order, :] anchors = anchors[order, :] # apply deltas to anchors to get refined anchors and filter with non-maximum surpression. if batch_deltas.shape[-1] == 4: boxes = mutils.apply_box_deltas_2D(anchors, deltas) boxes = mutils.clip_boxes_2D(boxes, cf.window) keep = nms_2D(torch.cat((boxes, scores.unsqueeze(1)), 1), cf.rpn_nms_threshold) norm = torch.from_numpy(cf.scale).float().cuda() else: boxes = mutils.apply_box_deltas_3D(anchors, deltas) boxes = mutils.clip_boxes_3D(boxes, cf.window) keep = nms_3D(torch.cat((boxes, scores.unsqueeze(1)), 1), cf.rpn_nms_threshold) norm = torch.from_numpy(cf.scale).float().cuda() keep = keep[:proposal_count] boxes = boxes[keep, :] rpn_scores = scores[keep][:, None] # pad missing boxes with 0. if boxes.shape[0] < proposal_count: n_pad_boxes = proposal_count - boxes.shape[0] zeros = torch.zeros([n_pad_boxes, boxes.shape[1]]).cuda() boxes = torch.cat([boxes, zeros], dim=0) zeros = torch.zeros([n_pad_boxes, rpn_scores.shape[1]]).cuda() rpn_scores = torch.cat([rpn_scores, zeros], dim=0) # concat box and score info for monitoring/plotting. batch_out_proposals.append(torch.cat((boxes, rpn_scores), 1).cpu().data.numpy()) # normalize dimensions to range of 0 to 1. normalized_boxes = boxes / norm # add back batch dimension batch_normalized_boxes.append(normalized_boxes.unsqueeze(0)) batch_normalized_boxes = torch.cat(batch_normalized_boxes) batch_out_proposals = np.array(batch_out_proposals) return batch_normalized_boxes, batch_out_proposals