def make_one_rcnn_target(cfg, proposals, truth_boxes, truth_labels):
    """
    make rcnn (box head) training targets for one image.
        1. drop proposals rejected by is_small_box
        2. assign every remaining proposal to the truth box it overlaps most
        3. split into fg/bg by overlap thresholds and subsample with balance()
        4. encode box regression targets for the fg proposals

    :param cfg: configure; reads device, mask_train_min_size and the
        rcnn_train_* thresholds / batch settings
    :param proposals: proposals of one image, rows indexed as
        [i, x0, y0, x1, y1, ...] (columns 1:5 are the box)
    :param truth_boxes: truth boxes of the image, e.g. [[x0, y0, x1, y1], ...]
    :param truth_labels: label of each truth box
    :return:
        sampled_proposal: sampled proposals, fg first then bg
        sampled_label: label of each sampled proposal, 0 for bg
        sampled_assign: index of the truth box assigned to each sampled
            proposal (stays a numpy array, it is not converted to a tensor)
        sampled_target: encoded box offsets of the fg proposals only
        When there is no truth box or no valid proposal, empty tensors
        of shape (0, 7), (0,), (0,) and (0, 4) are returned.
    """
    sampled_proposal = torch.zeros((0, 7), dtype=torch.float32).to(cfg.device)
    sampled_label = torch.zeros((0, ), dtype=torch.int64).to(cfg.device)
    sampled_assign = np.zeros((0, ), np.int32)
    sampled_target = torch.zeros((0, 4), dtype=torch.float32).to(cfg.device)

    # filter invalid proposals
    # NOTE(review): the rcnn target reuses cfg.mask_train_min_size here -
    # confirm that sharing the mask head's setting is intended.
    valid = [
        i for i in range(len(proposals))
        if not is_small_box(proposals[i, 1:5], min_size=cfg.mask_train_min_size)
    ]
    proposals = proposals[valid]

    num_proposal = len(proposals)
    if len(truth_boxes) == 0 or num_proposal == 0:
        return sampled_proposal, sampled_label, sampled_assign, sampled_target

    # assign fg/bg to each box
    # for each bbox, the index of gt which has max box_overlap with it
    overlap = cython_box_overlap(proposals[:, 1:5], truth_boxes)
    argmax_overlap = np.argmax(overlap, 1)
    max_overlap = overlap[np.arange(num_proposal), argmax_overlap]

    fg_index = np.where(max_overlap >= cfg.rcnn_train_fg_thresh_low)[0]
    bg_index = np.where((max_overlap < cfg.rcnn_train_bg_thresh_high) &
                        (max_overlap >= cfg.rcnn_train_bg_thresh_low))[0]
    fg_index, bg_index, num_fg = balance(fg_index, bg_index,
                                         cfg.rcnn_train_batch_size,
                                         cfg.rcnn_train_fg_fraction,
                                         num_proposal)

    # selecting both fg and bg
    fg_bg_index = np.concatenate([fg_index, bg_index], 0)
    sampled_proposal = proposals[fg_bg_index]

    # label
    sampled_assign = argmax_overlap[fg_bg_index]
    sampled_label = truth_labels[sampled_assign]
    sampled_label[num_fg:] = 0  # Clamp labels for the background to 0

    # target
    # Only a freshly encoded array is converted. Without fg samples
    # sampled_target is still the empty tensor created above, which is
    # already on cfg.device and must not be passed through to_tensor again.
    if num_fg > 0:
        target_truth_box = truth_boxes[sampled_assign[:num_fg]]
        target_box = sampled_proposal[:num_fg, 1:5]
        sampled_target = to_tensor(
            rcnn_encode(target_box, target_truth_box), cfg.device)

    sampled_label = to_tensor(sampled_label, cfg.device)
    sampled_proposal = to_tensor(sampled_proposal, cfg.device)

    return sampled_proposal, sampled_label, sampled_assign, sampled_target
def make_one_rpn_target(cfg, anchor_boxes, truth_boxes):
    """
    labeling anchor boxes for one image

    :param cfg: configure; reads rpn_train_bg_thresh_high,
        rpn_train_fg_thresh_low and device
    :param anchor_boxes: [[x0, y0, x1, y1]]: (N, 4) ndarray of float32
    :param truth_boxes: [[x0, y0, x1, y1]]: (M, 4) ndarray of float32
    :return: all converted with to_tensor(..., cfg.device)
        anchor_labels: 1 for pos, 0 for neg
        anchor_assigns: which truth box is assigned to the anchor box
        label_weight: pos=1, neg=num_fg / num_bg (class balancing)
        delta: bboxes' offsets, filled for pos anchors only
        delta_weight: 1 for pos anchors, 0 otherwise
        Without truth boxes the labels, assigns, delta and delta_weight
        stay all zero and label_weight stays all one.
    """
    num_anchor_boxes = len(anchor_boxes)
    anchor_labels = np.zeros((num_anchor_boxes,), np.int64)
    anchor_assigns = np.zeros((num_anchor_boxes,), np.int64)
    label_weight = np.ones((num_anchor_boxes,), np.float32)  # <todo> why use 1 for init ?
    delta = np.zeros((num_anchor_boxes, 4), np.float32)
    delta_weight = np.zeros((num_anchor_boxes,), np.float32)

    num_truth_box = len(truth_boxes)
    if num_truth_box != 0:
        overlap = cython_box_overlap(anchor_boxes, truth_boxes)
        argmax_overlap = np.argmax(overlap, 1)
        max_overlap = overlap[np.arange(num_anchor_boxes), argmax_overlap]

        # anchor_labels 1/0 for each anchor
        bg_index = max_overlap < cfg.rpn_train_bg_thresh_high
        anchor_labels[bg_index] = 0
        label_weight[bg_index] = 1

        fg_index = max_overlap >= cfg.rpn_train_fg_thresh_low
        anchor_labels[fg_index] = 1
        label_weight[fg_index] = 1
        anchor_assigns[...] = argmax_overlap

        # for each truth, anchor_boxes with highest overlap, include multiple maxs
        # re-assign less overlapped gt to anchor_boxes
        # NOTE(review): a truth box whose best overlap is 0 matches every
        # anchor that has 0 overlap with it - confirm this cannot happen
        # with the anchor layout in use.
        argmax_overlap = np.argmax(overlap, 0)
        max_overlap = overlap[argmax_overlap, np.arange(num_truth_box)]
        anchor_assignto_gt, gt_assignto_anchor = np.where(overlap == max_overlap)

        fg_index = anchor_assignto_gt
        anchor_labels[fg_index] = 1
        label_weight[fg_index] = 1
        anchor_assigns[fg_index] = gt_assignto_anchor

        # regression
        fg_index = np.where(anchor_labels != 0)
        target_window = anchor_boxes[fg_index]
        target_truth_box = truth_boxes[anchor_assigns[fg_index]]
        delta[fg_index] = rpn_encode(target_window, target_truth_box)
        delta_weight[fg_index] = 1

        # weights for class balancing
        fg_index = np.where((label_weight != 0) & (anchor_labels != 0))[0]
        bg_index = np.where((label_weight != 0) & (anchor_labels == 0))[0]

        num_fg = len(fg_index)
        num_bg = len(bg_index)
        label_weight[fg_index] = 1
        # num_fg and num_bg are plain ints, so dividing with num_bg == 0
        # would raise ZeroDivisionError; with no bg anchor there is also
        # nothing to re-weight.
        if num_bg > 0:
            label_weight[bg_index] = num_fg / num_bg

        # task balancing
        delta_weight[fg_index] = label_weight[fg_index]

    # save
    anchor_labels = to_tensor(anchor_labels, cfg.device)
    anchor_assigns = to_tensor(anchor_assigns, cfg.device)
    label_weight = to_tensor(label_weight, cfg.device)
    delta = to_tensor(delta, cfg.device)
    delta_weight = to_tensor(delta_weight, cfg.device)

    return anchor_labels, anchor_assigns, label_weight, delta, delta_weight
def make_one_mask_target(cfg, image, proposals, truth_box, truth_label, truth_instance):
    """
    make mask targets for one image.
        1. drop proposals rejected by is_small_box
        2. keep proposals whose best truth overlap reaches the fg threshold
        3. sample cfg.mask_train_batch_size of them (with replacement when
           there are fewer candidates than that)
        4. crop/resize the assigned truth instance for every sampled proposal

    :param cfg: configure; reads device, mask_train_min_size,
        mask_train_fg_thresh_low, mask_train_batch_size and mask_size
    :param image: input image; only image.size() is read and it must
        yield three values
    :param proposals: list of regional proposals generated by RCNN.
        e.g. [[i, x0, y0, x1, y1, score, label], ...]
    :param truth_box: list of truth boxes. e.g. [[x0, y0, x1, y1], ...]
    :param truth_label: label of each truth box
    :param truth_instance: list of truth instances, one per truth box
    :return:
        sampled_proposal: the sampled fg proposals
        sampled_label: truth label assigned to each sampled proposal
        sampled_instance: resized instance crop for each sampled proposal
        When nothing can be sampled, empty tensors of shape (0, 7),
        (0, 1) and (0, 1, 1) are returned.
    """
    empty_result = (
        torch.FloatTensor(0, 7).to(cfg.device),
        torch.LongTensor(0, 1).to(cfg.device),
        torch.FloatTensor(0, 1, 1).to(cfg.device),
    )

    # height / width are not used further down; the unpack only requires
    # image.size() to yield exactly three values.
    _, height, width = image.size()

    # filter invalid proposals like small proposals
    keep_rows = [
        row for row in range(len(proposals))
        if not is_small_box(proposals[row, 1:5], min_size=cfg.mask_train_min_size)
    ]
    proposals = proposals[keep_rows]
    num_proposal = len(proposals)

    if len(truth_box) == 0 or num_proposal == 0:
        return empty_result

    # for each proposal, the truth box it overlaps most and that overlap
    overlap = cython_box_overlap(proposals[:, 1:5], truth_box)
    best_truth = np.argmax(overlap, 1)
    best_overlap = overlap[np.arange(num_proposal), best_truth]

    fg_index = np.where(best_overlap >= cfg.mask_train_fg_thresh_low)[0]
    num_candidate = len(fg_index)
    if num_candidate == 0:
        return empty_result

    # always draw a full batch; repeat candidates only when there are too few
    batch = cfg.mask_train_batch_size
    picked = np.random.choice(num_candidate, size=batch,
                              replace=num_candidate < batch)
    fg_index = fg_index[picked]

    sampled_proposal = proposals[fg_index]
    sampled_assign = best_truth[fg_index]  # truth index per sampled proposal
    sampled_label = truth_label[sampled_assign]

    # one resized instance crop per sampled proposal, stacked on a new axis 0
    crops = [
        resize_instance(truth_instance[truth_idx], fg_box[1:5],
                        cfg.mask_size)[np.newaxis, :, :]
        for truth_idx, fg_box in zip(sampled_assign, sampled_proposal)
    ]
    sampled_instance = np.vstack(crops)

    return (
        to_tensor(sampled_proposal, cfg.device),
        to_tensor(sampled_label, cfg.device),
        to_tensor(sampled_instance, cfg.device),
    )
def _select_nms_settings(cfg, mode, head):
    """pick (prob_threshold, overlap_threshold, min_size) for a mode / head."""
    is_rpn = head == 'rpn'
    if mode == 'train':
        prob_threshold = cfg.rpn_train_nms_pre_score_threshold if is_rpn else cfg.rcnn_train_nms_pre_score_threshold
        overlap_threshold = cfg.rpn_train_nms_overlap_threshold if is_rpn else cfg.rcnn_train_nms_overlap_threshold
        min_size = cfg.rpn_train_nms_min_size if is_rpn else cfg.rcnn_train_nms_min_size
    elif mode in ('valid', 'test', 'eval'):
        prob_threshold = cfg.rpn_test_nms_pre_score_threshold if is_rpn else cfg.rcnn_test_nms_pre_score_threshold
        overlap_threshold = cfg.rpn_test_nms_overlap_threshold if is_rpn else cfg.rcnn_test_nms_overlap_threshold
        min_size = cfg.rpn_test_nms_min_size if is_rpn else cfg.rcnn_test_nms_min_size
        if mode == 'eval':
            prob_threshold = 0.05  # set low number to make roc curve.
    else:
        raise ValueError('rpn_nms(): invalid mode = %s?' % mode)
    return prob_threshold, overlap_threshold, min_size


def _nms(cfg, mode, head, decode, images, logits, deltas, anchor_boxes=None, rpn_proposals=None):
    """
    used for rpn and rcnn nms_func
    This function, per image and per foreground class:
        1. keeps boxes whose class probability exceeds the score threshold
        2. decodes the bbox regression, clips to the image border and
           filters small boxes
        3. does non-maximum suppression on the remaining boxes

    :param cfg: configure
    :param mode: mode. e.g. 'train', 'valid', 'test', 'eval'
    :param head: 'rpn' uses anchor_boxes and 2 classes; any other value is
        treated as the rcnn head and uses rpn_proposals and cfg.num_classes
    :param decode: callable (boxes, deltas) -> decoded boxes
    :param images: a batch of input images, (B, C, H, W)
    :param logits: NOT normalized class scores; softmax is applied here.
        rpn: indexed by image, rcnn: one row per rpn proposal
    :param deltas: bbox regression output; for the rcnn head it is
        reshaped to (-1, num_classes, 4)
    :param anchor_boxes: anchor boxes shared by all images (rpn head only),
        e.g. [[x0, y0, x1, y1], ...]
    :param rpn_proposals: proposals from the rpn (rcnn head only),
        rows of [i, x0, y0, x1, y1, score, label]
    :return: all proposals in a batch. e.g. [i, x0, y0, x1, y1, score, label]
        proposals[0]:   image idx in the batch
        proposals[1:5]: bbox
        proposals[5]:   probability of the class (background skipped)
        proposals[6]:   class label
        For the rcnn head, images without any rpn proposal contribute
        nothing; if no image could be processed a single all-zero row of
        shape (1, 7) is returned as placeholder.
    :raises ValueError: if mode is not one of the supported modes
    """
    nms_prob_threshold, nms_overlap_threshold, nms_min_size = \
        _select_nms_settings(cfg, mode, head)

    is_rpn = head == 'rpn'
    num_classes = 2 if is_rpn else cfg.num_classes
    logits_np = logits.detach().cpu().numpy()
    deltas_np = deltas.detach().cpu().numpy()
    rpn_proposals_np = None
    if not is_rpn:
        deltas_np = deltas_np.reshape(-1, num_classes, 4)
        # converted once here instead of once per image
        rpn_proposals_np = rpn_proposals.detach().cpu().numpy()
    batch_size, _, height, width = images.size()

    # non-max suppression
    ret_proposals = []
    for img_idx in range(batch_size):
        if is_rpn:
            assert anchor_boxes is not None
            raw_box = anchor_boxes
            prob_distrib = np_softmax(logits_np[img_idx])  # (N, 2)
            delta_distrib = deltas_np[img_idx]  # (N, 2, 4)
        else:  # rcnn
            select = np.where(rpn_proposals_np[:, 0] == img_idx)[0]
            if len(select) == 0:
                # Skip only this image. Returning here would throw away
                # the proposals of every other image in the batch.
                continue
            raw_box = rpn_proposals_np[select, 1:5]
            prob_distrib = np_softmax(logits_np[select])  # <todo>why not use np_sigmoid?
            delta_distrib = deltas_np[select]

        # the empty seed keeps np.vstack valid when nothing survives
        pic_proposals = [np.empty((0, 7), np.float32)]

        # skip background (class 0)
        for cls_idx in range(1, num_classes):
            index = np.where(prob_distrib[:, cls_idx] > nms_prob_threshold)[0]
            if len(index) == 0:
                continue

            valid_box = raw_box[index]
            prob = prob_distrib[index, cls_idx].reshape(-1, 1)
            delta = delta_distrib[index, cls_idx]

            # bbox regression, do some clip/filter
            box = decode(valid_box, delta)
            box = clip_boxes(box, width, height)  # take care of borders
            keep = filter_boxes(box, min_size=nms_min_size)  # get rid of small boxes
            if len(keep) == 0:
                continue

            box = box[keep]
            prob = prob[keep]
            keep = nms_func(np.hstack((box, prob)), nms_overlap_threshold)

            proposal = np.zeros((len(keep), 7), np.float32)
            proposal[:, 0] = img_idx
            proposal[:, 1:5] = np.around(box[keep], 0)
            proposal[:, 5] = prob[keep, 0]
            proposal[:, 6] = cls_idx
            pic_proposals.append(proposal)

        ret_proposals.append(np.vstack(pic_proposals))

    if not ret_proposals:
        # no image had any input to process: keep the placeholder row
        # callers already receive in that situation
        return torch.zeros((1, 7)).to(cfg.device)

    ret_proposals = np.vstack(ret_proposals)
    ret_proposals = to_tensor(ret_proposals, cfg.device)
    return ret_proposals
def mask_nms(cfg, images, proposals, mask_logits):
    """
    1. resize the masks from mask head output into box size
    2. paste the masks into an image sized canvas
    3. do non-maximum suppression to remove overlapping segmentations

    :param cfg: configure; reads the mask_test_* thresholds and device
    :param images: (B, C, H, W)
    :param proposals: rows of [i, x0, y0, x1, y1, score, label], box
        coordinates are inclusive; row k belongs to mask_logits[k]
    :param mask_logits: mask head output, indexed as [proposal, label]
    :return:
        b_multi_masks: list with one (H, W) array per image; kept
            instances are labelled 1, 2, ... in order of decreasing score
        b_mask_instances: list with one (K, H, W) array of pasted mask
            probabilities per image (K kept instances, may be 0)
        b_mask_proposals: tensor with the proposals of all kept instances
            of the whole batch, stacked along the first axis
    """
    overlap_threshold = cfg.mask_test_nms_overlap_threshold
    pre_score_threshold = cfg.mask_test_nms_pre_score_threshold
    mask_threshold = cfg.mask_test_mask_threshold
    mask_min_area = cfg.mask_test_mask_min_area

    proposals = proposals.detach().cpu().numpy()
    mask_logits = mask_logits.detach().cpu().numpy()
    mask_probs = np_sigmoid(mask_logits)

    b_multi_masks = []
    b_mask_proposals = []
    b_mask_instances = []
    batch_size, _, H, W = images.size()
    for b in range(batch_size):
        multi_masks = np.zeros((H, W), np.float32)
        # defaults for an image without any kept instance
        mask_proposals = np.zeros((0, 7), np.float32)
        mask_instances = np.zeros((0, H, W), np.float32)

        index = np.where((proposals[:, 0] == b) &
                         (proposals[:, 5] > pre_score_threshold))[0]
        if len(index) > 0:
            instances = []  # all instances
            boxes = []  # all boxes
            for i in index:
                mask = np.zeros((H, W), np.float32)
                # NOTE(review): assumes the box lies inside the image and
                # x1 >= x0, y1 >= y0 - confirm proposals are clipped upstream
                x0, y0, x1, y1 = proposals[i, 1:5].astype(np.int32)
                h, w = y1 - y0 + 1, x1 - x0 + 1
                label = int(proposals[i, 6])  # get label of the instance
                crop = mask_probs[i, label]  # get mask channel of the label
                crop = cv2.resize(crop, (w, h), interpolation=cv2.INTER_LINEAR)
                # probabilities are pasted as they are; binarisation is
                # left to instance_to_binary below
                mask[y0:y1 + 1, x0:x1 + 1] = crop  # paste mask into empty mask

                instances.append(mask)
                boxes.append([x0, y0, x1, y1])

            # compute box overlap, do cython_nms
            L = len(index)
            binary = [
                instance_to_binary(m, mask_threshold, mask_min_area)
                for m in instances
            ]
            boxes = np.array(boxes, np.float32)
            box_overlap = cython_box_overlap(boxes, boxes)
            instance_overlap = np.zeros((L, L), np.float32)

            # calculate instance overlapping iou
            for i in range(L):
                instance_overlap[i, i] = 1
                for j in range(i + 1, L):
                    # boxes that barely touch cannot have overlapping masks
                    if box_overlap[i, j] < 0.01:
                        continue

                    x0 = int(min(boxes[i, 0], boxes[j, 0]))
                    y0 = int(min(boxes[i, 1], boxes[j, 1]))
                    x1 = int(max(boxes[i, 2], boxes[j, 2]))
                    y1 = int(max(boxes[i, 3], boxes[j, 3]))

                    # x1 / y1 are inclusive (see the paste above), so the
                    # crop has to include the last row and column as well
                    mi = binary[i][y0:y1 + 1, x0:x1 + 1]
                    mj = binary[j][y0:y1 + 1, x0:x1 + 1]

                    intersection = (mi & mj).sum()
                    union = (mi | mj).sum()
                    instance_overlap[i, j] = intersection / (union + 1e-12)
                    instance_overlap[j, i] = instance_overlap[i, j]

            # non-max-suppression to remove overlapping segmentation
            # https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/
            scores = proposals[index, 5]
            remaining = list(np.argsort(-scores))
            keep = []
            while remaining:
                best = remaining[0]
                keep.append(best)
                # The winner is dropped explicitly, so the loop terminates
                # even when overlap_threshold >= 1 (its self overlap of 1
                # would not exceed the threshold then).
                remaining = [
                    e for e in remaining[1:]
                    if not instance_overlap[best, e] > overlap_threshold
                ]

            # filter instances & proposals; keep is never empty here
            for rank, k in enumerate(keep):
                multi_masks[np.where(binary[k])] = rank + 1
            mask_instances = np.vstack(
                [instances[k].reshape(1, H, W) for k in keep])
            # index[k] is the row of the proposal before nms_func
            mask_proposals = np.vstack(
                [np.array(proposals[index[k]], np.float32) for k in keep])

        b_mask_proposals.append(mask_proposals)
        b_mask_instances.append(mask_instances)
        b_multi_masks.append(multi_masks)

    b_mask_proposals = np.vstack(b_mask_proposals)
    b_mask_proposals = to_tensor(b_mask_proposals, cfg.device)
    return b_multi_masks, b_mask_instances, b_mask_proposals