def compute_precision_for_box(box, truth_box, truth_label, threshold=(0.5,)):
    """
    Score predicted boxes against truth boxes at one or more overlap thresholds.

    The status codes written into the result arrays (INVALID, MISS, HIT, FP,
    TP) are module-level constants defined elsewhere in this file.

    :param box: numpy array of predicted boxes
    :param truth_box: numpy array of truth boxes
    :param truth_label: numpy array with one label per truth box; only boxes
        with label > 0 are scored, boxes with label < 0 are "don't care"
    :param threshold: iterable of overlap thresholds to evaluate
    :return:
        precision: per threshold, TP / (TP + FP + MISS)
        recall: per threshold, HIT / (HIT + MISS); 0 when no truth box is scored
        result: per threshold, int32 array with TP / FP / INVALID per predicted box
        truth_result: per threshold, int32 array with HIT / MISS / INVALID per truth box
    """
    num_truth_box = len(truth_box)
    num_box = len(box)

    # overlap[i, j]: overlap of predicted box i with truth box j
    # (presumably IoU -- confirm against cython_box_overlap).
    overlap = cython_box_overlap(box, truth_box)
    # For every truth box, the predicted box that overlaps it most.
    argmax_overlap = np.argmax(overlap, 0)
    max_overlap = overlap[argmax_overlap, np.arange(num_truth_box)]

    # Overlap of every predicted box with the "don't care" truth boxes.
    invalid_truth_box = truth_box[truth_label < 0]
    invalid_valid_overlap = cython_box_overlap(box, invalid_truth_box)

    is_scored_truth = truth_label > 0

    precision = []
    recall = []
    result = []
    truth_result = []
    for t in threshold:
        # Truth side: everything starts as INVALID, scored boxes become HIT/MISS.
        truth_r = np.full(num_truth_box, INVALID, np.int32)
        truth_r[(max_overlap < t) & is_scored_truth] = MISS
        truth_r[(max_overlap >= t) & is_scored_truth] = HIT

        # Prediction side: everything starts as FP, the best match of each
        # HIT truth box becomes TP.
        r = np.full(num_box, FP, np.int32)
        r[argmax_overlap[truth_r == HIT]] = TP

        # A false positive that overlaps a "don't care" truth box by more
        # than the threshold is excluded from scoring instead of penalised.
        index = np.where(r == FP)[0]
        if len(index) > 0:
            index = index[np.where(invalid_valid_overlap[index] > t)[0]]
            r[index] = INVALID

        num_truth = (truth_r != INVALID).sum()
        num_hit = (truth_r == HIT).sum()
        num_miss = (truth_r == MISS).sum()
        # Guarded like the precision below so that an image without any
        # scored truth box yields 0 instead of nan and a RuntimeWarning.
        rec = num_hit / max(num_truth, 1e-12)

        num_tp = (r == TP).sum()
        num_fp = (r == FP).sum()
        prec = num_tp / max(num_tp + num_fp + num_miss, 1e-12)

        precision.append(prec)
        recall.append(rec)
        result.append(r)
        truth_result.append(truth_r)

    return precision, recall, result, truth_result
def _make_one_rpn_target(cfg, image, anchor_boxes, truth_boxes, truth_labels):
    """
    Build the RPN classification and regression targets for one image.

    :param cfg: configuration; reads rpn_train_bg_thresh_high,
        rpn_train_fg_thresh_low, rpn_train_scale_balance, rpn_scales and
        rpn_base_apsect_ratios
    :param image: input image; only image.size() is used, unpacked as
        (_, height, width)
    :param anchor_boxes: numpy array of anchor boxes, e.g. [x0, y0, x1, y1]
    :param truth_boxes: numpy array of truth boxes, e.g. [x0, y0, x1, y1]
    :param truth_labels: numpy array with one label per truth box; truth
        boxes with a negative label are treated as "don't care"
    :return: five cuda Variables, one entry per anchor
        label: 1 for positive anchors, 0 otherwise
        label_assign: index of the truth box assigned to the anchor
        label_weight: classification weight; 0 for positives assigned to a
            "don't care" truth box, 1 for other positives, num_fg / num_bg
            for negatives, all optionally multiplied by the scale factor
        target: box regression offsets, filled for positive anchors only
        target_weight: regression weight; equals label_weight for the
            weighted positives, 0 for "don't care" positives and negatives
    """
    num_anchor_boxes = len(anchor_boxes)
    label = np.zeros((num_anchor_boxes, ), np.float32)
    label_assign = np.zeros((num_anchor_boxes, ), np.int32)
    label_weight = np.ones((num_anchor_boxes, ), np.float32)  # <todo> why use 1 for init ?
    target = np.zeros((num_anchor_boxes, 4), np.float32)
    target_weight = np.zeros((num_anchor_boxes, ), np.float32)

    num_truth_box = len(truth_boxes)
    if num_truth_box != 0:
        _, height, width = image.size()

        # overlap[i, j]: overlap of anchor i with truth box j.
        overlap = cython_box_overlap(anchor_boxes, truth_boxes)
        argmax_overlap = np.argmax(overlap, 1)
        max_overlap = overlap[np.arange(num_anchor_boxes), argmax_overlap]

        # label 1/0 for each anchor from its best-overlapping truth box
        bg_index = max_overlap < cfg.rpn_train_bg_thresh_high
        label[bg_index] = 0
        label_weight[bg_index] = 1

        fg_index = max_overlap >= cfg.rpn_train_fg_thresh_low
        label[fg_index] = 1
        label_weight[fg_index] = 1
        label_assign[...] = argmax_overlap

        # For each truth box, every anchor that reaches its highest overlap
        # (ties included) is forced positive and re-assigned to that truth
        # box, so weakly covered truth boxes still get an anchor.
        argmax_overlap = np.argmax(overlap, 0)
        max_overlap = overlap[argmax_overlap, np.arange(num_truth_box)]
        anchor_assignto_gt, gt_assignto_anchor = np.where(overlap == max_overlap)

        fg_index = anchor_assignto_gt
        label[fg_index] = 1
        label_weight[fg_index] = 1
        label_assign[fg_index] = gt_assignto_anchor

        # regression targets for all positive anchors
        fg_index = np.where(label != 0)
        target_window = anchor_boxes[fg_index]
        target_truth_box = truth_boxes[label_assign[fg_index]]
        target[fg_index] = rpn_encode(target_window, target_truth_box)
        target_weight[fg_index] = 1

        # don't care: positives assigned to a negatively-labelled truth box
        invalid_truth_label = np.where(truth_labels < 0)[0]
        invalid_index = np.isin(label_assign, invalid_truth_label) & (label != 0)
        label_weight[invalid_index] = 0
        target_weight[invalid_index] = 0

        # weights for class balancing
        fg_index = np.where((label_weight != 0) & (label != 0))[0]
        bg_index = np.where((label_weight != 0) & (label == 0))[0]
        num_fg = len(fg_index)
        num_bg = len(bg_index)
        label_weight[fg_index] = 1
        # With no background anchor there is nothing to re-weight; skipping
        # the assignment avoids a ZeroDivisionError.
        if num_bg > 0:
            label_weight[bg_index] = num_fg / num_bg

        if cfg.rpn_train_scale_balance:
            # weights for scale balancing
            # NOTE(review): assumes anchors are stored level by level with
            # h * w * num_bases entries per level -- confirm against the
            # anchor generator.
            num_scales = len(cfg.rpn_scales)
            num_bases = [len(b) for b in cfg.rpn_base_apsect_ratios]
            start = 0
            for level in range(num_scales):
                h, w = int(height // 2**level), int(width // 2**level)
                end = start + h * w * num_bases[level]
                label_weight[start:end] *= (2**level)**2
                start = end

        # task balancing
        target_weight[fg_index] = label_weight[fg_index]

    # save
    label = Variable(torch.from_numpy(label)).cuda()
    label_assign = Variable(torch.from_numpy(label_assign)).cuda()
    label_weight = Variable(torch.from_numpy(label_weight)).cuda()
    target = Variable(torch.from_numpy(target)).cuda()
    target_weight = Variable(torch.from_numpy(target_weight)).cuda()
    return label, label_assign, label_weight, target, target_weight
def _make_one_rcnn_target(cfg, image, proposals, truth_boxes, truth_labels):
    """
    Make the RCNN targets for ONE IMAGE by sampling a fixed-size set of proposals.

    https://github.com/ruotianluo/pytorch-faster-rcnn

    :param cfg: configuration; reads mask_train_min_size,
        rcnn_train_fg_thresh_low, rcnn_train_bg_thresh_high,
        rcnn_train_bg_thresh_low, rcnn_train_batch_size and
        rcnn_train_fg_fraction
    :param image: input image (not used by the sampling itself)
    :param proposals: numpy array, one row per proposal:
        [i, x0, y0, x1, y1, score, label, 0] where i is the index of the
        image in the batch
    :param truth_boxes: numpy array of truth boxes, e.g. [x0, y0, x1, y1]
    :param truth_labels: numpy array with the label of each truth box
    :return:
        sampled_proposal: cuda Variable with the sampled proposal rows,
            foreground rows first
        sampled_label: cuda Variable with the label of the assigned truth
            box, forced to 0 for background rows
        sampled_assign: numpy array with the index of the truth box assigned
            to each sampled proposal
        sampled_target: cuda Variable with the box offsets of the foreground
            rows; stays empty when no foreground row is sampled
        When there is nothing to sample from, all four are empty.
    """
    # Empty placeholders. Sizes are passed as separate arguments: passing a
    # tuple would be read as data and yield two-element tensors [0, 8] etc.
    sampled_proposal = Variable(torch.FloatTensor(0, 8)).cuda()
    sampled_label = Variable(torch.LongTensor(0, 1)).cuda()
    sampled_assign = np.zeros((0, 1), np.int32)
    sampled_target = Variable(torch.FloatTensor(0, 4)).cuda()

    if len(truth_boxes) == 0 or len(proposals) == 0:
        return sampled_proposal, sampled_label, sampled_assign, sampled_target

    # filter invalid (too small) proposals
    # NOTE(review): uses cfg.mask_train_min_size inside the RCNN target --
    # confirm this is the intended setting.
    valid = [
        i for i in range(len(proposals))
        if not is_small_box(proposals[i, 1:5], min_size=cfg.mask_train_min_size)
    ]
    if len(valid) == 0:
        return sampled_proposal, sampled_label, sampled_assign, sampled_target
    proposals = proposals[valid]

    # assign fg/bg to each proposal
    num_proposal = len(proposals)
    box = proposals[:, 1:5]

    # for each proposal, the index of the truth box with max overlap
    overlap = cython_box_overlap(box, truth_boxes)
    argmax_overlap = np.argmax(overlap, 1)
    max_overlap = overlap[np.arange(num_proposal), argmax_overlap]

    fg_index = np.where(max_overlap >= cfg.rcnn_train_fg_thresh_low)[0]
    bg_index = np.where((max_overlap < cfg.rcnn_train_bg_thresh_high)
                        & (max_overlap >= cfg.rcnn_train_bg_thresh_low))[0]

    # sampling for class balance
    num = cfg.rcnn_train_batch_size
    num_fg = int(np.round(cfg.rcnn_train_fg_fraction * cfg.rcnn_train_batch_size))

    # Small modification to the original version where we ensure a fixed
    # number of regions are sampled
    # https://github.com/precedenceguo/mx-rcnn/commit/3853477d9155c1f340241c04de148166d146901d
    fg_length = len(fg_index)
    bg_length = len(bg_index)

    if fg_length > 0 and bg_length > 0:
        num_fg = min(num_fg, fg_length)
        fg_index = fg_index[np.random.choice(
            fg_length, size=num_fg, replace=fg_length < num_fg)]
        num_bg = num - num_fg
        bg_index = bg_index[np.random.choice(
            bg_length, size=num_bg, replace=bg_length < num_bg)]
    elif fg_length > 0:
        # no bgs: fill the whole batch with foreground
        num_fg = num
        num_bg = 0
        fg_index = fg_index[np.random.choice(
            fg_length, size=num_fg, replace=fg_length < num_fg)]
    elif bg_length > 0:
        # no fgs: fill the whole batch with background
        num_fg = 0
        num_bg = num
        bg_index = bg_index[np.random.choice(
            bg_length, size=num_bg, replace=bg_length < num_bg)]
    else:
        # no bgs and no fgs: sample any proposal and treat it as background
        num_fg = 0
        num_bg = num
        bg_index = np.random.choice(
            num_proposal, size=num_bg, replace=num_proposal < num_bg)

    assert ((num_fg + num_bg) == num)

    # selecting both fg and bg, foreground first
    index = np.concatenate([fg_index, bg_index], 0)
    sampled_proposal = proposals[index]

    # label
    sampled_assign = argmax_overlap[index]
    sampled_label = truth_labels[sampled_assign]
    sampled_label[num_fg:] = 0  # Clamp labels for the background to 0

    # target
    if num_fg > 0:
        target_truth_box = truth_boxes[sampled_assign[:num_fg]]
        target_box = sampled_proposal[:num_fg][:, 1:5]
        sampled_target = bbox_encode(target_box, target_truth_box)
        sampled_target = Variable(torch.from_numpy(sampled_target)).cuda()

    sampled_label = Variable(torch.from_numpy(sampled_label)).long().cuda()
    sampled_proposal = Variable(torch.from_numpy(sampled_proposal)).cuda()
    return sampled_proposal, sampled_label, sampled_assign, sampled_target
def mask_nms(cfg, images, proposals, mask_logits):
    """
    1. resize the masks from the mask head output into proposal size
    2. paste the masks into an image-sized canvas
    3. do non-maximum suppression on mask overlap to remove duplicates

    #<todo> better nms for mask

    :param cfg: configuration; reads mask_test_nms_overlap_threshold,
        mask_test_nms_pre_score_threshold, mask_test_mask_threshold and
        mask_test_mask_min_area
    :param images: (B, C, H, W) tensor; only its size is used
    :param proposals: tensor with one row per proposal:
        [i, x0, y0, x1, y1, score, label, z], i being the image index
    :param mask_logits: tensor indexed as [proposal, label, y, x]
    :return:
        b_multi_masks: list of B (H, W) float arrays; pixels of the k-th
            kept instance are set to k (1-based), 0 elsewhere
        b_mask_instances: list of B (N_b, H, W) float arrays holding the
            pasted mask probabilities of the kept instances
        b_mask_proposals: cuda Variable (sum of N_b, 8) with the kept
            proposal rows; the last column holds the row index into the
            input proposals
    """
    overlap_threshold = cfg.mask_test_nms_overlap_threshold
    pre_score_threshold = cfg.mask_test_nms_pre_score_threshold
    mask_threshold = cfg.mask_test_mask_threshold
    mask_min_area = cfg.mask_test_mask_min_area

    proposals = proposals.cpu().data.numpy()
    mask_logits = mask_logits.cpu().data.numpy()
    mask_probs = np_sigmoid(mask_logits)

    b_multi_masks = []
    b_mask_proposals = []
    b_mask_instances = []
    batch_size, C, H, W = images.size()
    for b in range(batch_size):
        multi_masks = np.zeros((H, W), np.float32)  # multi masks for a image
        mask_proposals = []  # proposals for a image
        mask_instances = []  # instances for a image
        num_keeps = 0

        index = np.where((proposals[:, 0] == b)
                         & (proposals[:, 5] > pre_score_threshold))[0]
        if len(index) != 0:
            instances = []  # all instances
            boxes = []  # all boxes
            for i in index:
                mask = np.zeros((H, W), np.float32)
                # Box corners are inclusive pixel coordinates.
                # NOTE(review): assumes the box lies inside the image;
                # otherwise the paste below fails on a shape mismatch.
                x0, y0, x1, y1 = proposals[i, 1:5].astype(np.int32)
                h, w = y1 - y0 + 1, x1 - x0 + 1
                label = int(proposals[i, 6])  # get label of the instance
                crop = mask_probs[i, label]  # get mask channel of the label
                crop = cv2.resize(crop, (w, h), interpolation=cv2.INTER_LINEAR)
                mask[y0:y1 + 1, x0:x1 + 1] = crop  # paste mask into empty mask
                instances.append(mask)
                boxes.append([x0, y0, x1, y1])

            # compute box overlap, do nms
            L = len(index)
            binary = [instance_to_binary(m, mask_threshold, mask_min_area)
                      for m in instances]
            boxes = np.array(boxes, np.float32)
            box_overlap = cython_box_overlap(boxes, boxes)
            instance_overlap = np.zeros((L, L), np.float32)

            # calculate instance overlapping iou
            for i in range(L):
                instance_overlap[i, i] = 1
                for j in range(i + 1, L):
                    # Masks whose boxes barely touch are left at overlap 0.
                    if box_overlap[i, j] < 0.01:
                        continue

                    # Window enclosing both boxes. The corners are
                    # inclusive, so the slices need +1 to cover the last
                    # row and column of the pasted masks.
                    x0 = int(min(boxes[i, 0], boxes[j, 0]))
                    y0 = int(min(boxes[i, 1], boxes[j, 1]))
                    x1 = int(max(boxes[i, 2], boxes[j, 2]))
                    y1 = int(max(boxes[i, 3], boxes[j, 3]))

                    mi = binary[i][y0:y1 + 1, x0:x1 + 1]
                    mj = binary[j][y0:y1 + 1, x0:x1 + 1]

                    intersection = (mi & mj).sum()
                    union = (mi | mj).sum()
                    instance_overlap[i, j] = intersection / (union + 1e-12)
                    instance_overlap[j, i] = instance_overlap[i, j]

            # non-max-suppression to remove overlapping segmentation
            score = proposals[index, 5]
            sort_idx = list(np.argsort(-score))

            # https://www.pyimagesearch.com/2015/02/16/faster-non-maximum-suppression-python/
            keep = []
            while len(sort_idx) > 0:
                i = sort_idx[0]
                keep.append(i)
                suppressed = set(np.where(instance_overlap[i] > overlap_threshold)[0])
                # Always drop the picked instance itself; relying on its
                # self-overlap of 1 would loop forever for thresholds >= 1.
                suppressed.add(i)
                sort_idx = [e for e in sort_idx if e not in suppressed]

            # filter instances & proposals
            num_keeps = len(keep)
            for rank, k in enumerate(keep):
                multi_masks[np.where(binary[k])] = rank + 1
                mask_instances.append(instances[k].reshape(1, H, W))

                t = index[k]
                # Distinct names so the batch loop variable is not shadowed.
                batch_id, px0, py0, px1, py1, prob, cls, _ = proposals[t]
                mask_proposals.append(
                    np.array([batch_id, px0, py0, px1, py1, prob, cls, t],
                             np.float32))

        if num_keeps == 0:
            mask_proposals = np.zeros((0, 8), np.float32)
            mask_instances = np.zeros((0, H, W), np.float32)
        else:
            mask_proposals = np.vstack(mask_proposals)
            mask_instances = np.vstack(mask_instances)

        b_mask_proposals.append(mask_proposals)
        b_mask_instances.append(mask_instances)
        b_multi_masks.append(multi_masks)

    b_mask_proposals = Variable(torch.from_numpy(np.vstack(b_mask_proposals))).cuda()
    return b_multi_masks, b_mask_instances, b_mask_proposals
def _make_one_mask_target(cfg, mode, image, proposals, truth_box, truth_label, truth_instance):
    """
    Make the mask head targets for one image.

    1. drop proposals that are_small_box rejects
    2. keep proposals whose best truth overlap reaches the fg threshold
    3. sample a fixed number of them
    4. crop the assigned truth instance to each sampled box at mask size

    :param cfg: configuration; reads mask_train_min_size,
        mask_train_fg_thresh_low, mask_train_batch_size and mask_size
    :param mode: not used here
    :param image: input image; only image.size() is called, unpacked into
        three values
    :param proposals: numpy array of proposals, columns 1:5 hold the box,
        e.g. [[i, x0, y0, x1, y1, score, label, ...], ...]
    :param truth_box: numpy array of truth boxes, e.g. [[x0, y0, x1, y1], ...]
    :param truth_label: numpy array with one label per truth box
    :param truth_instance: truth instances, indexable by truth box index
    :return: three cuda Variables (all empty when nothing can be sampled)
        sampled_proposal: the sampled proposal rows
        sampled_label: label of the truth box assigned to each sampled row
        sampled_instance: cropped truth instance for each sampled row
    """
    # Built up front so that every early exit returns the same empty triple.
    nothing_sampled = (
        Variable(torch.FloatTensor(0, 8)).cuda(),
        Variable(torch.LongTensor(0, 1)).cuda(),
        Variable(torch.FloatTensor(0, 1, 1)).cuda(),
    )

    if not (len(truth_box) > 0 and len(proposals) > 0):
        return nothing_sampled

    _channels, _height, _width = image.size()

    # filter invalid proposals like small proposals
    usable_rows = [
        row for row in range(len(proposals))
        if not is_small_box(proposals[row, 1:5], min_size=cfg.mask_train_min_size)
    ]
    if not usable_rows:
        return nothing_sampled
    candidates = proposals[usable_rows]

    # for each candidate, the truth box it overlaps most and by how much
    pairwise = cython_box_overlap(candidates[:, 1:5], truth_box)
    best_truth = np.argmax(pairwise, 1)
    best_overlap = pairwise[np.arange(len(candidates)), best_truth]

    positives = np.where(best_overlap >= cfg.mask_train_fg_thresh_low)[0]
    num_positive = len(positives)
    if num_positive == 0:
        return nothing_sampled

    # sample a fixed batch; repeats are allowed only when positives run short
    batch = cfg.mask_train_batch_size
    picked = positives[
        np.random.choice(num_positive, size=batch, replace=num_positive < batch)
    ]

    chosen = candidates[picked]
    assigned = best_truth[picked]  # truth index for each chosen proposal
    labels = truth_label[assigned]  # label of that truth box

    # crop each assigned truth instance to its proposal box
    crops = [
        _crop_instance(truth_instance[truth_id], chosen[n, 1:5], cfg.mask_size)[np.newaxis, :, :]
        for n, truth_id in enumerate(assigned)
    ]
    stacked = np.vstack(crops)

    # save
    sampled_proposal = Variable(torch.from_numpy(chosen)).cuda()
    sampled_label = Variable(torch.from_numpy(labels)).long().cuda()
    sampled_instance = Variable(torch.from_numpy(stacked)).cuda()
    return sampled_proposal, sampled_label, sampled_instance