def image_eval(pred, gt, ignore, iou_thresh):
    """Score the detections of one image against its ground-truth boxes.

    Args:
        pred: 2-D array of detections. Columns 0-3 are read as a box whose
            columns 2/3 are extents that get added to columns 0/1 (i.e. the
            rows are treated as ``x, y, w, h``). Rows are visited in order.
        gt: 2-D array of ground-truth boxes in the same ``x, y, w, h`` layout.
        ignore: per-ground-truth flags indexed like ``gt``; a value of 0
            marks that ground-truth box as one that must not be counted.
        iou_thresh: minimum overlap for a detection to be considered a hit.

    Returns:
        ``(pred_recall, proposal_list)``. ``pred_recall[i]`` is the number of
        counted ground-truth boxes recalled by detections ``0..i``.
        ``proposal_list[i]`` is -1 when detection ``i`` hit a ground-truth box
        flagged 0 in ``ignore`` and 1 otherwise.
    """
    det_boxes = pred.copy()
    gt_boxes = gt.copy()
    num_det = det_boxes.shape[0]

    pred_recall = np.zeros(num_det)
    recall_list = np.zeros(gt_boxes.shape[0])
    proposal_list = np.ones(num_det)

    # Turn (x, y, w, h) into corner form on the private copies.
    for boxes in (det_boxes, gt_boxes):
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]

    overlaps = bbox_overlaps(det_boxes[:, :4], gt_boxes)

    for det_idx in range(num_det):
        row = overlaps[det_idx]
        best_iou = row.max()
        best_gt = row.argmax()
        hit = best_iou >= iou_thresh

        if hit and ignore[best_gt] == 0:
            # Matched a ground truth that is excluded from scoring.
            recall_list[best_gt] = -1
            proposal_list[det_idx] = -1
        elif hit and recall_list[best_gt] == 0:
            # First detection to reach this ground truth.
            recall_list[best_gt] = 1

        pred_recall[det_idx] = int((recall_list == 1).sum())

    return pred_recall, proposal_list
def proposal_target_layer(rpn_bbox, rpn_cls_prob, gt_boxes, num_classes):
    """Sample RPN proposals and build per-class training targets for them.

    Ground-truth boxes are appended to the proposals (with score 1.0), a
    foreground/background sample is drawn from the score ranges below, and
    class labels plus class-specific regression targets are produced.

    Args:
        rpn_bbox: 2-D array of proposal boxes, one per row.
        rpn_cls_prob: 2-D array of class probabilities; column 1 is used as
            the objectness score of each proposal.
        gt_boxes: 2-D array whose last column is the class id and whose
            remaining columns are the box coordinates.
        num_classes: number of object classes (background excluded); the
            regression target has ``(num_classes + 1) * 4`` columns.

    Returns:
        ``(labels, bbox_reg, rpn_bbox)``: labels of the sampled boxes
        (positives first, background = 0), their regression targets (non-zero
        only in the 4 columns of a positive's class) and the sampled boxes
        in the same order.
    """
    confidence_scores = rpn_cls_prob[:, 1]

    # Add ground truth boxes as part of proposals.
    # np.concatenate takes ONE sequence of arrays; a second positional
    # argument would be interpreted as ``axis``.
    rpn_bbox = np.vstack([rpn_bbox, gt_boxes[:, 0:-1]])
    confidence_scores = np.concatenate(
        [confidence_scores, np.ones(gt_boxes.shape[0], np.float32)])

    # Sample objects: scores >= 0.5, capped at the foreground quota.
    fg_cnt = int(BATCH_SIZE * FG_RATIO)
    fg_idxs = np.where(confidence_scores >= 0.5)[0]
    if len(fg_idxs) > fg_cnt:
        pos_inds = np.random.choice(fg_idxs, size=fg_cnt, replace=False)
    else:
        # Fewer candidates than the quota: keep them all.
        pos_inds = fg_idxs

    # Sample backgrounds: scores in [0.1, 0.5), filling the rest of the batch.
    bg_cnt = BATCH_SIZE - len(pos_inds)
    bg_idxs = np.where((confidence_scores >= 0.1) &
                       (confidence_scores < 0.5))[0]
    if len(bg_idxs) > bg_cnt:
        neg_inds = np.random.choice(bg_idxs, size=bg_cnt, replace=False)
    else:
        neg_inds = bg_idxs

    # Label every positive with the class of its best-overlapping ground
    # truth (+1 so that 0 stays reserved for background).
    pos_bbox = rpn_bbox[pos_inds]
    overlaps = bbox.bbox_overlaps(pos_bbox, gt_boxes[:, 0:-1])
    argmax_overlaps = np.argmax(overlaps, axis=-1)
    pos_labels = gt_boxes[:, -1][argmax_overlaps] + 1
    neg_labels = np.zeros(len(neg_inds), np.int32)
    labels = np.concatenate([pos_labels, neg_labels])

    # Regression targets are computed for the sampled positives only, so the
    # boxes and their assigned ground truths have matching row counts.
    bbox_reg = np.zeros([len(labels), (num_classes + 1) * 4], np.float32)
    bbox_reg_ = bbox.bbox_transform(pos_bbox,
                                    gt_boxes[argmax_overlaps][:, :-1])
    for i in range(len(pos_labels)):
        # gt_boxes may be a float array; slice bounds must be integers.
        cls = int(pos_labels[i])
        bbox_reg[i, cls * 4:(cls + 1) * 4] = bbox_reg_[i]

    neg_bbox = rpn_bbox[neg_inds]
    rpn_bbox = np.vstack([pos_bbox, neg_bbox])
    return labels, bbox_reg, rpn_bbox
def create_roidb_from_box_list(self, box_list, gt_roidb):
    """Build a roidb from per-image proposal boxes.

    Args:
        box_list: sequence with one 2-D box array per image; its length must
            equal ``self.num_images``.
        gt_roidb: optional ground-truth roidb (indexable per image, each entry
            providing ``'boxes'`` and ``'gt_classes'``). When given and the
            image has ground-truth boxes, each proposal records its best
            overlap in the column of the matched ground truth's class.

    Returns:
        A list with one dict per image holding ``'boxes'``, ``'gt_classes'``
        (all zeros), ``'gt_overlaps'`` (sparse CSR matrix of shape
        ``(num_boxes, self.num_classes)``), ``'flipped'`` (False) and
        ``'seg_areas'`` (all zeros).
    """
    assert len(box_list) == self.num_images, \
        'Number of boxes must match number of ground-truth images'
    roidb = []
    for i in range(self.num_images):
        boxes = box_list[i]
        num_boxes = boxes.shape[0]
        overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

        if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
            gt_boxes = gt_roidb[i]['boxes']
            gt_classes = gt_roidb[i]['gt_classes']
            # np.float was removed in NumPy 1.24; np.float64 is the same type.
            gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                        gt_boxes.astype(np.float64))
            argmaxes = gt_overlaps.argmax(axis=1)
            maxes = gt_overlaps.max(axis=1)
            # Only proposals that overlap some ground truth get an entry.
            I = np.where(maxes > 0)[0]
            overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

        overlaps = scipy.sparse.csr_matrix(overlaps)
        roidb.append({
            'boxes': boxes,
            'gt_classes': np.zeros((num_boxes, ), dtype=np.int32),
            'gt_overlaps': overlaps,
            'flipped': False,
            'seg_areas': np.zeros((num_boxes, ), dtype=np.float32),
        })
    return roidb
def imgEval2(pred, gt, iou_thresh, finalHolder):
    """Record, per detection, whether it is the best match of some ground truth.

    For every ground-truth box the detection with the highest overlap is
    looked up; that detection is flagged and remembers the largest such
    overlap. One ``[score, flag, overlap]`` triple per detection is appended
    to ``finalHolder``. Several debug values are printed along the way.

    Args:
        pred: 2-D array with five columns; columns 0-3 are treated as
            ``x, y, w, h`` and column 4 is copied into the output triple.
        gt: 2-D array of ground-truth boxes in ``x, y, w, h`` layout.
        iou_thresh: accepted for signature compatibility; not used.
        finalHolder: list-like object that receives the triples via
            ``append``.

    Returns:
        None.
    """
    print(pred.shape, gt.shape)
    det_boxes = pred.copy()
    gt_boxes = gt.copy()

    # Corner form for the overlap computation.
    det_boxes[:, 2] = det_boxes[:, 0] + det_boxes[:, 2]
    det_boxes[:, 3] = det_boxes[:, 1] + det_boxes[:, 3]
    gt_boxes[:, 2] = gt_boxes[:, 0] + gt_boxes[:, 2]
    gt_boxes[:, 3] = gt_boxes[:, 1] + gt_boxes[:, 3]

    overlaps = bbox_overlaps(det_boxes[:, :4], gt_boxes)
    print(overlaps.shape)
    # One row per ground truth from here on.
    overlaps = overlaps.T

    # Extend the untouched detections with two zero columns:
    # column 5 = "was a best match" flag, column 6 = best overlap seen.
    records = pred.copy()
    z = np.array([[0]] * records.shape[0])
    print("shape of z is ")
    print(z.shape)
    records = np.concatenate((records, z, z), axis=1)
    print(records)

    for gt_idx in range(gt_boxes.shape[0]):
        row = overlaps[gt_idx]
        best_iou = row.max()
        best_det = row.argmax()
        record = records[best_det]  # view: writes land in ``records``
        if best_iou >= record[6]:
            record[6] = best_iou
        record[5] = 1

    for record in records:
        finalHolder.append([record[4], record[5], record[6]])
def _sample_rois(self, all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: 2-D array of RoIs; columns 1-4 hold the box coordinates.
        gt_boxes: 2-D array; columns 0-3 are the box, column 4 the class label.
        fg_rois_per_image: upper bound on sampled foreground RoIs.
        rois_per_image: total number of RoIs wanted for the image.
        num_classes: forwarded to ``self._get_bbox_regression_labels``.

    Returns:
        ``(labels, rois, bbox_targets, bbox_inside_weights)`` for the sampled
        RoIs, foreground RoIs first; background labels are set to 0.
    """
    # overlaps: (rois x gt_boxes)
    # np.float was removed in NumPy 1.24; np.float64 is the same type.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= self.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs. The quota may arrive as a float (e.g. a rounded
    # fraction); sizes and slice bounds must be integers.
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds,
                             size=fg_rois_per_this_image,
                             replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < self.BG_THRESH_HI) &
                       (max_overlaps >= self.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = int(min(bg_rois_per_this_image, bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds,
                             size=bg_rois_per_this_image,
                             replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = self._compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        self._get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets, bbox_inside_weights
def _calc_overlaps(self, anchors, gt_boxes, inds_inside):
    """Compute anchor/ground-truth overlaps and the best matches both ways.

    Args:
        anchors: 2-D array of anchor boxes.
        gt_boxes: 2-D array of ground-truth boxes.
        inds_inside: indices of the anchors kept inside the image; only its
            length is used, and it is expected to equal ``len(anchors)``.

    Returns:
        ``(argmax_overlaps, max_overlaps, gt_max_overlaps,
        gt_argmax_overlaps)``: for each anchor the index and value of its
        best ground truth, for each ground truth its best overlap value, and
        the row indices of every anchor that attains such a per-ground-truth
        maximum (ties included).
    """
    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    # np.float was removed in NumPy 1.24; np.float64 is the same type.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # Re-derive the anchor indices so that ties for a ground truth's best
    # overlap are all included, not just the first one argmax reports.
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
    return argmax_overlaps, max_overlaps, gt_max_overlaps, \
        gt_argmax_overlaps
def image_eval(pred, gt, ignore, iou_thresh):
    """Evaluate one image: cumulative recall and per-detection validity.

    Args:
        pred: 2-D detection array; columns 0-3 are treated as ``x, y, w, h``.
        gt: 2-D ground-truth array in ``x, y, w, h`` layout.
        ignore: flags indexed like ``gt``; 0 means the box is not scored.
        iou_thresh: overlap needed for a detection to match a ground truth.

    Returns:
        ``(pred_recall, proposal_list)``: the running count of recalled,
        scored ground-truth boxes after each detection, and a vector that is
        -1 for detections matching a non-scored ground truth and 1 otherwise.
    """
    dets = pred.copy()
    truths = gt.copy()
    n_dets = dets.shape[0]

    pred_recall = np.zeros(n_dets)
    recall_list = np.zeros(truths.shape[0])
    proposal_list = np.ones(n_dets)

    # (x, y, w, h) -> (x1, y1, x2, y2) on the copies only.
    dets[:, 2] = dets[:, 0] + dets[:, 2]
    dets[:, 3] = dets[:, 1] + dets[:, 3]
    truths[:, 2] = truths[:, 0] + truths[:, 2]
    truths[:, 3] = truths[:, 1] + truths[:, 3]

    overlaps = bbox_overlaps(dets[:, :4], truths)

    for i in range(n_dets):
        ious = overlaps[i]
        top_iou = ious.max()
        top_gt = ious.argmax()
        is_match = top_iou >= iou_thresh

        if is_match and ignore[top_gt] == 0:
            recall_list[top_gt] = -1
            proposal_list[i] = -1
        elif is_match and recall_list[top_gt] == 0:
            recall_list[top_gt] = 1

        # Number of scored ground truths recalled so far.
        pred_recall[i] = int((recall_list == 1).sum())

    return pred_recall, proposal_list
def image_eval(pred, gt, iou_thresh, name, output, _match, _least):
    """Dump ground-truth and prediction overlays for selected images.

    Counts the detections whose best overlap reaches ``iou_thresh``. When
    that count equals ``_match`` and the image has more than ``_least``
    ground-truth boxes, the image is loaded and two copies are written: one
    with the ground-truth boxes drawn, one with the predicted boxes drawn.

    Args:
        pred: 2-D detection array; columns 0-3 are treated as ``x, y, w, h``.
        gt: 2-D ground-truth array in ``x, y, w, h`` layout.
        iou_thresh: overlap needed for a detection to count as matched.
        name: nested sequence; ``name[0][0]`` is the image base name, whose
            prefix before the first ``_`` is the key into ``event_dir``.
        output: path prefix for the written ``*gt.jpg`` / ``*pred.jpg`` files.
        _match: exact number of matched detections that triggers the dump.
        _least: the image must have more ground-truth boxes than this.

    Raises:
        IOError: if the image selected for dumping cannot be read.
    """
    _pred = pred.copy()
    _gt = gt.copy()

    # (x, y, w, h) -> corner form on the copies.
    _pred[:, 2] = _pred[:, 2] + _pred[:, 0]
    _pred[:, 3] = _pred[:, 3] + _pred[:, 1]
    _gt[:, 2] = _gt[:, 2] + _gt[:, 0]
    _gt[:, 3] = _gt[:, 3] + _gt[:, 1]

    overlaps = bbox_overlaps(_pred[:, :4], _gt)

    # An empty overlap matrix (no detections or no ground truth) has no
    # matches; reducing over an empty axis would raise instead.
    if overlaps.size:
        match = int((overlaps.max(axis=1) >= iou_thresh).sum())
    else:
        match = 0

    if match != _match or gt.shape[0] <= _least:
        return

    image_id = name[0][0]
    items = image_id.split("_")
    folder = event_dir[items[0]]
    image_path = '../data/widerface/val/images/' + folder + "/" + image_id + ".jpg"
    print(image_path)

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise IOError('could not read image: ' + image_path)

    def _dump(boxes, suffix):
        # Draw every box in red on a fresh copy and write it next to `output`.
        canvas = image.copy()
        for box in boxes:
            p1 = int(box[0]), int(box[1])
            p2 = int(box[2]), int(box[3])
            cv2.rectangle(canvas, p1, p2, (0, 0, 255),
                          thickness=1, lineType=cv2.LINE_AA)
        cv2.imwrite(output + image_id + suffix, canvas)

    _dump(_gt, "gt.jpg")
    _dump(_pred, "pred.jpg")
def neel_image_eval(pred, gt, finalHolder):
    """Collect ``[best_overlap, score]`` pairs for the detections of one image.

    This is an interactive debugging variant: it prints its inputs, waits for
    a line on stdin, then prints the overlap matrix before processing.

    Args:
        pred: 2-D detection array; columns 0-3 are treated as ``x, y, w, h``
            and column 4 is copied into the output pair.
        gt: 2-D ground-truth array in ``x, y, w, h`` layout.
        finalHolder: list-like object that receives one
            ``[best_overlap, pred[:, 4]]`` pair per detection via ``append``.

    Returns:
        ``(pred_recall, proposal_list)``. Nothing in this variant ever marks a
        ground truth as recalled, so ``pred_recall`` is all zeros and
        ``proposal_list`` is all ones.
    """
    print(pred, gt)
    input()  # pauses until the user presses Enter

    dets = pred.copy()
    truths = gt.copy()
    n_dets = dets.shape[0]

    pred_recall = np.zeros(n_dets)
    recall_list = np.zeros(truths.shape[0])
    proposal_list = np.ones(n_dets)

    # (x, y, w, h) -> corner form on the copies.
    dets[:, 2] = dets[:, 0] + dets[:, 2]
    dets[:, 3] = dets[:, 1] + dets[:, 3]
    truths[:, 2] = truths[:, 0] + truths[:, 2]
    truths[:, 3] = truths[:, 1] + truths[:, 3]

    overlaps = bbox_overlaps(dets[:, :4], truths)
    print(overlaps)
    print("----------")

    for i in range(n_dets):
        best_iou = overlaps[i].max()
        finalHolder.append([best_iou, dets[i][4]])
        pred_recall[i] = int((recall_list == 1).sum())

    return pred_recall, proposal_list
def image_eval(pred, gt, ignore, iou_thresh):
    """Single-image evaluation of detections against ground truth.

    Args:
        pred: 2-D detection array; columns 0-3 are ``xmin, ymin, w, h``.
        gt: 2-D ground-truth array laid out as ``xmin, ymin, w, h``.
        ignore: flags indexed like ``gt``; a 0 excludes that box from recall.
        iou_thresh: overlap required for a detection to match its best
            ground-truth box.

    Returns:
        ``(pred_recall, proposal_list)``: ``pred_recall[i]`` counts the
        non-excluded ground truths recalled by detections ``0..i``;
        ``proposal_list[i]`` is -1 if detection ``i`` matched an excluded
        ground truth, else 1.
    """
    boxes_pred = pred.copy()
    boxes_gt = gt.copy()
    total = boxes_pred.shape[0]

    pred_recall = np.zeros(total)
    recall_list = np.zeros(boxes_gt.shape[0])
    proposal_list = np.ones(total)

    # xmax = xmin + w, ymax = ymin + h
    for arr in (boxes_pred, boxes_gt):
        arr[:, 2] = arr[:, 0] + arr[:, 2]
        arr[:, 3] = arr[:, 1] + arr[:, 3]

    overlaps = bbox_overlaps(boxes_pred[:, :4], boxes_gt)

    for n in range(total):
        per_gt = overlaps[n]
        peak = per_gt.max()
        peak_idx = per_gt.argmax()
        reached = peak >= iou_thresh

        if reached and ignore[peak_idx] == 0:
            # Best match is an excluded ground truth: discount this detection.
            recall_list[peak_idx] = -1
            proposal_list[n] = -1
        elif reached and recall_list[peak_idx] == 0:
            # Best match is a counted ground truth that was not hit before.
            recall_list[peak_idx] = 1

        pred_recall[n] = int((recall_list == 1).sum())

    return pred_recall, proposal_list
def anchor_target_layer(gt_boxes, all_anchors, image_shape, feature_map_shape, k):
    """Label anchors as foreground/background and compute regression targets.

    :param gt_boxes: 2-D array of ground-truth boxes (one per row)
    :param all_anchors: 2-D array of anchors; columns 0-3 are compared
        against the image bounds as ``x1, y1, x2, y2``
    :param image_shape: ``image_shape[0]`` bounds column 3 and
        ``image_shape[1]`` bounds column 2 of the anchors
    :param feature_map_shape: not used by this function
    :param k: not used by this function
    :return: ``(labels, targets)`` with one entry per anchor in
        ``all_anchors``; labels are 1 (foreground), 0 (background) or
        -1 (ignored), targets come from ``bbox.bbox_transform``. Anchors that
        cross the image border get label -1 and zero targets.
    """
    # No object in the image: every anchor is background with zero targets.
    if len(gt_boxes) == 0:
        return (np.zeros((len(all_anchors),), dtype=np.int32),
                np.zeros(all_anchors.shape, dtype=np.float32))

    num_total_anchors = all_anchors.shape[0]

    # Keep only the anchors lying fully inside the image.
    inside = ((all_anchors[:, 0] >= 0) &
              (all_anchors[:, 1] >= 0) &
              (all_anchors[:, 2] < image_shape[1]) &
              (all_anchors[:, 3] < image_shape[0]))
    valid_idx = np.where(inside)[0]
    anchors = all_anchors[valid_idx, :]
    num_valid = len(valid_idx)

    labels = np.empty((num_valid,), dtype=np.int32)
    labels[:] = -1

    overlaps = bbox.bbox_overlaps(anchors, gt_boxes)
    best_gt = np.argmax(overlaps, axis=1)
    best_iou = overlaps[np.arange(num_valid), best_gt]
    best_anchor = np.argmax(overlaps, axis=0)
    peak_per_gt = overlaps[best_anchor, np.arange(gt_boxes.shape[0])]
    # Every anchor that ties for a ground truth's best overlap.
    # NOTE(review): a ground truth whose best overlap is 0 makes every
    # zero-overlap anchor match here - confirm that is intended.
    peak_anchor_rows = np.where(overlaps == peak_per_gt)[0]

    # Background first, so the two foreground rules can override it.
    labels[best_iou < BG_LOW_THRES] = 0
    labels[peak_anchor_rows] = 1
    labels[best_iou > FG_HIGH_THRES] = 1

    targets = bbox.bbox_transform(anchors, gt_boxes[best_gt, :])

    def _disable_surplus(value, quota):
        # Randomly switch labels equal to `value` to -1 until at most `quota`
        # of them remain.
        idxs = np.where(labels == value)[0]
        surplus = len(idxs) - quota
        if surplus > 0:
            drop = np.random.choice(idxs, size=surplus, replace=False)
            labels[drop] = -1

    # Sampling positive and negative anchors.
    _disable_surplus(1, int(SAMPLE_NUMBER * FG_RATIO))
    _disable_surplus(0, SAMPLE_NUMBER - np.sum(labels == 1))

    # Scatter the results back onto the full anchor set.
    labels = _unmap(labels, num_total_anchors, valid_idx, -1)
    targets = _unmap(targets, num_total_anchors, valid_idx, 0)
    return labels, targets
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, data, _feat_stride=[16, ], anchor_scales=[4, 8, 16, 32]):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.

    Args:
        rpn_cls_score: RPN classification output; only its shape is used.
            ``shape[0]`` must be 1 and ``shape[1:3]`` is taken as the
            feature-map ``(height, width)``.
        gt_boxes: 2-D array of ground-truth boxes, passed to
            ``bbox_overlaps`` and ``_compute_targets``.
        im_info: sequence whose first element is ``[image_height,
            image_width, scale]``.
        data: the input image blob; not used by this function.
        _feat_stride: stride of the feature map relative to the image
            (a one-element list; never mutated here).
        anchor_scales: scales handed to ``generate_anchors`` (never mutated
            here).

    Returns:
        ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``. With A anchors per cell:
        ``rpn_labels`` has shape ``(1, 1, A * height, width)`` with
        1 = positive, 0 = negative, -1 = don't care; the other three have
        shape ``(1, A * 4, height, width)``. The inside weights select which
        anchors take part in the regression loss (positives only); the outside
        weights normalise the contribution of positives and negatives.
    """
    # Base anchors produced from the requested scales.
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print(np.hstack((
            _anchors[:, 2::4] - _anchors[:, 0::4],
            _anchors[:, 3::4] - _anchors[:, 1::4],
        )))
        # Running statistics; only read in the DEBUG sections below.
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    im_info = im_info[0]

    # Algorithm:
    #
    # for each (H, W) location i
    #   generate the anchor boxes centered on cell i
    #   filter out-of-image anchors
    #   measure GT overlap

    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # the height and width of the feature map
    height, width = rpn_cls_score.shape[1:3]

    if DEBUG:
        print('AnchorTargetLayer: height', height, 'width', width)
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape', gt_boxes.shape)
        print('rpn: gt_boxes', gt_boxes)

    # 1. Generate the shifted anchors for every feature-map cell.
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]  # the number of cells in the feature map
    all_anchors = (_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)    # height
    )[0]

    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', len(inds_inside))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors.shape', anchors.shape)

    # Anchors crossing the image border were dropped above; the remaining
    # ones are split into positive, negative and don't care.
    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt): rows index anchors, columns index gt boxes.
    # np.float was removed in NumPy 1.24; np.float64 is the same type.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # Include every anchor that ties for a gt box's best overlap.
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    # (RPN_BATCHSIZE is the number of anchors used per batch)
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    # Regression targets of every inside anchor w.r.t. its best gt box.
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors,
                                 inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors,
                                  inds_inside, fill=0)

    if DEBUG:
        print('rpn: max max_overlap', np.max(max_overlaps))
        print('rpn: num_positive', np.sum(labels == 1))
        print('rpn: num_negative', np.sum(labels == 0))
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print('rpn: num_positive avg', _fg_sum / _count)
        print('rpn: num_negative avg', _bg_sum / _count)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    rpn_bbox_targets = bbox_targets

    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    rpn_bbox_outside_weights = bbox_outside_weights

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def proposal(img, gt_bboxes, detector=None):
    '''Propose negative, positive and part-face samples for one image.

    With a ``detector`` the candidate boxes come from the previous network
    stage(s) and are sorted into the three groups by their best overlap with
    the ground truth. Without one, candidates are generated by sliding
    windows around every face plus random crops over the whole image.

    Args:
        img: image array; ``img.shape[:-1]`` is read as ``(height, width)``.
        gt_bboxes: 2-D array of face boxes, rows ``[x1, y1, x2, y2]``.
        detector: optional ``JfdaDetector``; when given its detections are
            used as candidates.

    Returns:
        ``(negatives, positives, part)`` where
        negatives: list of ``(data, bbox)``
        positives: list of ``(data, bbox, bbox_target)``
        part: list of ``(data, bbox, bbox_target)``
        ``data`` is the crop returned by ``crop_face`` and ``bbox_target`` is
        ``(gt_bbox - bbox)`` divided by the window size.
    '''
    # ======================= proposal for rnet and onet ==============
    if detector is not None:
        assert isinstance(detector, JfdaDetector)
        bboxes = detector.detect(img, **cfg.DETECT_PARAMS)
        # keep bbox info, drop score, offset and landmark
        bboxes = bboxes[:, :4]
        ovs = bbox_overlaps(bboxes, gt_bboxes)
        ovs_max = ovs.max(axis=1)
        ovs_idx = ovs.argmax(axis=1)
        pos_idx = np.where(ovs_max > cfg.FACE_OVERLAP)[0]
        neg_idx = np.where(ovs_max < cfg.NONFACE_OVERLAP)[0]
        part_idx = np.where(
            np.logical_and(ovs_max > cfg.PARTFACE_OVERLAP,
                           ovs_max <= cfg.FACE_OVERLAP))[0]

        # pos, then part: both store the crop, the box and its offset to the
        # best-overlapping ground truth, normalised by the box width.
        positives = []
        part = []
        for indices, bucket in ((pos_idx, positives), (part_idx, part)):
            for idx in indices:
                bbox = bboxes[idx].reshape(4)
                gt_bbox = gt_bboxes[ovs_idx[idx]]
                data = crop_face(img, bbox)
                if data is None:
                    continue
                k = bbox[2] - bbox[0]
                bbox_target = (gt_bbox - bbox) / k
                bucket.append((data, bbox, bbox_target))

        # neg
        negatives = []
        np.random.shuffle(neg_idx)
        for idx in neg_idx[:cfg.NEG_DETECT_PER_IMAGE]:
            bbox = bboxes[idx].reshape(4)
            data = crop_face(img, bbox)
            if data is None:
                continue
            negatives.append((data, bbox))
        return negatives, positives, part

    # ======================= proposal for pnet =======================
    height, width = img.shape[:-1]
    negatives, positives, part = [], [], []

    # ===== proposal positives =====
    for gt_bbox in gt_bboxes:
        x, y = gt_bbox[:2]
        w, h = gt_bbox[2] - gt_bbox[0], gt_bbox[3] - gt_bbox[1]
        this_positives = []
        for scale in cfg.POS_PROPOSAL_SCALES:
            k = max(w, h) * scale
            stride = cfg.POS_PROPOSAL_STRIDE
            s = k * stride
            # Random margin around the face so windows are not grid-aligned.
            offset_x = (0.5 + np.random.rand()) * k / 2.
            offset_y = (0.5 + np.random.rand()) * k / 2.
            candidates = sliding_windows(x - offset_x, y - offset_y,
                                         w + 2 * offset_x, h + 2 * offset_y,
                                         k, k, s, s)
            ovs = bbox_overlaps(candidates, gt_bbox.reshape((1, 4)))
            ovs = ovs.reshape((1, len(candidates)))[0]
            pos_bboxes = candidates[ovs > cfg.FACE_OVERLAP, :]
            if len(pos_bboxes) > 0:
                np.random.shuffle(pos_bboxes)
            for bbox in pos_bboxes[:cfg.POS_PER_FACE]:
                data = crop_face(img, bbox)
                if data is None:
                    continue
                bbox_target = (gt_bbox - bbox) / k
                this_positives.append((data, bbox, bbox_target))
        random.shuffle(this_positives)
        positives.extend(this_positives[:cfg.POS_PER_FACE])

    # ===== proposal part faces =====
    for gt_bbox in gt_bboxes:
        x, y = gt_bbox[:2]
        w, h = gt_bbox[2] - gt_bbox[0], gt_bbox[3] - gt_bbox[1]
        this_part = []
        for scale in cfg.PART_PROPOSAL_SCALES:
            k = max(w, h) * scale
            stride = cfg.PART_PROPOSAL_STRIDE
            s = k * stride
            offset_x = (0.5 + np.random.rand()) * k / 2.
            offset_y = (0.5 + np.random.rand()) * k / 2.
            candidates = sliding_windows(x - offset_x, y - offset_y,
                                         w + 2 * offset_x, h + 2 * offset_y,
                                         k, k, s, s)
            ovs = bbox_overlaps(candidates, gt_bbox.reshape((1, 4)))
            ovs = ovs.reshape((1, len(candidates)))[0]
            part_bboxes = candidates[np.logical_and(
                ovs > cfg.PARTFACE_OVERLAP, ovs <= cfg.FACE_OVERLAP), :]
            if len(part_bboxes) > 0:
                np.random.shuffle(part_bboxes)
            for bbox in part_bboxes[:cfg.PART_PER_FACE]:
                data = crop_face(img, bbox)
                if data is None:
                    continue
                bbox_target = (gt_bbox - bbox) / k
                this_part.append((data, bbox, bbox_target))
        random.shuffle(this_part)
        # Cap with the part-face limit, mirroring the positive and negative
        # sections (this used the positive limit by mistake).
        part.extend(this_part[:cfg.PART_PER_FACE])

    # ===== proposal negatives =====
    for gt_bbox in gt_bboxes:
        x, y = gt_bbox[:2]
        w, h = gt_bbox[2] - gt_bbox[0], gt_bbox[3] - gt_bbox[1]
        this_negatives = []
        for scale in cfg.NEG_PROPOSAL_SCALES:
            k = max(w, h) * scale
            stride = cfg.NEG_PROPOSAL_STRIDE
            s = k * stride
            offset_x = (0.5 + np.random.rand()) * k / 2.
            offset_y = (0.5 + np.random.rand()) * k / 2.
            candidates = sliding_windows(x - offset_x, y - offset_y,
                                         w + 2 * offset_x, h + 2 * offset_y,
                                         k, k, s, s)
            # A negative must stay clear of EVERY face, not just this one.
            ovs = bbox_overlaps(candidates, gt_bboxes)
            neg_bboxes = candidates[ovs.max(axis=1) < cfg.NONFACE_OVERLAP, :]
            if len(neg_bboxes) > 0:
                np.random.shuffle(neg_bboxes)
            for bbox in neg_bboxes[:cfg.NEG_PER_FACE]:
                data = crop_face(img, bbox)
                if data is None:
                    continue
                this_negatives.append((data, bbox))
        random.shuffle(this_negatives)
        negatives.extend(this_negatives[:cfg.NEG_PER_FACE])

    # negatives from global image random crop
    max_num_from_fr = int(cfg.NEG_PER_IMAGE * cfg.NEG_FROM_FR_RATIO)
    if len(negatives) > max_num_from_fr:
        random.shuffle(negatives)
        negatives = negatives[:max_num_from_fr]

    bbox_neg = []
    range_x, range_y = width - cfg.NEG_MIN_SIZE, height - cfg.NEG_MIN_SIZE
    for i in range(0, cfg.NEG_PROPOSAL_RATIO * cfg.NEG_PER_IMAGE):
        x1, y1 = np.random.randint(range_x), np.random.randint(range_y)
        # The size is drawn below the remaining room on both axes, so the
        # square crop always lies inside the image.
        w = h = np.random.randint(low=cfg.NEG_MIN_SIZE,
                                  high=min(width - x1, height - y1))
        x2, y2 = x1 + w, y1 + h
        bbox_neg.append([x1, y1, x2, y2])
    bbox_neg = np.asarray(bbox_neg, dtype=gt_bboxes.dtype)
    ovs = bbox_overlaps(bbox_neg, gt_bboxes)
    bbox_neg = bbox_neg[ovs.max(axis=1) < cfg.NONFACE_OVERLAP]
    np.random.shuffle(bbox_neg)
    if not cfg.NEG_FORCE_BALANCE:
        remain = cfg.NEG_PER_IMAGE - len(negatives)
    else:
        # balance ratio from face region and global crop
        remain = len(negatives) * (
            1. - cfg.NEG_FROM_FR_RATIO) / cfg.NEG_FROM_FR_RATIO
        remain = int(remain)
    bbox_neg = bbox_neg[:remain]

    for bbox in bbox_neg:
        data = crop_face(img, bbox)
        # Same guard as every other crop above: never store a failed crop.
        if data is None:
            continue
        negatives.append((data, bbox))
    return negatives, positives, part
def evaluate_recall(self, candidate_boxes=None, thresholds=None, area='all', limit=None):
    """Evaluate detection proposal recall metrics.

    Args:
        candidate_boxes: optional sequence with one 2-D box array per image.
            When omitted, the non-ground-truth boxes stored in ``self.roidb``
            are used.
        thresholds: IoU thresholds at which recall is computed; defaults to
            0.5 through 0.95 in steps of 0.05.
        area: name of the ground-truth area range to evaluate ('all',
            'small', 'medium', 'large', '96-128', '128-256', '256-512',
            '512-inf').
        limit: if given, only the first ``limit`` boxes of each image are
            considered.

    Returns:
        results: dictionary of results with keys
            'ar': average recall
            'recalls': vector recalls at each IoU overlap threshold
            'thresholds': vector of IoU overlap thresholds
            'gt_overlaps': vector of all ground-truth overlaps
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    areas = {
        'all': 0,
        'small': 1,
        'medium': 2,
        'large': 3,
        '96-128': 4,
        '128-256': 5,
        '256-512': 6,
        '512-inf': 7
    }
    area_ranges = [
        [0**2, 1e5**2],  # all
        [0**2, 32**2],  # small
        [32**2, 96**2],  # medium
        [96**2, 1e5**2],  # large
        [96**2, 128**2],  # 96-128
        [128**2, 256**2],  # 128-256
        [256**2, 512**2],  # 256-512
        [512**2, 1e5**2],  # 512-inf
    ]
    assert area in areas, 'unknown area range: {}'.format(area)
    area_range = area_ranges[areas[area]]
    gt_overlaps = np.zeros(0)
    num_pos = 0
    for i in range(self.num_images):
        # Checking for max_overlaps == 1 avoids including crowd annotations
        # (...pretty hacking :/)
        max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1)
        gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) &
                           (max_gt_overlaps == 1))[0]
        gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
        gt_areas = self.roidb[i]['seg_areas'][gt_inds]
        valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                                 (gt_areas <= area_range[1]))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)

        if candidate_boxes is None:
            # If candidate_boxes is not supplied, the default is to use the
            # non-ground-truth boxes from this roidb
            non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
            boxes = self.roidb[i]['boxes'][non_gt_inds, :]
        else:
            boxes = candidate_boxes[i]
        if boxes.shape[0] == 0:
            # No proposals: the ground truths above still count as positives.
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]

        # np.float was removed in NumPy 1.24; np.float64 is the same type.
        overlaps = bbox_overlaps(boxes.astype(np.float64),
                                 gt_boxes.astype(np.float64))

        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        # Greedy one-to-one matching: repeatedly take the best remaining
        # (proposal, gt) pair and retire both.
        for j in range(gt_boxes.shape[0]):
            # find which proposal box maximally covers each gt box
            argmax_overlaps = overlaps.argmax(axis=0)
            # and get the iou amount of coverage for each gt box
            max_overlaps = overlaps.max(axis=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert (gt_ovr >= 0)
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert (_gt_overlaps[j] == gt_ovr)
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        # append recorded iou coverage level
        gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

    gt_overlaps = np.sort(gt_overlaps)
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    # ar = 2 * np.trapz(recalls, thresholds)
    ar = recalls.mean()
    return {
        'ar': ar,
        'recalls': recalls,
        'thresholds': thresholds,
        'gt_overlaps': gt_overlaps
    }
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride=[16],
                        anchor_scales=[16]):
    """Assign anchors to ground-truth targets.

    Produces anchor classification labels and bounding-box regression
    targets.

    Parameters
    ----------
    rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer; only
        its shape is read here
    gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
    im_info: sequence whose first element is
        [image_height, image_width, scale_ratio]
    _feat_stride: the downsampling ratio of feature map to the original
        input image (the default list is never mutated)
    anchor_scales: the scales passed to ``generate_anchors`` (the default
        list is never mutated)

    Returns
    ----------
    rpn_labels: (1, H, W, A), for each anchor, 0 denotes bg, 1 fg,
        -1 dontcare
    rpn_bbox_targets: (1, H, W, Ax4), regression targets computed by
        ``_compute_targets`` between each anchor and its best-overlapping
        gt box
    rpn_bbox_inside_weights: (1, H, W, Ax4), ``cfg.RPN_BBOX_INSIDE_WEIGHTS``
        for fg anchors and 0 elsewhere
    rpn_bbox_outside_weights: (1, H, W, Ax4), per-anchor weights used to
        balance fg and bg, whose counts may differ significantly
    """
    # Base anchors, placed at every feature-map cell further below.
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print(
            np.hstack((
                _anchors[:, 2::4] - _anchors[:, 0::4],
                _anchors[:, 3::4] - _anchors[:, 1::4],
            )))
        # Accumulators that are only read by the DEBUG reports below.
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    im_info = im_info[0]  # [image_height, image_width, scale_ratio]
    if DEBUG:
        print("im_info: ", im_info)

    # Algorithm:
    # for each (H, W) location i
    #   generate the base anchor boxes centered on cell i
    #   filter out-of-image anchors
    #   measure GT overlap
    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W): height and width of the feature map
    height, width = rpn_cls_score.shape[1:3]

    if DEBUG:
        print('AnchorTargetLayer: height', height, 'width', width)
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape', gt_boxes.shape)
        print('rpn: gt_boxes', gt_boxes)

    # 1. Generate shifted anchors: offsets of every feature-map cell in
    # input-image coordinates.
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # in W H order
    # K is H x W
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    all_anchors = (_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', len(inds_inside))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors.shape', anchors.shape)

    # --------------------------------------------------------------
    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt): one row per inside anchor, one column per gt box
    # np.float was removed in NumPy 1.24; np.float64 is the same dtype.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # for every anchor: the gt box it overlaps most, and that overlap
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # for every gt box: the best overlap reached by any anchor ...
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # ... and every anchor that reaches it (ties included)
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.RPN_POSITIVE_OVERLAP] = 1

    if cfg.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.RPN_FG_FRACTION * cfg.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        # randomly switch the surplus positives to dontcare
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many; negatives fill whatever
    # part of the batch the positives left free
    num_bg = cfg.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    # --------------------------------------------------------------
    # Labels are final; compute the regression targets of every inside
    # anchor against its best-overlapping gt box.
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # inside weights: configured weights for fg anchors, 0 for the rest
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting: positives weigh 1, negatives weigh 0
        positive_weights = np.ones((1, 4))
        negative_weights = np.zeros((1, 4))
    else:
        assert ((cfg.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.RPN_POSITIVE_WEIGHT < 1))
        # The "+ 1" smooths the denominator so an empty class cannot divide
        # by zero.  It used to sit outside the division, which added 1 to
        # the weight itself.
        positive_weights = (cfg.RPN_POSITIVE_WEIGHT /
                            (np.sum(labels == 1) + 1))
        negative_weights = ((1.0 - cfg.RPN_POSITIVE_WEIGHT) /
                            (np.sum(labels == 0) + 1))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # map up to original set of anchors: the out-of-image anchors dropped
    # earlier come back as dontcare labels with zero targets and weights
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors,
                                 inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors,
                                  inds_inside, fill=0)

    if DEBUG:
        print('rpn: max max_overlap', np.max(max_overlaps))
        print('rpn: num_positive', np.sum(labels == 1))
        print('rpn: num_negative', np.sum(labels == 0))
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print('rpn: num_positive avg', _fg_sum / _count)
        print('rpn: num_negative avg', _bg_sum / _count)

    # reshape everything back onto the feature-map grid
    rpn_labels = labels.reshape((1, height, width, A))
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))

    if DEBUG:
        print("anchor target set")
    return (rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
            rpn_bbox_outside_weights)
def label_anchors(self, anchors, gt_anchors, pos_threshold=0.7,
                  neg_threshold=0.5):
    """Label each anchor (text or non-text).

    Args:
        anchors: A numpy array with shape [num_anchors, 4] contains the
            coordinates of each anchor.
        gt_anchors: A numpy array with shape [num_gt_anchors, 4] contains
            the coordinates of each ground-truth anchor.
        pos_threshold: A IoU threshold for determining an anchor is
            positive.
        neg_threshold: A IoU threshold for determining an anchor is
            negative.

    Returns:
        cls_anchors: A numpy integer array with shape [num_anchors] contains
            the class of each anchor: 1 positive, 0 negative, -1 unlabeled.
        pos_anchors: A tuple ``(anchor_indices, gt_indices)`` of two index
            arrays of equal length; entry k pairs a positive anchor with
            the ground-truth anchor it was matched to.
    """
    # Label of every anchor, -1 until decided otherwise.  np.int/np.float
    # were removed in NumPy 1.24; int/np.float64 are the same dtypes.
    cls_anchors = np.full(anchors.shape[0], -1, dtype=int)

    # IoU between the anchors (rows) and the ground truth anchors (columns)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_anchors, dtype=np.float64))

    # i. Negative anchors (< neg_threshold IoU overlap with all GT boxes)
    max_overlaps = overlaps.max(axis=1)
    cls_anchors[max_overlaps < neg_threshold] = 0

    # ii. The anchors with the highest IoU overlap with each GT box.
    best_anchor_per_gt = overlaps.argmax(axis=0)
    cls_anchors[best_anchor_per_gt] = 1
    gt_ids = np.arange(len(best_anchor_per_gt))

    # iii. Anchors that have > pos_threshold IoU overlap with any GT box
    above_rows, above_cols = np.where(overlaps > pos_threshold)
    cls_anchors[above_rows] = 1

    # Add the (best anchor, gt) pairs from step ii whose anchor is not
    # already among the above-threshold anchors.
    already_listed = np.isin(best_anchor_per_gt, above_rows)
    new_anchors_id = np.where(~already_listed)[0]
    if len(new_anchors_id) > 0:
        pos_anchors = (
            np.append(above_rows, best_anchor_per_gt[new_anchors_id]),
            np.append(above_cols, gt_ids[new_anchors_id]),
        )
    else:
        pos_anchors = (above_rows, above_cols)
    return cls_anchors, pos_anchors