def compute_bbox_regression_targets(rois, overlaps, labels):
    """
    given rois, overlaps, gt labels, compute bounding box regression targets
    :param rois: roidb[i]['boxes'] k * 4
    :param overlaps: roidb[i]['max_overlaps'] k * 1
    :param labels: roidb[i]['max_classes'] k * 1
    :return: targets[i][class, dx, dy, dw, dh] k * 5 (float32); rows of rois
        below config.TRAIN.BBOX_REGRESSION_THRESH stay all-zero
    """
    # Ensure ROIs are floats. np.float64 is what the removed `np.float`
    # alias (the builtin float) resolved to as a dtype.
    rois = rois.astype(np.float64, copy=False)

    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)

    # Sanity check
    if len(rois) != len(overlaps):
        print('bbox regression: this should not happen')

    # Indices of ground-truth ROIs (a gt box overlaps itself with IoU == 1)
    gt_inds = np.where(overlaps == 1)[0]
    if len(gt_inds) == 0:
        print('something wrong : zero ground truth rois')
        # Without any gt roi there is nothing to regress to; the argmax
        # below would fail on an empty axis, so hand back zero targets.
        return targets

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= config.TRAIN.BBOX_REGRESSION_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI
    ex_gt_overlaps = bbox_overlaps(rois[ex_inds, :], rois[gt_inds, :])

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]

    targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
    return targets
def assign_anchor(feat_shape, gt_boxes, im_info, config, feat_stride=16,
                  scales=(8, 16, 32), ratios=(0.5, 1, 2), allowed_border=0):
    """
    assign ground truth boxes to anchor positions
    :param feat_shape: infer output shape; only the last two entries
        (feat_height, feat_width) are used
    :param gt_boxes: assign ground truth; columns 0..3 are used as box coordinates
    :param im_info: filter out anchors overlapped with edges; im_info[0] is
        read as (height, width, ...)
    :param config: config object providing the TRAIN.RPN_* settings
    :param feat_stride: anchor position step
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :return: dict of label
    'label': shape (1, num_anchors * feat_height * feat_width);
        1 is positive, 0 is negative, -1 is dont care
    'bbox_target': shape (1, num_anchors * 4, feat_height, feat_width)
    'bbox_weight': same shape as 'bbox_target'; config.TRAIN.RPN_BBOX_WEIGHTS
        on positive anchors, 0 elsewhere
    """

    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        if len(data.shape) == 1:
            ret = np.empty((count,), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    base_anchors = generate_anchors(base_size=feat_stride, ratios=list(ratios), scales=scales)
    num_anchors = base_anchors.shape[0]
    feat_height, feat_width = feat_shape[-2:]

    if DEBUG:
        print('anchors:')
        print(base_anchors)
        print('anchor shapes:')
        print(np.hstack((base_anchors[:, 2::4] - base_anchors[:, 0::4],
                         base_anchors[:, 3::4] - base_anchors[:, 1::4])))
        print('im_info {}'.format(im_info))
        print('height {} width {}'.format(feat_height, feat_width))
        print('gt_boxes shape {}'.format(gt_boxes.shape))
        print('gt_boxes {}'.format(gt_boxes))

    # 1. place the base anchors at every feature-map cell
    shift_x = np.arange(0, feat_width) * feat_stride
    shift_y = np.arange(0, feat_height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = num_anchors
    K = shifts.shape[0]
    all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                           (all_anchors[:, 1] >= -allowed_border) &
                           (all_anchors[:, 2] < im_info[1] + allowed_border) &
                           (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
    if DEBUG:
        print('total_anchors {}'.format(total_anchors))
        print('inds_inside {}'.format(len(inds_inside)))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors shape {}'.format(anchors.shape))

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt); np.float64 replaces the removed `np.float` alias
        overlaps = bbox_overlaps(anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not config.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < config.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= config.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if config.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < config.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(config.TRAIN.RPN_FG_FRACTION * config.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            # deterministic choice while debugging
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = config.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        if DEBUG:
            disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
        labels[disable_inds] = -1

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4])

    bbox_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(config.TRAIN.RPN_BBOX_WEIGHTS)

    if DEBUG:
        _sums = bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums = (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
        _counts = np.sum(labels == 1)
        means = _sums / (_counts + 1e-14)
        stds = np.sqrt(_squared_sums / _counts - means ** 2)
        print('means {}'.format(means))
        print('stdevs {}'.format(stds))

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        print('rpn: num_positives {}'.format(np.sum(labels == 1)))
        print('rpn: num_negatives {}'.format(np.sum(labels == 0)))
        _fg_sum = np.sum(labels == 1)
        _bg_sum = np.sum(labels == 0)
        _count = 1
        print('rpn: num_positive avg {}'.format(_fg_sum // _count))
        print('rpn: num_negative avg {}'.format(_bg_sum // _count))

    # anchors are ordered (cell, anchor); move the anchor axis in front of the
    # spatial axes before flattening / returning
    labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, A * feat_height * feat_width))
    bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
    bbox_weights = bbox_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

    label = {'label': labels,
             'bbox_target': bbox_targets,
             'bbox_weight': bbox_weights}
    return label
def fpn_assign_anchor(feat_shape, gt_boxes, im_info, config, feat_stride=(4, 8, 16, 32, 64),
                      scales=(8,), ratios=(0.5, 1, 2), allowed_border=1):
    """
    assign ground truth boxes to anchor positions on several feature maps
    :param feat_shape: list of infer output shape, one per feature map (1 to 5
        entries); only the last two entries (height, width) of each are used
    :param gt_boxes: assign ground truth:[n, 5]; columns 0..3 are used as box coordinates
    :param im_info: image info; only used for debug output here, because
        anchors outside the image are kept
    :param config: config object providing the TRAIN.RPN_* settings
    :param feat_stride: anchor position step per feature map; needs at least
        len(feat_shape) entries, extra entries are ignored
    :param scales: used to generate anchors, affects num_anchors (per location);
        a scalar is treated as a single scale
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: when truthy, the per-level arrays are passed through
        untouched; when 0 they are copied into fresh float32 arrays
    :return: dict with, for every level i = 1..len(feat_shape),
    'label<i>': shape (1, num_anchors * feat_height_i * feat_width_i);
        1 is positive, 0 is negative, -1 is dont care
    'bbox_target<i>': shape (1, num_anchors * 4, feat_height_i, feat_width_i)
    'bbox_weight<i>': same shape as 'bbox_target<i>'; config.TRAIN.RPN_BBOX_WEIGHTS
        on positive anchors, 0 elsewhere
    :raises ValueError: if feat_shape is empty or has more than 5 entries, or
        if feat_stride has fewer entries than feat_shape
    """

    def _unmap(data, count, inds, fill=0, allowed_border=allowed_border):
        """ unmap a subset inds of data into original data of size count """
        if allowed_border:
            return data
        if len(data.shape) == 1:
            ret = np.empty((count,), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    DEBUG = False
    im_info = im_info[0]
    # atleast_1d: a bare scalar (e.g. the historical default `(8)`) becomes a
    # one-element array instead of a 0-d array
    scales = np.atleast_1d(np.array(scales, dtype=np.float32))

    num_levels = len(feat_shape)
    if num_levels == 0:
        raise ValueError('feat_shape must contain at least one feature map shape')
    if num_levels > 5:
        raise ValueError('RPN anchorloader only support max number of feature map of 5!')
    # Only too few strides is an error: passing the default 5 strides with 3 or
    # 4 feature maps has always been accepted.
    if len(feat_stride) < num_levels:
        raise ValueError('length of feat_stride is not equal to length of feat_shape')

    all_anchors_list = []
    anchors_counter = []  # cumulative anchor count after each level
    total_anchors = 0
    for i in range(num_levels):
        stride = feat_stride[i]
        base_anchors = generate_anchors(base_size=stride, ratios=list(ratios), scales=scales)
        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = (feat_shape[i])[-2:]

        if DEBUG:
            print('anchors:')
            print(base_anchors)
            print('anchor shapes:')
            print(np.hstack((base_anchors[:, 2::4] - base_anchors[:, 0::4],
                             base_anchors[:, 3::4] - base_anchors[:, 1::4])))
            print('im_info {}'.format(im_info))
            print('height {} width {}'.format(feat_height, feat_width))
            print('gt_boxes shape {}'.format(gt_boxes.shape))
            print('gt_boxes {}'.format(gt_boxes))

        # 1. place the base anchors at every cell of this feature map
        shift_x = np.arange(0, feat_width) * stride
        shift_y = np.arange(0, feat_height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = num_anchors
        K = shifts.shape[0]
        i_all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        i_all_anchors = i_all_anchors.reshape((K * A, 4))
        all_anchors_list.append(i_all_anchors)
        i_total_anchors = int(K * A)
        total_anchors += i_total_anchors
        anchors_counter.append(total_anchors)

        # NOTE: unlike assign_anchor, anchors reaching outside the image are
        # kept, so there is no inds_inside filtering here.
        if DEBUG:
            print('total_anchors {}'.format(i_total_anchors))
            print('anchors shape {}'.format(i_all_anchors.shape))

    # all_anchors: [total_anchors, 4], levels stacked in order
    all_anchors = np.vstack(all_anchors_list)

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((total_anchors,), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt); np.float64 replaces the removed `np.float` alias
        overlaps = bbox_overlaps(all_anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(total_anchors), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not config.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < config.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= config.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if config.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < config.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(config.TRAIN.RPN_FG_FRACTION * config.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            # deterministic choice while debugging
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = config.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        if DEBUG:
            disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
        labels[disable_inds] = -1

    bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(all_anchors, gt_boxes[argmax_overlaps, :4])

    bbox_weights = np.zeros((total_anchors, 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(config.TRAIN.RPN_BBOX_WEIGHTS)

    if DEBUG:
        _sums = bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums = (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
        _counts = np.sum(labels == 1)
        means = _sums / (_counts + 1e-14)
        stds = np.sqrt(_squared_sums / _counts - means ** 2)
        print('means {}'.format(means))
        print('stdevs {}'.format(stds))

        print('rpn: num_positives {}'.format(np.sum(labels == 1)))
        print('rpn: num_negatives {}'.format(np.sum(labels == 0)))
        _fg_sum = np.sum(labels == 1)
        _bg_sum = np.sum(labels == 0)
        _count = 1
        print('rpn: num_positive avg {}'.format(_fg_sum // _count))
        print('rpn: num_negative avg {}'.format(_bg_sum // _count))

    # Split the stacked arrays back into one slice per level and bring each
    # into the per-level output layout. Keys are numbered from 1; the 3-level
    # case previously emitted 'n' instead of 'label1'.
    label = {}
    level_start = 0
    for i in range(num_levels):
        level_end = anchors_counter[i]
        count = level_end - level_start
        inds = np.arange(count)
        feat_height, feat_width = feat_shape[i][-2], feat_shape[i][-1]

        level_labels = _unmap(labels[level_start:level_end], count, inds, fill=-1)
        level_targets = _unmap(bbox_targets[level_start:level_end, :], count, inds, fill=0)
        level_weights = _unmap(bbox_weights[level_start:level_end, :], count, inds, fill=0)

        # anchors are ordered (cell, anchor); move the anchor axis in front of
        # the spatial axes before flattening / returning
        level_labels = level_labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        level_labels = level_labels.reshape((1, A * feat_height * feat_width))
        level_targets = level_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        level_weights = level_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

        suffix = str(i + 1)
        label['label' + suffix] = level_labels
        label['bbox_target' + suffix] = level_targets
        label['bbox_weight' + suffix] = level_weights
        level_start = level_end

    return label
def sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes,
                labels=None, overlaps=None, bbox_targets=None, gt_boxes=None, sample_type=None):
    """
    generate random sample of ROIs comprising foreground and background examples
    :param rois: all_rois; when labels are computed here, column 0 is skipped
        (e2e layout [n, 5] with batch_index) and columns 1..4 are the box
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: number of classes
    :param labels: maybe precomputed
    :param overlaps: maybe precomputed (max_overlaps)
    :param bbox_targets: maybe precomputed
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls); a first row
        with cls == -1 marks an image without objects
    :param sample_type: unused, kept for interface compatibility
    :return: (rois, labels, bbox_targets, bbox_weights)
    :raises ValueError: if labels are precomputed but bbox_targets is not
    """
    # gt_boxes is optional, so guard the lookup instead of indexing None
    is_empty_image = gt_boxes is not None and gt_boxes[0, 4] == -1

    if is_empty_image:
        # for empty image, only sample negative samples
        roi_num = rois.shape[0]
        # sampling with replacement is needed once there are not more rois
        # than requested
        keep_indexes = npr.choice(range(roi_num), size=rois_per_image,
                                  replace=not roi_num > rois_per_image)
        labels = np.zeros((roi_num), dtype=np.int32)
        labels = labels[keep_indexes]
        rois = rois[keep_indexes]
        bbox_targets = np.zeros((roi_num, 4 * num_classes), dtype=np.float32)
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        gt_assignment = None
        if labels is None:
            # np.float64 replaces the removed `np.float` alias
            overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64),
                                     gt_boxes[:, :4].astype(np.float64))
            gt_assignment = overlaps.argmax(axis=1)
            overlaps = overlaps.max(axis=1)
            labels = gt_boxes[gt_assignment, 4]

        # foreground RoI with FG_THRESH overlap
        fg_indexes = np.where(overlaps >= config.TRAIN.FG_THRESH)[0]
        # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
        fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
        # Sample foreground regions without replacement
        if len(fg_indexes) > fg_rois_per_this_image:
            fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)

        # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
        bg_indexes = np.where((overlaps < config.TRAIN.BG_THRESH_HI) &
                              (overlaps >= config.TRAIN.BG_THRESH_LO))[0]
        # Compute number of background RoIs to take from this image
        # (guarding against there being fewer than desired)
        bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
        bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
        # Sample background regions without replacement
        if len(bg_indexes) > bg_rois_per_this_image:
            bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

        # indexes selected
        keep_indexes = np.append(fg_indexes, bg_indexes)

        # pad to a fixed minibatch size by re-drawing with replacement,
        # from the background samples if there are any, else from foreground
        if keep_indexes.shape[0] < rois_per_image:
            gap = rois_per_image - keep_indexes.shape[0]
            pad_pool = bg_indexes if len(bg_indexes) != 0 else fg_indexes
            gap_index = npr.choice(pad_pool, size=gap, replace=True)
            keep_indexes = np.append(keep_indexes, gap_index)

        # select labels
        labels = labels[keep_indexes]
        # set labels of bg_rois to be 0; without background samples every kept
        # roi (including padding) is foreground and keeps its label
        if len(bg_indexes) != 0:
            labels[fg_rois_per_this_image:] = 0
        rois = rois[keep_indexes]

        # load or compute bbox_target
        if bbox_targets is not None:
            bbox_target_data = bbox_targets[keep_indexes, :]
        else:
            if gt_assignment is None:
                raise ValueError('bbox_targets must be given when labels are precomputed')
            targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4])
            if config.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
                targets = ((targets - np.array(config.TRAIN.BBOX_MEANS))
                           / np.array(config.TRAIN.BBOX_STDS))
            bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes)

    return rois, labels, bbox_targets, bbox_weights