def sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg, labels=None, overlaps=None, bbox_targets=None, gt_boxes=None): """ generate random sample of ROIs comprising foreground and background examples :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index :param fg_rois_per_image: foreground roi number :param rois_per_image: total roi number :param num_classes: number of classes :param labels: maybe precomputed :param overlaps: maybe precomputed (max_overlaps) :param bbox_targets: maybe precomputed :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls) :return: (labels, rois, bbox_targets, bbox_weights) """ if labels is None: overlaps = bbox_overlaps(rois[:, 1:].astype(np.float), gt_boxes[:, :4].astype(np.float)) gt_assignment = overlaps.argmax(axis=1) overlaps = overlaps.max(axis=1) labels = gt_boxes[gt_assignment, 4] # foreground RoI with FG_THRESH overlap fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0] # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size) # Sample foreground regions without replacement if len(fg_indexes) > fg_rois_per_this_image: fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False) # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] # Compute number of background RoIs to take from this image (guarding against there being fewer than desired) bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size) # Sample foreground regions without replacement if len(bg_indexes) > bg_rois_per_this_image: bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False) # indexes selected keep_indexes = np.append(fg_indexes, bg_indexes) # pad more to ensure a fixed minibatch size while keep_indexes.shape[0] < rois_per_image: gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0]) gap_indexes = npr.choice(range(len(rois)), size=gap, replace=False) keep_indexes = np.append(keep_indexes, gap_indexes) # select labels labels = labels[keep_indexes] # set labels of bg_rois to be 0 labels[fg_rois_per_this_image:] = 0 rois = rois[keep_indexes] # load or compute bbox_target if bbox_targets is not None: bbox_target_data = bbox_targets[keep_indexes, :] else: targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4]) if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED: targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS)) / np.array(cfg.TRAIN.BBOX_STDS)) bbox_target_data = np.hstack((labels[:, np.newaxis], targets)) bbox_targets, bbox_weights = \ expand_bbox_regression_targets(bbox_target_data, num_classes, cfg) return rois, labels, bbox_targets, bbox_weights
def assign_anchor(feat_shape, gt_boxes, im_info, cfg, feat_stride=16, scales=(8, 16, 32), ratios=(0.5, 1, 2), allowed_border=0, normalize_target=False, bbox_mean=(0.0, 0.0, 0.0, 0.0), bbox_std=(0.1, 0.1, 0.4, 0.4)): """ assign ground truth boxes to anchor positions :param feat_shape: infer output shape :param gt_boxes: assign ground truth :param im_info: filter out anchors overlapped with edges :param feat_stride: anchor position step :param scales: used to generate anchors, affects num_anchors (per location) :param ratios: aspect ratios of generated anchors :param allowed_border: filter out anchors with edge overlap > allowed_border :param normalize_target: normalize rpn target :param bbox_mean: anchor target mean :param bbox_std: anchor target std :return: dict of label 'label': of shape (batch_size, 1) <- (batch_size, num_anchors, feat_height, feat_width) 'bbox_target': of shape (batch_size, num_anchors * 4, feat_height, feat_width) 'bbox_inside_weight': *todo* mark the assigned anchors 'bbox_outside_weight': used to normalize the bbox_loss, all weights sums to RPN_POSITIVE_WEIGHT """ def _unmap(data, count, inds, fill=0): """" unmap a subset inds of data into original data of size count """ if len(data.shape) == 1: ret = np.empty((count,), dtype=np.float32) ret.fill(fill) ret[inds] = data else: ret = np.empty((count,) + data.shape[1:], dtype=np.float32) ret.fill(fill) ret[inds, :] = data return ret DEBUG = False im_info = im_info[0] scales = np.array(scales, dtype=np.float32) base_anchors = generate_anchors(base_size=feat_stride, ratios=list(ratios), scales=scales) num_anchors = base_anchors.shape[0] feat_height, feat_width = feat_shape[-2:] if DEBUG: print('anchors:') print(base_anchors) print('anchor shapes:') print(np.hstack((base_anchors[:, 2::4] - base_anchors[:, 0::4], base_anchors[:, 3::4] - base_anchors[:, 1::4]))) print('im_info', im_info) print('height', feat_height, 'width', feat_width) print('gt_boxes shape', gt_boxes.shape) print('gt_boxes', gt_boxes) # 1. generate proposals from bbox deltas and shifted anchors shift_x = np.arange(0, feat_width) * feat_stride shift_y = np.arange(0, feat_height) * feat_stride shift_x, shift_y = np.meshgrid(shift_x, shift_y) shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() # add A anchors (1, A, 4) to # cell K shifts (K, 1, 4) to get # shift anchors (K, A, 4) # reshape to (K*A, 4) shifted anchors A = num_anchors K = shifts.shape[0] all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)) all_anchors = all_anchors.reshape((K * A, 4)) total_anchors = int(K * A) # only keep anchors inside the image inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) & (all_anchors[:, 1] >= -allowed_border) & (all_anchors[:, 2] < im_info[1] + allowed_border) & (all_anchors[:, 3] < im_info[0] + allowed_border))[0] if DEBUG: print('total_anchors', total_anchors) print('inds_inside', len(inds_inside)) # keep only inside anchors anchors = all_anchors[inds_inside, :] if DEBUG: print('anchors shape', anchors.shape) # label: 1 is positive, 0 is negative, -1 is dont care labels = np.empty((len(inds_inside),), dtype=np.float32) labels.fill(-1) if gt_boxes.size > 0: # overlap between the anchors and the gt boxes # overlaps (ex, gt) overlaps = bbox_overlaps(anchors.astype(np.float), gt_boxes.astype(np.float)) argmax_overlaps = overlaps.argmax(axis=1) max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] gt_argmax_overlaps = overlaps.argmax(axis=0) gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])] gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels first so that positive labels can clobber them labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 # fg label: for each gt, anchor with highest overlap labels[gt_argmax_overlaps] = 1 # fg label: above threshold IoU labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 if cfg.TRAIN.RPN_CLOBBER_POSITIVES: # assign bg labels last so that negative labels can clobber positives labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 else: labels[:] = 0 # subsample positive labels if we have too many num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE) fg_inds = np.where(labels == 1)[0] if len(fg_inds) > num_fg: disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) if DEBUG: disable_inds = fg_inds[:(len(fg_inds) - num_fg)] labels[disable_inds] = -1 # subsample negative labels if we have too many num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1) bg_inds = np.where(labels == 0)[0] if len(bg_inds) > num_bg: disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) if DEBUG: disable_inds = bg_inds[:(len(bg_inds) - num_bg)] labels[disable_inds] = -1 bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) if gt_boxes.size > 0: bbox_targets[:] = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4]) bbox_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) bbox_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS) if DEBUG: _sums = bbox_targets[labels == 1, :].sum(axis=0) _squared_sums = (bbox_targets[labels == 1, :] ** 2).sum(axis=0) _counts = np.sum(labels == 1) means = _sums / (_counts + 1e-14) stds = np.sqrt(_squared_sums / _counts - means ** 2) print('means', means) print('stdevs', stds) if normalize_target: bbox_targets = ((bbox_targets - np.array(bbox_mean)) / np.array(bbox_std)) # map up to original set of anchors labels = _unmap(labels, total_anchors, inds_inside, fill=-1) bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0) if DEBUG: print('rpn: max max_overlaps', np.max(max_overlaps)) print('rpn: num_positives', np.sum(labels == 1)) print('rpn: num_negatives', np.sum(labels == 0)) _fg_sum = np.sum(labels == 1) _bg_sum = np.sum(labels == 0) _count = 1 print('rpn: num_positive avg', _fg_sum / _count) print('rpn: num_negative avg', _bg_sum / _count) labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2) labels = labels.reshape((1, A * feat_height * feat_width)) bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2) bbox_weights = bbox_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2)) label = {'label': labels, 'bbox_target': bbox_targets, 'bbox_weight': bbox_weights} return label