def __init__(self, cfg, chip_size, max_n_gts=100, max_poly_len=500):
    """
    Precompute the dense anchor grid and RPN sampling settings for a square chip.

    :param cfg: config object; reads cfg.network.ANCHOR_SCALES, cfg.network.ANCHOR_RATIOS,
                cfg.network.RPN_FEAT_STRIDE and cfg.TRAIN.RPN_BATCH_SIZE,
                RPN_POSITIVE_OVERLAP, RPN_NEGATIVE_OVERLAP, RPN_FG_FRACTION
    :param chip_size: chip side length; used for both the feature-map width and height
    :param max_n_gts: stored unchanged as self.max_n_gts
    :param max_poly_len: stored unchanged as self.max_poly_len
    """
    self.scales = np.array(cfg.network.ANCHOR_SCALES, dtype=np.float32)
    self.ratios = cfg.network.ANCHOR_RATIOS
    feat_stride = cfg.network.RPN_FEAT_STRIDE
    self.max_n_gts = max_n_gts
    self.max_poly_len = max_poly_len

    # Initializing anchors
    base_anchors = generate_anchors(base_size=feat_stride,
                                    ratios=list(self.ratios),
                                    scales=list(self.scales))
    self.num_anchors = base_anchors.shape[0]

    # Floor division keeps the feature-map size an integer even when "/" is
    # true division; for Python 2 ints the result is identical to the old "/".
    self.feat_width = chip_size // feat_stride
    self.feat_height = chip_size // feat_stride

    # One (x, y, x, y) shift per feature-map cell.
    shift_x = np.arange(0, self.feat_width) * feat_stride
    shift_y = np.arange(0, self.feat_height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    self.K = shifts.shape[0]

    # (1, A, 4) + (K, 1, 4) -> (K, A, 4), flattened to (K * A, 4)
    all_anchors = base_anchors.reshape((1, self.num_anchors, 4)) + \
        shifts.reshape((1, self.K, 4)).transpose((1, 0, 2))
    self.all_anchors = all_anchors.reshape((self.K * self.num_anchors, 4))

    # RPN sampling hyper-parameters
    self.batch_size = cfg.TRAIN.RPN_BATCH_SIZE
    self.pos_thresh = cfg.TRAIN.RPN_POSITIVE_OVERLAP
    self.neg_thresh = cfg.TRAIN.RPN_NEGATIVE_OVERLAP
    self.num_fg = int(self.batch_size * cfg.TRAIN.RPN_FG_FRACTION)
def assign_anchor(feat_shape, gt_boxes, im_info, cfg, feat_stride=16,
                  scales=(8, 16, 32), ratios=(0.5, 1, 2), allowed_border=0):
    """
    assign ground truth boxes to anchor positions
    :param feat_shape: infer output shape; only the last two entries (feat_height, feat_width) are read
    :param gt_boxes: ground truth boxes, one per row; columns 0-3 are used as regression targets
    :param im_info: only the first row is read; its entries [0] and [1] bound anchor y2 and x2
    :param cfg: config; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES, RPN_NEGATIVE_OVERLAP,
                RPN_POSITIVE_OVERLAP, RPN_FG_FRACTION, RPN_BATCH_SIZE and RPN_BBOX_WEIGHTS
    :param feat_stride: anchor position step
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :return: dict of label
    'label': of shape (1, A * feat_height * feat_width); 1 is positive, 0 is negative, -1 is dont care
    'bbox_target': of shape (1, A * 4, feat_height, feat_width)
    'bbox_weight': of shape (1, A * 4, feat_height, feat_width);
                   cfg.TRAIN.RPN_BBOX_WEIGHTS on positive anchors, 0 elsewhere
    """
    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        ret = np.full((count,) + data.shape[1:], fill, dtype=np.float32)
        ret[inds] = data
        return ret

    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    base_anchors = generate_anchors(base_size=feat_stride, ratios=list(ratios), scales=scales)
    num_anchors = base_anchors.shape[0]
    feat_height, feat_width = feat_shape[-2:]

    if DEBUG:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('anchors:')
        print(base_anchors)
        print('anchor shapes:')
        print(np.hstack((base_anchors[:, 2::4] - base_anchors[:, 0::4],
                         base_anchors[:, 3::4] - base_anchors[:, 1::4])))
        print('im_info %s' % (im_info,))
        print('height %s width %s' % (feat_height, feat_width))
        print('gt_boxes shape %s' % (gt_boxes.shape,))
        print('gt_boxes %s' % (gt_boxes,))

    # 1. generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, feat_width) * feat_stride
    shift_y = np.arange(0, feat_height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = num_anchors
    K = shifts.shape[0]
    all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                           (all_anchors[:, 1] >= -allowed_border) &
                           (all_anchors[:, 2] < im_info[1] + allowed_border) &
                           (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
    if DEBUG:
        print('total_anchors %s' % total_anchors)
        print('inds_inside %s' % len(inds_inside))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors shape %s' % (anchors.shape,))

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.full((len(inds_inside),), -1, dtype=np.float32)

    has_gt = gt_boxes.size > 0
    if has_gt:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 is what the removed alias np.float used to resolve to.
        overlaps = bbox_overlaps(anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap of some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            # deterministic choice while debugging
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        if DEBUG:
            disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
        labels[disable_inds] = -1

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if has_gt:
        bbox_targets[:] = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4])

    bbox_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    if DEBUG:
        _sums = bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums = (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
        _counts = np.sum(labels == 1)
        means = _sums / (_counts + 1e-14)
        stds = np.sqrt(_squared_sums / (_counts + 1e-14) - means ** 2)
        print('means %s' % (means,))
        print('stdevs %s' % (stds,))

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        if has_gt:
            # max_overlaps only exists when there are gt boxes
            print('rpn: max max_overlaps %s' % np.max(max_overlaps))
        print('rpn: num_positives %s' % np.sum(labels == 1))
        print('rpn: num_negatives %s' % np.sum(labels == 0))
        _fg_sum = np.sum(labels == 1)
        _bg_sum = np.sum(labels == 0)
        _count = 1
        print('rpn: num_positive avg %s' % (_fg_sum / _count))
        print('rpn: num_negative avg %s' % (_bg_sum / _count))

    # (K * A) -> (1, H, W, A) -> (1, A, H, W) -> (1, A * H * W)
    labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, A * feat_height * feat_width))
    bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
    bbox_weights = bbox_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

    label = {'label': labels,
             'bbox_target': bbox_targets,
             'bbox_weight': bbox_weights}
    return label
def assign_pyramid_anchor(feat_shapes, gt_boxes, im_info, cfg, feat_strides=(4, 8, 16, 32, 64),
                          scales=(8,), ratios=(0.5, 1, 2), allowed_border=0, balance_scale_bg=False,):
    """
    assign ground truth boxes to anchor positions
    :param feat_shapes: infer output shapes, one entry per pyramid level; for each level the
                        last two values of feat_shapes[level][0] are read as (feat_height, feat_width)
    :param gt_boxes: ground truth boxes, one per row; columns 0-3 are used as regression targets
    :param im_info: only the first row is read; its entries [0] and [1] bound anchor y2 and x2
    :param cfg: config; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES, RPN_NEGATIVE_OVERLAP,
                RPN_POSITIVE_OVERLAP, RPN_FG_FRACTION, RPN_BATCH_SIZE (-1 disables subsampling)
                and RPN_BBOX_WEIGHTS
    :param feat_strides: anchor position step of each pyramid level
    :param scales: used to generate anchors, affects num_anchors (per location);
                   1-D (shared by all levels) or 2-D (one row per level)
    :param ratios: aspect ratios of generated anchors; must be 2-D when scales is 2-D
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :param balance_scale_bg: restrict the background samples for each pyramid level
    :return: dict of label, all levels concatenated in feat_strides order
    'label': of shape (1, sum over levels of A * feat_height * feat_width)
    'bbox_target': of shape (1, A * 4, sum over levels of feat_height * feat_width)
    'bbox_weight': same shape as 'bbox_target'
    """
    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        ret = np.full((count,) + data.shape[1:], fill, dtype=np.float32)
        ret[inds] = data
        return ret

    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    ratios = np.array(ratios, dtype=np.float32)
    assert(len(feat_shapes) == len(feat_strides))

    fpn_args = []
    fpn_anchors_fid = np.zeros(0).astype(int)  # number of inside anchors per level
    fpn_anchors = np.zeros([0, 4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id, stride in enumerate(feat_strides):
        # len(scales.shape) == 1 just for backward compatibility, will remove in the future
        if len(scales.shape) == 1:
            base_anchors = generate_anchors(base_size=stride, ratios=ratios, scales=scales)
        else:
            assert len(scales.shape) == len(ratios.shape) == 2
            base_anchors = generate_anchors(base_size=stride, ratios=ratios[feat_id],
                                            scales=scales[feat_id])
        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = feat_shapes[feat_id][0][-2:]

        # 1. generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, feat_width) * stride
        shift_y = np.arange(0, feat_height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = num_anchors
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                               (all_anchors[:, 1] >= -allowed_border) &
                               (all_anchors[:, 2] < im_info[1] + allowed_border) &
                               (all_anchors[:, 3] < im_info[0] + allowed_border))[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # label: 1 is positive, 0 is negative, -1 is dont care
        # for sigmoid classifier, ignore the 'background' class
        labels = np.full((len(inds_inside),), -1, dtype=np.float32)

        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_labels = np.hstack((fpn_labels, labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])

    has_gt = gt_boxes.size > 0
    if has_gt:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 is what the removed alias np.float used to resolve to.
        overlaps = bbox_overlaps(fpn_anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap of some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        fpn_labels[gt_argmax_overlaps] = 1
        # fg label: above threshold IoU
        fpn_labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        fpn_labels[:] = 0

    # RPN_BATCH_SIZE == -1 means "keep everything"
    keep_all = cfg.TRAIN.RPN_BATCH_SIZE == -1

    # subsample positive labels if we have too many
    num_fg = fpn_labels.shape[0] if keep_all else int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(fpn_labels >= 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            # deterministic choice while debugging
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        fpn_labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = fpn_labels.shape[0] if keep_all else cfg.TRAIN.RPN_BATCH_SIZE - np.sum(fpn_labels >= 1)
    bg_inds = np.where(fpn_labels == 0)[0]
    # turn per-level counts into slice boundaries: level i owns [fid[i], fid[i + 1])
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

    if balance_scale_bg:
        # Floor division: the quota is later used as a sample size, which must be an int.
        num_bg_scale = int(num_bg // len(feat_strides))
        for feat_id in range(0, len(feat_strides)):
            bg_ind_scale = bg_inds[(bg_inds >= fpn_anchors_fid[feat_id]) &
                                   (bg_inds < fpn_anchors_fid[feat_id + 1])]
            if len(bg_ind_scale) > num_bg_scale:
                disable_inds = npr.choice(bg_ind_scale, size=(len(bg_ind_scale) - num_bg_scale),
                                          replace=False)
                fpn_labels[disable_inds] = -1
    else:
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            if DEBUG:
                disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
            fpn_labels[disable_inds] = -1

    # regression targets are only computed for positive anchors
    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if has_gt:
        positive = fpn_labels >= 1
        fpn_bbox_targets[positive, :] = bbox_transform(fpn_anchors[positive, :],
                                                       gt_boxes[argmax_overlaps[positive], :4])

    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    fpn_bbox_weights[fpn_labels >= 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id in range(0, len(feat_strides)):
        feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
        begin, end = fpn_anchors_fid[feat_id], fpn_anchors_fid[feat_id + 1]
        level_inds = fpn_inds_inside[feat_id]

        # map up to original set of anchors
        labels = _unmap(fpn_labels[begin:end], total_anchors, level_inds, fill=-1)
        bbox_targets = _unmap(fpn_bbox_targets[begin:end], total_anchors, level_inds, fill=0)
        bbox_weights = _unmap(fpn_bbox_weights[begin:end], total_anchors, level_inds, fill=0)

        # (K * A) -> (1, H, W, A) -> (1, A, H, W) -> (1, A * H * W)
        labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))

        bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)

    label = {
        'label': np.concatenate(label_list, axis=1),
        'bbox_target': np.concatenate(bbox_target_list, axis=2),
        'bbox_weight': np.concatenate(bbox_weight_list, axis=2)
    }
    return label
def assign_anchor(feat_shape, gt_boxes, im_info, cfg, feat_stride=16,
                  scales=(8, 16, 32), ratios=(0.5, 1, 2), allowed_border=0,
                  normalize_target=False, bbox_mean=(0.0, 0.0, 0.0, 0.0),
                  bbox_std=(0.1, 0.1, 0.4, 0.4)):
    """
    assign ground truth boxes to anchor positions
    :param feat_shape: infer output shape; only the last two entries (feat_height, feat_width) are read
    :param gt_boxes: ground truth boxes, one per row; columns 0-3 are used as regression targets
    :param im_info: only the first row is read; its entries [0] and [1] bound anchor y2 and x2
    :param cfg: config; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES, RPN_NEGATIVE_OVERLAP,
                RPN_POSITIVE_OVERLAP, RPN_FG_FRACTION, RPN_BATCH_SIZE and RPN_BBOX_WEIGHTS
    :param feat_stride: anchor position step
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :param normalize_target: normalize rpn target as (target - bbox_mean) / bbox_std
    :param bbox_mean: anchor target mean
    :param bbox_std: anchor target std
    :return: dict of label
    'label': of shape (1, A * feat_height * feat_width); 1 is positive, 0 is negative, -1 is dont care
    'bbox_target': of shape (1, A * 4, feat_height, feat_width)
    'bbox_weight': of shape (1, A * 4, feat_height, feat_width);
                   cfg.TRAIN.RPN_BBOX_WEIGHTS on positive anchors, 0 elsewhere
    """
    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        ret = np.full((count, ) + data.shape[1:], fill, dtype=np.float32)
        ret[inds] = data
        return ret

    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    base_anchors = generate_anchors(base_size=feat_stride,
                                    ratios=list(ratios),
                                    scales=scales)
    num_anchors = base_anchors.shape[0]
    feat_height, feat_width = feat_shape[-2:]

    if DEBUG:
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('anchors:')
        print(base_anchors)
        print('anchor shapes:')
        print(np.hstack((base_anchors[:, 2::4] - base_anchors[:, 0::4],
                         base_anchors[:, 3::4] - base_anchors[:, 1::4])))
        print('im_info %s' % (im_info,))
        print('height %s width %s' % (feat_height, feat_width))
        print('gt_boxes shape %s' % (gt_boxes.shape,))
        print('gt_boxes %s' % (gt_boxes,))

    # 1. generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, feat_width) * feat_stride
    shift_y = np.arange(0, feat_height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = num_anchors
    K = shifts.shape[0]
    all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -allowed_border) &
        (all_anchors[:, 1] >= -allowed_border) &
        (all_anchors[:, 2] < im_info[1] + allowed_border) &
        (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
    if DEBUG:
        print('total_anchors %s' % total_anchors)
        print('inds_inside %s' % len(inds_inside))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors shape %s' % (anchors.shape,))

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.full((len(inds_inside), ), -1, dtype=np.float32)

    has_gt = gt_boxes.size > 0
    if has_gt:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 is what the removed alias np.float used to resolve to.
        overlaps = bbox_overlaps(anchors.astype(np.float64),
                                 gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap of some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        if DEBUG:
            # deterministic choice while debugging
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        if DEBUG:
            disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
        labels[disable_inds] = -1

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if has_gt:
        bbox_targets[:] = bbox_transform(anchors,
                                         gt_boxes[argmax_overlaps, :4])

    bbox_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    if DEBUG:
        # statistics of the raw (un-normalized) positive targets
        _sums = bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums = (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts = np.sum(labels == 1)
        means = _sums / (_counts + 1e-14)
        stds = np.sqrt(_squared_sums / (_counts + 1e-14) - means**2)
        print('means %s' % (means,))
        print('stdevs %s' % (stds,))

    if normalize_target:
        # applied to inside anchors only; outside anchors are filled with 0 below
        bbox_targets = ((bbox_targets - np.array(bbox_mean)) /
                        np.array(bbox_std))

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        if has_gt:
            # max_overlaps only exists when there are gt boxes
            print('rpn: max max_overlaps %s' % np.max(max_overlaps))
        print('rpn: num_positives %s' % np.sum(labels == 1))
        print('rpn: num_negatives %s' % np.sum(labels == 0))
        _fg_sum = np.sum(labels == 1)
        _bg_sum = np.sum(labels == 0)
        _count = 1
        print('rpn: num_positive avg %s' % (_fg_sum / _count))
        print('rpn: num_negative avg %s' % (_bg_sum / _count))

    # (K * A) -> (1, H, W, A) -> (1, A, H, W) -> (1, A * H * W)
    labels = labels.reshape(
        (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, A * feat_height * feat_width))
    bbox_targets = bbox_targets.reshape(
        (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
    bbox_weights = bbox_weights.reshape(
        (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

    label = {
        'label': labels,
        'bbox_target': bbox_targets,
        'bbox_weight': bbox_weights
    }
    return label
def assign_pyramid_anchor(
        feat_shapes,
        gt_boxes,
        im_info,
        cfg,
        feat_strides=(4, 8, 16, 32, 64),
        scales=(8, ),
        ratios=(0.5, 1, 2),
        allowed_border=0,
        balance_scale_bg=False,
):
    """
    assign ground truth boxes to anchor positions
    :param feat_shapes: infer output shapes, one entry per pyramid level; for each level the
                        last two values of feat_shapes[level][0] are read as (feat_height, feat_width)
    :param gt_boxes: ground truth boxes, one per row; columns 0-3 are used as regression targets
    :param im_info: only the first row is read; its entries [0] and [1] bound anchor y2 and x2
    :param cfg: config; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES, RPN_NEGATIVE_OVERLAP,
                RPN_POSITIVE_OVERLAP, RPN_FG_FRACTION, RPN_BATCH_SIZE (-1 disables subsampling)
                and RPN_BBOX_WEIGHTS
    :param feat_strides: anchor position step of each pyramid level
    :param scales: used to generate anchors, affects num_anchors (per location);
                   1-D (shared by all levels) or 2-D (one row per level)
    :param ratios: aspect ratios of generated anchors; must be 2-D when scales is 2-D
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :param balance_scale_bg: restrict the background samples for each pyramid level
    :return: dict of label, all levels concatenated in feat_strides order
    'label': of shape (1, sum over levels of A * feat_height * feat_width)
    'bbox_target': of shape (1, A * 4, sum over levels of feat_height * feat_width)
    'bbox_weight': same shape as 'bbox_target'

    Besides the usual IoU rules, an anchor is also labelled positive when, for its
    best-matching gt box, center_ins == 1, the first overlap measure is at least
    RPN_POSITIVE_OVERLAP / 3 and the second overlap measure is at least 0.7.
    NOTE(review): the exact meaning of center_ins and of the second overlap measure is
    defined by bbox_overlaps_py1, which is not visible here - confirm there.
    """
    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        ret = np.full((count, ) + data.shape[1:], fill, dtype=np.float32)
        ret[inds] = data
        return ret

    # The body tests DEBUG but this function never defined it; define it locally
    # (as the sibling functions do) so the debug branches cannot raise NameError.
    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    ratios = np.array(ratios, dtype=np.float32)
    assert (len(feat_shapes) == len(feat_strides))

    fpn_args = []
    fpn_anchors_fid = np.zeros(0).astype(int)  # number of inside anchors per level
    fpn_anchors = np.zeros([0, 4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id, stride in enumerate(feat_strides):
        # len(scales.shape) == 1 just for backward compatibility, will remove in the future
        if len(scales.shape) == 1:
            base_anchors = generate_anchors(base_size=stride,
                                            ratios=ratios,
                                            scales=scales)
        else:
            assert len(scales.shape) == len(ratios.shape) == 2
            base_anchors = generate_anchors(base_size=stride,
                                            ratios=ratios[feat_id],
                                            scales=scales[feat_id])
        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = feat_shapes[feat_id][0][-2:]

        # 1. generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, feat_width) * stride
        shift_y = np.arange(0, feat_height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = num_anchors
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -allowed_border) &
            (all_anchors[:, 1] >= -allowed_border) &
            (all_anchors[:, 2] < im_info[1] + allowed_border) &
            (all_anchors[:, 3] < im_info[0] + allowed_border))[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # label: 1 is positive, 0 is negative, -1 is dont care
        # for sigmoid classifier, ignore the 'background' class
        labels = np.full((len(inds_inside), ), -1, dtype=np.float32)

        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_labels = np.hstack((fpn_labels, labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])

    has_gt = gt_boxes.size > 0
    if has_gt:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # Use the anchors of ALL levels (fpn_anchors); passing only the last level's
        # `anchors` makes every index below, which spans len(fpn_anchors), mismatch.
        # np.float64 is what the removed alias np.float used to resolve to.
        overlaps, overlaps1, _, center_ins = bbox_overlaps_py1(
            fpn_anchors.astype(np.float64), gt_boxes.astype(np.float64))
        anchor_idx = np.arange(len(fpn_anchors))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[anchor_idx, argmax_overlaps]
        max_overlaps1 = overlaps1[anchor_idx, argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap of some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        fpn_labels[gt_argmax_overlaps] = 1
        # fg label: above threshold IoU
        fpn_labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # Extra positives (vectorised form of the former per-anchor loop); this runs
        # after the clobber step, so these labels are never reset to background.
        # assumes center_ins is an array-like of shape (n_anchors, n_gt) - TODO confirm
        center_hit = np.asarray(center_ins)[anchor_idx, argmax_overlaps] == 1
        promote = (center_hit &
                   (max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP / 3) &
                   (max_overlaps1 >= 0.7))
        if DEBUG:
            for i in np.where(promote)[0]:
                print(max_overlaps[i])
                print(max_overlaps1[i])
                print(fpn_labels[i])
        fpn_labels[promote] = 1
    else:
        fpn_labels[:] = 0

    # RPN_BATCH_SIZE == -1 means "keep everything"
    keep_all = cfg.TRAIN.RPN_BATCH_SIZE == -1

    # subsample positive labels if we have too many
    num_fg = fpn_labels.shape[0] if keep_all else int(
        cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(fpn_labels >= 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        if DEBUG:
            # deterministic choice while debugging
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        fpn_labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = fpn_labels.shape[0] if keep_all else (
        cfg.TRAIN.RPN_BATCH_SIZE - np.sum(fpn_labels >= 1))
    bg_inds = np.where(fpn_labels == 0)[0]
    # turn per-level counts into slice boundaries: level i owns [fid[i], fid[i + 1])
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

    if balance_scale_bg:
        # Floor division: the quota is later used as a sample size, which must be an int.
        num_bg_scale = int(num_bg // len(feat_strides))
        for feat_id in range(0, len(feat_strides)):
            bg_ind_scale = bg_inds[(bg_inds >= fpn_anchors_fid[feat_id]) &
                                   (bg_inds < fpn_anchors_fid[feat_id + 1])]
            if len(bg_ind_scale) > num_bg_scale:
                disable_inds = npr.choice(bg_ind_scale,
                                          size=(len(bg_ind_scale) - num_bg_scale),
                                          replace=False)
                fpn_labels[disable_inds] = -1
    else:
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds,
                                      size=(len(bg_inds) - num_bg),
                                      replace=False)
            if DEBUG:
                disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
            fpn_labels[disable_inds] = -1

    # regression targets are only computed for positive anchors
    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if has_gt:
        positive = fpn_labels >= 1
        fpn_bbox_targets[positive, :] = bbox_transform(
            fpn_anchors[positive, :],
            gt_boxes[argmax_overlaps[positive], :4])

    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    fpn_bbox_weights[fpn_labels >= 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id in range(0, len(feat_strides)):
        feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
        begin, end = fpn_anchors_fid[feat_id], fpn_anchors_fid[feat_id + 1]
        level_inds = fpn_inds_inside[feat_id]

        # map up to original set of anchors
        labels = _unmap(fpn_labels[begin:end], total_anchors, level_inds, fill=-1)
        bbox_targets = _unmap(fpn_bbox_targets[begin:end], total_anchors, level_inds, fill=0)
        bbox_weights = _unmap(fpn_bbox_weights[begin:end], total_anchors, level_inds, fill=0)

        # (K * A) -> (1, H, W, A) -> (1, A, H, W) -> (1, A * H * W)
        labels = labels.reshape(
            (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))

        bbox_targets = bbox_targets.reshape(
            (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape(
            (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)

    label = {
        'label': np.concatenate(label_list, axis=1),
        'bbox_target': np.concatenate(bbox_target_list, axis=2),
        'bbox_weight': np.concatenate(bbox_weight_list, axis=2)
    }
    return label
def assign_anchor(feat_shape_p3,
                  feat_shape_p4,
                  feat_shape_p5,
                  gt_boxes,
                  im_info,
                  cfg,
                  feat_stride_p3=4,
                  scales_p3=(8, ),
                  ratios_p3=(0.75, 1, 1.5),
                  feat_stride_p4=8,
                  scales_p4=(8, ),
                  ratios_p4=(0.75, 1, 1.5),
                  feat_stride_p5=16,
                  scales_p5=(8, ),
                  ratios_p5=(0.75, 1, 1.5),
                  allowed_border=1):
    """
    Assign ground truth boxes to the anchors of three pyramid levels (p3, p4, p5).

    Every anchor is kept (no inside-image filtering), and each anchor takes the
    class id of the ground-truth box it overlaps most, unless its best overlap
    falls below the configured thresholds.

    NOTE: the per-level ``feat_stride_*``, ``scales_*`` and ``ratios_*``
    arguments, ``im_info`` and ``allowed_border`` are accepted for interface
    compatibility but are NOT used. Strides (4, 8, 16), scales (8, 10, 12) and
    ratios (0.5, 1, 2) are fixed inside the function.

    :param feat_shape_p3: feature map shape of level p3; the last two entries are (height, width)
    :param feat_shape_p4: same for level p4
    :param feat_shape_p5: same for level p5
    :param gt_boxes: ground truth array [n, 5]; columns 0..3 are the box, the last column is the class id
    :param im_info: unused
    :param cfg: config providing TRAIN.RPN_NEGATIVE_OVERLAP, TRAIN.RPN_POSITIVE_OVERLAP
        and TRAIN.RPN_BBOX_WEIGHTS
    :return: dict with, for each level name in ('p3', 'p4', 'p5'):
        'label/<level>': float32, shape (1, A * feat_height * feat_width);
            class id for positives, 0 for background, -1 for "don't care"
        'bbox_target/<level>': float32, shape (1, A * 4, feat_height, feat_width)
        'bbox_weight/<level>': float32, shape (1, A * 4, feat_height, feat_width);
            RPN_BBOX_WEIGHTS where label > 0, zero elsewhere
    :raises ValueError: if the number of strides and feature shapes differ
    """
    feat_shape = [feat_shape_p3, feat_shape_p4, feat_shape_p5]
    level_names = ('p3', 'p4', 'p5')
    feat_stride = [4, 8, 16]
    scales = np.array((8, 10, 12), dtype=np.float32)
    ratios = (0.5, 1, 2)

    # The original wrote `assert ('...')`, which asserts a non-empty string and
    # therefore can never fail; raise explicitly so the check is effective.
    if len(feat_stride) != len(feat_shape):
        raise ValueError(
            'length of feat_stride is not equal to length of feat_shape')

    neg_thresh = cfg.TRAIN.RPN_NEGATIVE_OVERLAP
    pos_thresh = cfg.TRAIN.RPN_POSITIVE_OVERLAP
    has_gt = gt_boxes.size > 0

    labels_list = []
    bbox_targets_list = []
    bbox_weights_list = []
    for stride, shape in zip(feat_stride, feat_shape):
        base_anchors = generate_anchors(base_size=stride,
                                        ratios=list(ratios),
                                        scales=scales)
        A = base_anchors.shape[0]
        feat_height, feat_width = shape[-2:]

        # Shift the A base anchors to every feature-map cell:
        # (1, A, 4) + (K, 1, 4) -> (K, A, 4) -> (K * A, 4) in image coordinates.
        shift_x = np.arange(0, feat_width) * stride
        shift_y = np.arange(0, feat_height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        K = shifts.shape[0]
        anchors = (base_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
        anchors = anchors.reshape((K * A, 4))
        total_anchors = len(anchors)

        # label: class id (> 0) is positive, 0 is negative, -1 is dont care
        if has_gt:
            # np.float / np.int aliases were removed from NumPy; use explicit dtypes.
            overlaps = bbox_overlaps(anchors.astype(np.float64),
                                     gt_boxes.astype(np.float64))
            argmax_overlaps = overlaps.argmax(axis=1)
            max_overlaps = overlaps[np.arange(total_anchors), argmax_overlaps]
            # Class id of the best-overlapping gt box per anchor. Indexing the
            # class column directly avoids a (total_anchors, n_gt) temporary.
            labels = gt_boxes[:, -1].astype(np.int64)[argmax_overlaps]
            labels[max_overlaps < neg_thresh] = 0
            labels[(max_overlaps >= neg_thresh) &
                   (max_overlaps < pos_thresh)] = -1
        else:
            labels = np.zeros((total_anchors, ), dtype=np.float32)

        bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
        if has_gt:
            bbox_targets[:] = bbox_transform(anchors,
                                             gt_boxes[argmax_overlaps, :4])

        bbox_weights = np.zeros((total_anchors, 4), dtype=np.float32)
        bbox_weights[labels > 0, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

        # All anchors are kept, so mapping back to the full anchor set is just
        # a cast to float32 (targets and weights already are float32).
        labels = labels.astype(np.float32)

        labels = labels.reshape((1, A * feat_height * feat_width))
        bbox_targets = bbox_targets.reshape(
            (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_weights = bbox_weights.reshape(
            (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

        labels_list.append(labels)
        bbox_targets_list.append(bbox_targets)
        bbox_weights_list.append(bbox_weights)

    label = {}
    for name, lvl_labels, lvl_targets, lvl_weights in zip(
            level_names, labels_list, bbox_targets_list, bbox_weights_list):
        label['label/' + name] = lvl_labels
        label['bbox_target/' + name] = lvl_targets
        label['bbox_weight/' + name] = lvl_weights
    return label
def forward(self, is_train, req, in_data, out_data, aux):
    """
    Convert multi-level RPN outputs into a fixed-size set of RoIs.

    For each feature level: generate the shifted anchors, apply the predicted
    deltas, clip the boxes to the image and drop boxes below the level's
    minimum size. The surviving boxes of all levels are then merged, sorted by
    score, truncated to ``pre_nms_topN``, passed through NMS, truncated to
    ``post_nms_topN`` and padded (by re-sampling kept boxes) so the output
    always has ``post_nms_topN`` rows.

    Inputs read from ``in_data``:
        [0] flattened class scores, indexed as [0, start:stop] per level
        [1] flattened bbox deltas, indexed as [0, start:stop] per level
        [2] im_info; row 0 is used, ``[:2]`` for clipping and ``[2]`` as scale
        [3] per-level feature shapes, read as ``[0, i, 2]`` (height) and
            ``[0, i, 3]`` (width)

    Outputs written through ``self.assign``:
        out_data[0]: (N, 5) float32 rows ``[batch_index(=0), box]``
        out_data[1]: (N, 1) float32 scores, only when ``self._output_score``

    :raises ValueError: if the batch holds more than one image
    """
    # NOTE: the NMS kernel is always created for device id 0.
    nms = gpu_nms_wrapper(self._threshold, 0)

    batch_size = in_data[0].shape[0]
    if batch_size > 1:
        raise ValueError(
            "Sorry, multiple images each device is not implemented")

    def _write_outputs(boxes, box_scores):
        # Only a single input image is supported, so all batch inds are 0.
        batch_inds = np.zeros((boxes.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, boxes.astype(np.float32, copy=False)))
        self.assign(out_data[0], req[0], blob)
        if self._output_score:
            self.assign(out_data[1], req[1],
                        box_scores.astype(np.float32, copy=False))

    pre_nms_topN = self._rpn_pre_nms_top_n
    post_nms_topN = self._rpn_post_nms_top_n
    min_size = self._rpn_min_size

    scores_list = in_data[0].asnumpy()
    bbox_deltas_list = in_data[1].asnumpy()
    im_info = in_data[2].asnumpy()[0, :]
    feat_shape = in_data[3].asnumpy()

    num_feat = feat_shape.shape[1]
    score_index_start = 0
    bbox_index_start = 0
    keep_proposal = []
    keep_scores = []
    for i in range(num_feat):
        feat_stride = int(self._feat_stride[i])
        anchor = generate_anchors(feat_stride,
                                  scales=self._scales,
                                  ratios=self._ratios)
        num_anchors = anchor.shape[0]
        height = feat_shape[0, i, 2]
        width = feat_shape[0, i, 3]

        # Shift the A base anchors to each of the K = height * width cells.
        shift_x = np.arange(0, width) * feat_stride
        shift_y = np.arange(0, height) * feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        A = num_anchors
        K = shifts.shape[0]
        anchors = anchor.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Slice this level out of the flattened inputs. The first A score
        # channels are background probabilities; keep the second half.
        scores = (scores_list[
            0, int(score_index_start):int(score_index_start + K * A * 2)]
                  ).reshape((1, int(2 * num_anchors), -1, int(width)))
        scores = scores[:, num_anchors:, :, :]
        bbox_deltas = (bbox_deltas_list[
            0, int(bbox_index_start):int(bbox_index_start + K * A * 4)]
                       ).reshape((1, int(4 * num_anchors), -1, int(width)))
        score_index_start += K * A * 2
        bbox_index_start += K * A * 4

        # (1, 4A, h, w) -> (h * w * A, 4) and (1, A, h, w) -> (h * w * A, 1)
        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        proposals = bbox_pred(anchors, bbox_deltas)
        proposals = clip_boxes(proposals, im_info[:2])
        keep = self._filter_boxes(proposals, min_size[i] * im_info[2])
        keep_proposal.append(proposals[keep, :])
        keep_scores.append(scores[keep])

    # Merge all levels with a single stack instead of re-copying per level.
    proposals = np.vstack(keep_proposal)
    scores = np.vstack(keep_scores)

    # Sort all (proposal, score) pairs by score, highest first, and keep the
    # top pre_nms_topN of them.
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Apply NMS and keep at most post_nms_topN survivors.
    det = np.hstack((proposals, scores)).astype(np.float32)
    keep = nms(det)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]

    # Pad to ensure the output size remains unchanged.
    if len(keep) < post_nms_topN:
        try:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
        except ValueError:
            # npr.choice rejects an empty `keep`: nothing survived NMS, so
            # emit dummy 16x16 boxes. Scores are zero-filled to the same row
            # count so both outputs keep post_nms_topN rows (the original
            # passed the un-padded score array here).
            proposals = np.zeros((post_nms_topN, 4), dtype=np.float32)
            proposals[:, 2] = 16
            proposals[:, 3] = 16
            scores = np.zeros((post_nms_topN, 1), dtype=np.float32)
            _write_outputs(proposals, scores)
            return
        keep = np.hstack((keep, pad))

    proposals = proposals[keep, :]
    scores = scores[keep]
    _write_outputs(proposals, scores)
def assign_anchor(feat_shape, gt_boxes, im_info, cfg, feat_stride=16, scales=(8, 16, 32),
                  ratios=(0.5, 1, 2), allowed_border=0, valid_ranges=None,
                  invalid_anchor_threshold=0.3):
    """
    Assign ground truth boxes to anchor positions, optionally once per scale range.

    :param feat_shape: infer output shape; the last two entries are (height, width)
    :param gt_boxes: ground truth array; columns 0..3 are used as the box
    :param im_info: ``im_info[0]`` is read as (height, width, scale); used to
        filter out anchors crossing the image border
    :param cfg: config providing TRAIN.RPN_* settings
    :param feat_stride: anchor position step
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :param valid_ranges: None, or an iterable of (low, high) box-size ranges.
        For every range, anchors whose best overlap with an out-of-range gt box
        (area < low**2 or area > high**2) exceeds ``invalid_anchor_threshold``
        are set to "don't care" before subsampling.
    :param invalid_anchor_threshold: overlap threshold used with ``valid_ranges``
    :return: dict
        'label': (1, A * feat_height * feat_width); 1 positive, 0 negative, -1 don't care
        'bbox_target': (1, A * 4, feat_height, feat_width)
        'bbox_weight': (1, A * 4, feat_height, feat_width)
        When ``valid_ranges`` is given, each array has one row per range on
        axis 0 and the dict also contains 'valid_ranges'.
        NOTE(review): the returned 'valid_ranges' is the fixed table
        [[0, 90], [30, 160], [90, -1]] scaled by im_info[2] (with -1 replaced
        by the longer image side), not the argument that was passed in.
        Confirm this is intended.
    """
    def _unmap(data, count, inds, fill=0):
        """Scatter `data` (one row per entry of `inds`) into a float32 array of `count` rows pre-filled with `fill`."""
        if len(data.shape) == 1:
            ret = np.empty((count,), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    base_anchors = generate_anchors(base_size=feat_stride, ratios=list(ratios), scales=scales)
    A = base_anchors.shape[0]
    feat_height, feat_width = feat_shape[-2:]

    # Shift the A base anchors to every feature-map cell:
    # (1, A, 4) + (K, 1, 4) -> (K, A, 4) -> (K * A, 4).
    shift_x = np.arange(0, feat_width) * feat_stride
    shift_y = np.arange(0, feat_height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    K = shifts.shape[0]
    all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                           (all_anchors[:, 1] >= -allowed_border) &
                           (all_anchors[:, 2] < im_info[1] + allowed_border) &
                           (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
    anchors = all_anchors[inds_inside, :]
    num_inside = len(inds_inside)
    has_gt = gt_boxes.size > 0

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((num_inside,), dtype=np.float32)
    labels.fill(-1)

    if has_gt:
        # overlaps (ex, gt); np.float was removed from NumPy, use float64 explicitly
        overlaps = bbox_overlaps(anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1
        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # Regression targets do not depend on the subsampling or on the scale
    # range, so compute them once for the inside anchors.
    inside_targets = np.zeros((num_inside, 4), dtype=np.float32)
    if has_gt:
        inside_targets[:] = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4])

    def _subsample(lbls):
        """Randomly set surplus positives, then surplus negatives, to -1 (in place)."""
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
        fg_inds = np.where(lbls == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            lbls[disable_inds] = -1

        num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(lbls == 1)
        bg_inds = np.where(lbls == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            lbls[disable_inds] = -1

    def _pack(lbls):
        """Map inside-anchor labels/targets/weights back to all anchors and reshape to output layout."""
        inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
        inside_weights[lbls == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

        full_labels = _unmap(lbls, total_anchors, inds_inside, fill=-1)
        full_targets = _unmap(inside_targets, total_anchors, inds_inside, fill=0)
        full_weights = _unmap(inside_weights, total_anchors, inds_inside, fill=0)

        full_labels = full_labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        full_labels = full_labels.reshape((1, A * feat_height * feat_width))
        full_targets = full_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        full_weights = full_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        return full_labels, full_targets, full_weights

    if valid_ranges is None:
        _subsample(labels)
        out_labels, out_targets, out_weights = _pack(labels)
        return {'label': out_labels,
                'bbox_target': out_targets,
                'bbox_weight': out_weights}

    if has_gt:
        # Box areas from columns [x1, y1, x2, y2] = 0..3, the same columns
        # passed to bbox_transform above. The original read columns 1..4,
        # mixing the fifth column into the area.
        gt_boxes_sizes = ((gt_boxes[:, 2] - gt_boxes[:, 0] + 1.) *
                          (gt_boxes[:, 3] - gt_boxes[:, 1] + 1.))

    all_labels, all_bbox_targets, all_bbox_weights = [], [], []
    for valid_range in valid_ranges:
        cls_labels = labels.copy()
        if has_gt:
            invalid_inds = np.where((gt_boxes_sizes < valid_range[0] ** 2) |
                                    (gt_boxes_sizes > valid_range[1] ** 2))[0]
            if len(invalid_inds) > 0:
                # Ignore anchors that mostly cover a gt box outside this range.
                invalid_gt_boxes = gt_boxes[invalid_inds, :]
                invalid_overlaps = bbox_overlaps(anchors.astype(np.float64),
                                                 invalid_gt_boxes.astype(np.float64))
                invalid_argmax_overlaps = invalid_overlaps.argmax(axis=1)
                invalid_max_overlaps = invalid_overlaps[np.arange(num_inside),
                                                        invalid_argmax_overlaps]
                disable_inds = np.where(invalid_max_overlaps > invalid_anchor_threshold)[0]
                cls_labels[disable_inds] = -1

        _subsample(cls_labels)
        out_labels, out_targets, out_weights = _pack(cls_labels)
        all_labels.append(out_labels)
        all_bbox_targets.append(out_targets)
        all_bbox_weights.append(out_weights)

    all_labels = np.vstack(all_labels)
    all_bbox_targets = np.vstack(all_bbox_targets)
    all_bbox_weights = np.vstack(all_bbox_weights)

    # Fixed range table returned to the caller (see NOTE in the docstring).
    valid_ranges = np.array([[0, 90], [30, 160], [90, -1]], dtype=np.float32).reshape(-1, 2)
    valid_ranges *= im_info[2]
    inds = np.where(valid_ranges[:, 1] < 0)[0]
    valid_ranges[inds, 1] = max(im_info[0], im_info[1])

    return {'label': all_labels,
            'bbox_target': all_bbox_targets,
            'bbox_weight': all_bbox_weights,
            'valid_ranges': valid_ranges}
def _proposal_layer_py(rpn_bbox_cls_prob, rpn_bbox_pred, im_dims, cfg_key,
                       _feat_stride, anchor_scales):
    """
    Turn RPN class probabilities and box deltas into scored region proposals.

    Algorithm:
        for each (H, W) location i
            generate A anchor boxes centered on cell i
            apply predicted bbox deltas at cell i to each of the A anchors
        clip predicted boxes to image
        remove predicted boxes with either height or width < threshold
        sort all (proposal, score) pairs by score from highest to lowest
        take top pre_nms_topN proposals before NMS
        apply NMS with the configured threshold to remaining proposals
        take post_nms_topN proposals after NMS

    :param rpn_bbox_cls_prob: array of shape (1, h, w, 2 * A)
    :param rpn_bbox_pred: array of shape (1, h, w, 4 * A)
    :param im_dims: image size passed to ``clip_boxes``
    :param cfg_key: 'TRAIN' selects cfg.TRAIN.RPN_* settings, anything else cfg.TEST.RPN_*
    :param _feat_stride: anchor position step
    :param anchor_scales: scales passed to the anchor generator
    :return: tuple ``(blob, scores, proposals_ori, scores_ori)`` where
        blob is (N, 5) float32 rows ``[batch_index(=0), box]``,
        scores is (N, 1) and matches blob row for row,
        proposals_ori / scores_ori are the decoded proposals and scores of
        all anchors before size filtering.
        NOTE(review): proposals_ori is taken before ``clip_boxes`` is called;
        whether it ends up clipped depends on whether ``clip_boxes`` works in
        place. Confirm.
    """
    _anchors = generate_anchor.generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    # (1, h, w, C) -> (1, C, h, w)
    rpn_bbox_cls_prob = np.transpose(rpn_bbox_cls_prob, [0, 3, 1, 2])
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])

    # Only minibatch of 1 supported
    assert rpn_bbox_cls_prob.shape[0] == 1, \
        'Only single item batches are supported'

    if cfg_key == 'TRAIN':
        pre_nms_topN = cfg.TRAIN.RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg.TRAIN.RPN_POST_NMS_TOP_N
        nms_thresh = cfg.TRAIN.RPN_NMS_THRESH
        min_size = cfg.TRAIN.RPN_MIN_SIZE
    else:  # cfg_key == 'TEST'
        pre_nms_topN = cfg.TEST.RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg.TEST.RPN_POST_NMS_TOP_N
        nms_thresh = cfg.TEST.RPN_NMS_THRESH
        min_size = cfg.TEST.RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs
    scores = rpn_bbox_cls_prob[:, _num_anchors:, :, :]

    # 1. Generate proposals from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # Enumerate all shifted anchors in anchor-major order:
    # (A, 1, 4) + (1, K, 4) -> (A, K, 4) -> (K * A, 4).
    # Same rows as concatenating `shifts + _anchors[i]` for i = 0..A-1,
    # without the repeated re-allocation.
    A = _num_anchors
    K = shifts.shape[0]
    anchors = (_anchors[:, np.newaxis, :] + shifts[np.newaxis, :, :]).reshape((K * A, 4))

    # Bbox deltas: (1, 4 * A, H, W) -> (A * H * W, 4)
    shape = rpn_bbox_pred.shape
    rpn_bbox_pred = rpn_bbox_pred.reshape(
        [1, 4, (shape[1] // 4) * shape[2], shape[3]])
    rpn_bbox_pred = rpn_bbox_pred.transpose([0, 2, 3, 1])
    bbox_deltas = rpn_bbox_pred.reshape([-1, 4])

    # Scores: (1, A, H, W) -> (H * W * A, 1)
    # NOTE(review): this flattens scores location-major while anchors and
    # deltas above are anchor-major; verify the row correspondence against
    # the network's channel layout.
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
    scores_ori = scores

    # Decode the deltas relative to the anchors.
    proposals = bbox_transform_inv(anchors, bbox_deltas)
    proposals_ori = proposals
    # 2. clip proposals that extend beyond the image
    proposals = clip_boxes(proposals, im_dims)
    # 3. keep only boxes that are at least min_size
    keep = _filter_boxes(proposals, min_size)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]  # descending: highest score first
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    # Reorder the boxes together with their scores. The original reordered
    # only `scores`, which misaligned every (box, score) pair and made the
    # hstack below fail whenever the top-N cut actually removed rows.
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7) on rows [x_start, y_start, x_end, y_end, score]
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))  # (N, 5)
    return blob, scores, proposals_ori, scores_ori