def get_delta_roi(filename, roi_rec, im_scale):
    """
    compute regression deltas from the boxes of a roidb record to the boxes that
    carry the same track id in another annotation file
    :param filename: path of the XML annotation to parse; it must contain a
        'size/width' node and 'object' nodes holding 'trackid' and 'bndbox'
    :param roi_rec: roidb record; 'gt_trackid', 'boxes', 'flipped', 'width' and
        'height' are read
    :param im_scale: scale factor applied to both sets of boxes
    :return: float array shaped like roi_rec['boxes']; rows whose track id does
        not occur in the annotation stay zero
    """
    trackid = roi_rec['gt_trackid']
    boxes = roi_rec['boxes'] * im_scale
    delta = np.zeros_like(roi_rec['boxes'], dtype=float)

    tree = ET.parse(filename)
    # mirroring uses the width stored in the annotation itself, while clipping
    # uses the extent recorded in roi_rec
    width = float(tree.find('size').find('width').text)

    # track id -> [xmin, ymin, xmax, ymax], clipped, optionally mirrored, scaled
    track_boxes = {}
    for obj in tree.findall('object'):
        bbox = obj.find('bndbox')
        xmin = np.maximum(float(bbox.find('xmin').text), 0)
        ymin = np.maximum(float(bbox.find('ymin').text), 0)
        xmax = np.minimum(float(bbox.find('xmax').text), roi_rec['width'] - 1)
        ymax = np.minimum(float(bbox.find('ymax').text), roi_rec['height'] - 1)
        if roi_rec['flipped']:
            # horizontal mirror: the left and right edges swap roles
            xmin, xmax = width - 1 - xmax, width - 1 - xmin
            assert xmax >= xmin
        track_boxes[int(obj.find('trackid').text)] = [
            xmin * im_scale, ymin * im_scale, xmax * im_scale, ymax * im_scale
        ]

    for i, tid in enumerate(trackid):
        if tid in track_boxes:
            delta_trans = bbox_transform(np.array([boxes[i]]), np.array([track_boxes[tid]]))
            delta[i, :] = delta_trans[0]
    return delta
def sample_rois_v2(rois, num_classes, cfg,
                   labels=None, overlaps=None, bbox_targets=None, gt_boxes=None):
    """
    label every ROI and build its regression targets, without any subsampling
    :param rois: e2e rois [n, 5] with batch_index in the first column
    :param num_classes: number of classes
    :param cfg: config; TRAIN.BG_THRESH_HI, TRAIN.BBOX_NORMALIZATION_PRECOMPUTED,
        TRAIN.BBOX_MEANS and TRAIN.BBOX_STDS are read
    :param labels: maybe precomputed; modified in place (background set to 0)
    :param overlaps: maybe precomputed (max_overlaps); required when labels is given
    :param bbox_targets: maybe precomputed
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls)
    :return: (rois, labels, bbox_targets, bbox_weights)
    """
    gt_assignment = None
    if labels is None:
        # np.float64 is what the removed np.float alias stood for
        overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64), gt_boxes[:, :4].astype(np.float64))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

    # set labels of bg_rois to be 0
    bg_ind = np.where(overlaps < cfg.TRAIN.BG_THRESH_HI)[0]
    labels[bg_ind] = 0

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets
    else:
        if gt_assignment is None:
            # labels were supplied by the caller, so the roi -> gt assignment
            # has not been derived yet
            gt_assignment = bbox_overlaps(rois[:, 1:].astype(np.float64),
                                          gt_boxes[:, :4].astype(np.float64)).argmax(axis=1)
        targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment, :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS))
                       / np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    return rois, labels, bbox_targets, bbox_weights
def assign_pyramid_anchor(
        feat_shapes,
        gt_boxes,
        im_info,
        cfg,
        feat_strides=(4, 8, 16, 32, 64),
        scales=(8, ),
        ratios=(0.5, 1, 2),
        allowed_border=0,
        balance_scale_bg=False,
):
    """
    assign ground truth boxes to the anchor positions of every pyramid level
    :param feat_shapes: infer output shapes, one per level; feat_shapes[i][0][-2:]
        is read as (feat_height, feat_width)
    :param gt_boxes: assign ground truth; the first four columns are the box
    :param im_info: filter out anchors overlapped with edges; im_info[0][0] bounds
        the anchors' y2 and im_info[0][1] bounds their x2
    :param cfg: config; the TRAIN.RPN_* entries are read
    :param feat_strides: anchor position step of every level
    :param scales: used to generate anchors, affects num_anchors (per location);
        1-d (shared by all levels) or 2-d (one row per level)
    :param ratios: aspect ratios of generated anchors; 2-d whenever scales is 2-d
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :param balance_scale_bg: restrict the background samples for each pyramid level
    :return: dict of label, levels concatenated in feat_strides order
    'label': of shape (1, sum over levels of A * feat_height * feat_width);
        1 is positive, 0 is negative, -1 is dont care
    'bbox_target': of shape (1, A * 4, sum over levels of feat_height * feat_width)
    'bbox_weight': same shape as 'bbox_target'; RPN_BBOX_WEIGHTS on positive
        anchors, 0 elsewhere
    """
    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        if len(data.shape) == 1:
            ret = np.empty((count, ), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    # when set, subsampling keeps a deterministic prefix instead of a random draw
    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    ratios = np.array(ratios, dtype=np.float32)
    assert (len(feat_shapes) == len(feat_strides))

    fpn_args = []
    fpn_anchors_fid = np.zeros(0).astype(int)
    fpn_anchors = np.zeros([0, 4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id in range(len(feat_strides)):
        # len(scales.shape) == 1 just for backward compatibility, will remove in the future
        if len(scales.shape) == 1:
            base_anchors = generate_anchors(base_size=feat_strides[feat_id], ratios=ratios, scales=scales)
        else:
            assert len(scales.shape) == len(ratios.shape) == 2
            base_anchors = generate_anchors(base_size=feat_strides[feat_id], ratios=ratios[feat_id],
                                            scales=scales[feat_id])
        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = feat_shapes[feat_id][0][-2:]

        # 1. generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, feat_width) * feat_strides[feat_id]
        shift_y = np.arange(0, feat_height) * feat_strides[feat_id]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = num_anchors
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -allowed_border) &
            (all_anchors[:, 1] >= -allowed_border) &
            (all_anchors[:, 2] < im_info[1] + allowed_border) &
            (all_anchors[:, 3] < im_info[0] + allowed_border))[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # label: 1 is positive, 0 is negative, -1 is dont care
        # for sigmoid classifier, ignore the 'background' class
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        # stack this level behind the previous ones; fpn_anchors_fid records the
        # per-level counts so the levels can be split apart again later
        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_labels = np.hstack((fpn_labels, labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 is what the removed np.float alias stood for
        overlaps = bbox_overlaps(fpn_anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties with the best overlap of some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        # fg label: for each gt, anchor with highest overlap
        fpn_labels[gt_argmax_overlaps] = 1
        # fg label: above threshold IoU
        fpn_labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        fpn_labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = fpn_labels.shape[0] if cfg.TRAIN.RPN_BATCH_SIZE == -1 else int(
        cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(fpn_labels >= 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        fpn_labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = fpn_labels.shape[0] if cfg.TRAIN.RPN_BATCH_SIZE == -1 else \
        cfg.TRAIN.RPN_BATCH_SIZE - np.sum(fpn_labels >= 1)
    bg_inds = np.where(fpn_labels == 0)[0]
    # per-level counts -> start/end offsets into the stacked arrays
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

    if balance_scale_bg:
        # floor division keeps the quota an integer, which npr.choice needs for `size`
        num_bg_scale = num_bg // len(feat_strides)
        for feat_id in range(0, len(feat_strides)):
            bg_ind_scale = bg_inds[(bg_inds >= fpn_anchors_fid[feat_id]) &
                                   (bg_inds < fpn_anchors_fid[feat_id + 1])]
            if len(bg_ind_scale) > num_bg_scale:
                disable_inds = npr.choice(bg_ind_scale, size=(len(bg_ind_scale) - num_bg_scale), replace=False)
                fpn_labels[disable_inds] = -1
    else:
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            if DEBUG:
                disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
            fpn_labels[disable_inds] = -1

    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        # targets are only computed for the anchors that stayed positive
        fpn_bbox_targets[fpn_labels >= 1, :] = bbox_transform(
            fpn_anchors[fpn_labels >= 1, :], gt_boxes[argmax_overlaps[fpn_labels >= 1], :4])

    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    fpn_bbox_weights[fpn_labels >= 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id in range(0, len(feat_strides)):
        feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
        level = slice(fpn_anchors_fid[feat_id], fpn_anchors_fid[feat_id + 1])
        # map up to original set of anchors
        labels = _unmap(fpn_labels[level], total_anchors, fpn_inds_inside[feat_id], fill=-1)
        bbox_targets = _unmap(fpn_bbox_targets[level], total_anchors, fpn_inds_inside[feat_id], fill=0)
        bbox_weights = _unmap(fpn_bbox_weights[level], total_anchors, fpn_inds_inside[feat_id], fill=0)

        labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))

        bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)

    label = {
        'label': np.concatenate(label_list, axis=1),
        'bbox_target': np.concatenate(bbox_target_list, axis=2),
        'bbox_weight': np.concatenate(bbox_weight_list, axis=2)
    }

    return label
def assign_anchor(feat_shape, gt_boxes, im_info, cfg, feat_stride=16,
                  scales=(8, 16, 32), ratios=(0.5, 1, 2), allowed_border=0):
    """
    assign ground truth boxes to anchor positions
    :param feat_shape: infer output shape; feat_shape[-2:] is read as
        (feat_height, feat_width)
    :param gt_boxes: assign ground truth; the first four columns are the box
    :param im_info: filter out anchors overlapped with edges; im_info[0][0] bounds
        the anchors' y2 and im_info[0][1] bounds their x2
    :param cfg: config; the TRAIN.RPN_* entries are read
    :param feat_stride: anchor position step
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :return: dict of label
    'label': of shape (1, A * feat_height * feat_width);
        1 is positive, 0 is negative, -1 is dont care
    'bbox_target': of shape (1, A * 4, feat_height, feat_width)
    'bbox_weight': of shape (1, A * 4, feat_height, feat_width);
        RPN_BBOX_WEIGHTS on positive anchors, 0 elsewhere
    """
    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        if len(data.shape) == 1:
            ret = np.empty((count,), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    # when set, prints diagnostics and replaces random subsampling by a
    # deterministic prefix
    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    base_anchors = generate_anchors(base_size=feat_stride, ratios=list(ratios), scales=scales)
    num_anchors = base_anchors.shape[0]
    feat_height, feat_width = feat_shape[-2:]

    # the debug output uses single-argument print(...) so that it is valid and
    # prints the same text on both Python 2 and Python 3
    if DEBUG:
        print('anchors:')
        print(base_anchors)
        print('anchor shapes:')
        print(np.hstack((base_anchors[:, 2::4] - base_anchors[:, 0::4],
                         base_anchors[:, 3::4] - base_anchors[:, 1::4])))
        print('im_info {}'.format(im_info))
        print('height {} width {}'.format(feat_height, feat_width))
        print('gt_boxes shape {}'.format(gt_boxes.shape))
        print('gt_boxes {}'.format(gt_boxes))

    # 1. generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, feat_width) * feat_stride
    shift_y = np.arange(0, feat_height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = num_anchors
    K = shifts.shape[0]
    all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                           (all_anchors[:, 1] >= -allowed_border) &
                           (all_anchors[:, 2] < im_info[1] + allowed_border) &
                           (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
    if DEBUG:
        print('total_anchors {}'.format(total_anchors))
        print('inds_inside {}'.format(len(inds_inside)))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors shape {}'.format(anchors.shape))

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 is what the removed np.float alias stood for
        overlaps = bbox_overlaps(anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties with the best overlap of some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        if DEBUG:
            disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
        labels[disable_inds] = -1

    # targets are computed for every inside anchor; the weights below select
    # the positive ones
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4])

    bbox_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    if DEBUG:
        _sums = bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums = (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
        _counts = np.sum(labels == 1)
        means = _sums / (_counts + 1e-14)
        stds = np.sqrt(_squared_sums / _counts - means ** 2)
        print('means {}'.format(means))
        print('stdevs {}'.format(stds))

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        print('rpn: max max_overlaps {}'.format(np.max(max_overlaps)))
        print('rpn: num_positives {}'.format(np.sum(labels == 1)))
        print('rpn: num_negatives {}'.format(np.sum(labels == 0)))
        _fg_sum = np.sum(labels == 1)
        _bg_sum = np.sum(labels == 0)
        _count = 1
        print('rpn: num_positive avg {}'.format(_fg_sum / _count))
        print('rpn: num_negative avg {}'.format(_bg_sum / _count))

    labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, A * feat_height * feat_width))
    bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
    bbox_weights = bbox_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

    label = {'label': labels,
             'bbox_target': bbox_targets,
             'bbox_weight': bbox_weights}
    return label
def assign_pyramid_anchor(feat_shapes, gt_boxes, im_info, cfg, feat_strides=(4, 8, 16, 32, 64),
                          scales=(8,), ratios=(0.5, 1, 2), allowed_border=0, balance_scale_bg=False,):
    """
    assign ground truth boxes to the anchor positions of every pyramid level
    :param feat_shapes: infer output shapes, one per level; feat_shapes[i][0][-2:]
        is read as (feat_height, feat_width)
    :param gt_boxes: assign ground truth; the first four columns are the box
    :param im_info: filter out anchors overlapped with edges; im_info[0][0] bounds
        the anchors' y2 and im_info[0][1] bounds their x2
    :param cfg: config; the TRAIN.RPN_* entries are read
    :param feat_strides: anchor position step of every level
    :param scales: used to generate anchors, affects num_anchors (per location);
        1-d (shared by all levels) or 2-d (one row per level)
    :param ratios: aspect ratios of generated anchors; 2-d whenever scales is 2-d
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :param balance_scale_bg: restrict the background samples for each pyramid level
    :return: dict of label, levels concatenated in feat_strides order
    'label': of shape (1, sum over levels of A * feat_height * feat_width);
        1 is positive, 0 is negative, -1 is dont care
    'bbox_target': of shape (1, A * 4, sum over levels of feat_height * feat_width)
    'bbox_weight': same shape as 'bbox_target'; RPN_BBOX_WEIGHTS on positive
        anchors, 0 elsewhere
    """
    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        if len(data.shape) == 1:
            ret = np.empty((count,), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    # when set, subsampling keeps a deterministic prefix instead of a random draw
    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    ratios = np.array(ratios, dtype=np.float32)
    assert(len(feat_shapes) == len(feat_strides))

    fpn_args = []
    fpn_anchors_fid = np.zeros(0).astype(int)
    fpn_anchors = np.zeros([0, 4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id in range(len(feat_strides)):
        # len(scales.shape) == 1 just for backward compatibility, will remove in the future
        if len(scales.shape) == 1:
            base_anchors = generate_anchors(base_size=feat_strides[feat_id], ratios=ratios, scales=scales)
        else:
            assert len(scales.shape) == len(ratios.shape) == 2
            base_anchors = generate_anchors(base_size=feat_strides[feat_id], ratios=ratios[feat_id],
                                            scales=scales[feat_id])
        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = feat_shapes[feat_id][0][-2:]

        # 1. generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, feat_width) * feat_strides[feat_id]
        shift_y = np.arange(0, feat_height) * feat_strides[feat_id]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = num_anchors
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                               (all_anchors[:, 1] >= -allowed_border) &
                               (all_anchors[:, 2] < im_info[1] + allowed_border) &
                               (all_anchors[:, 3] < im_info[0] + allowed_border))[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # label: 1 is positive, 0 is negative, -1 is dont care
        # for sigmoid classifier, ignore the 'background' class
        labels = np.empty((len(inds_inside),), dtype=np.float32)
        labels.fill(-1)

        # stack this level behind the previous ones; fpn_anchors_fid records the
        # per-level counts so the levels can be split apart again later
        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_labels = np.hstack((fpn_labels, labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 is what the removed np.float alias stood for
        overlaps = bbox_overlaps(fpn_anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties with the best overlap of some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        # fg label: for each gt, anchor with highest overlap
        fpn_labels[gt_argmax_overlaps] = 1
        # fg label: above threshold IoU
        fpn_labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        fpn_labels[:] = 0

    # subsample positive labels if we have too many
    if cfg.TRAIN.RPN_BATCH_SIZE == -1:
        num_fg = fpn_labels.shape[0]
    else:
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(fpn_labels >= 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        fpn_labels[disable_inds] = -1

    # subsample negative labels if we have too many
    if cfg.TRAIN.RPN_BATCH_SIZE == -1:
        num_bg = fpn_labels.shape[0]
    else:
        num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(fpn_labels >= 1)
    bg_inds = np.where(fpn_labels == 0)[0]
    # per-level counts -> start/end offsets into the stacked arrays
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

    if balance_scale_bg:
        # floor division keeps the quota an integer, which npr.choice needs for `size`
        num_bg_scale = num_bg // len(feat_strides)
        for feat_id in range(0, len(feat_strides)):
            bg_ind_scale = bg_inds[(bg_inds >= fpn_anchors_fid[feat_id]) &
                                   (bg_inds < fpn_anchors_fid[feat_id+1])]
            if len(bg_ind_scale) > num_bg_scale:
                disable_inds = npr.choice(bg_ind_scale, size=(len(bg_ind_scale) - num_bg_scale), replace=False)
                fpn_labels[disable_inds] = -1
    else:
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            if DEBUG:
                disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
            fpn_labels[disable_inds] = -1

    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        # targets are only computed for the anchors that stayed positive
        positive = fpn_labels >= 1
        fpn_bbox_targets[positive, :] = bbox_transform(fpn_anchors[positive, :],
                                                       gt_boxes[argmax_overlaps[positive], :4])

    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    fpn_bbox_weights[fpn_labels >= 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id in range(0, len(feat_strides)):
        feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
        start, stop = fpn_anchors_fid[feat_id], fpn_anchors_fid[feat_id+1]
        # map up to original set of anchors
        labels = _unmap(fpn_labels[start:stop], total_anchors, fpn_inds_inside[feat_id], fill=-1)
        bbox_targets = _unmap(fpn_bbox_targets[start:stop], total_anchors, fpn_inds_inside[feat_id], fill=0)
        bbox_weights = _unmap(fpn_bbox_weights[start:stop], total_anchors, fpn_inds_inside[feat_id], fill=0)

        labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))

        bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)

    label = {
        'label': np.concatenate(label_list, axis=1),
        'bbox_target': np.concatenate(bbox_target_list, axis=2),
        'bbox_weight': np.concatenate(bbox_weight_list, axis=2)
    }

    return label
def sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                labels=None, overlaps=None, bbox_targets=None, gt_boxes=None):
    """
    generate random sample of ROIs comprising foreground and background examples
    :param rois: e2e rois [n, 5] with batch_index in the first column
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: number of classes
    :param cfg: config; TRAIN.FG_THRESH, TRAIN.BG_THRESH_LO/HI and the
        TRAIN.BBOX_* normalization entries are read
    :param labels: maybe precomputed
    :param overlaps: maybe precomputed (max_overlaps); required when labels is given
    :param bbox_targets: maybe precomputed
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls)
    :return: (rois, labels, neg_labels, bbox_targets, bbox_weights); neg_labels
        carries the assigned gt class for sampled rois whose overlap lies in
        [0.2, 0.3) and 0 everywhere else
    """
    debug = False
    gt_assignment = None
    if labels is None:
        # np.float64 is what the removed np.float alias stood for
        overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64), gt_boxes[:, :4].astype(np.float64))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)

    if debug:
        print('fg_indexes size:', fg_indexes.size, 'fg_rois_per_image:', fg_rois_per_image,
              'fg_rois_per_this_image:', fg_rois_per_this_image)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

    # indexes selected
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad more to ensure a fixed minibatch size
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(range(len(rois)), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # select labels
    labels = labels[keep_indexes]
    # keep the assigned classes before the background entries are zeroed
    labels_all = labels.copy()
    # set labels of bg_rois to be 0
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        if gt_assignment is None:
            # labels were supplied by the caller, so the roi -> gt assignment
            # has not been derived yet; rois is already reordered by
            # keep_indexes here, so the assignment is too
            sampled_assignment = bbox_overlaps(rois[:, 1:].astype(np.float64),
                                               gt_boxes[:, :4].astype(np.float64)).argmax(axis=1)
        else:
            sampled_assignment = gt_assignment[keep_indexes]
        targets = bbox_transform(rois[:, 1:], gt_boxes[sampled_assignment, :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS))
                       / np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    # sampled rois with overlap in [neg_middle, neg_high) keep their gt class in neg_labels
    overlaps = overlaps[keep_indexes]
    neg_low = 0.0
    neg_middle = 0.2
    neg_high = 0.3
    neg_indexes_L2 = np.where((overlaps < neg_high) & (overlaps >= neg_middle))[0]
    neg_labels = np.zeros(labels.shape)
    neg_labels[neg_indexes_L2] = labels_all[neg_indexes_L2]

    if debug:
        # the two outer overlap bands are only reported, never used
        neg_indexes_L1 = np.where((overlaps < neg_middle) & (overlaps >= neg_low))[0]
        neg_indexes_L3 = np.where(overlaps >= neg_high)[0]
        print('neg_indexes_L1:', len(neg_indexes_L1), 'neg_indexes_L2:', len(neg_indexes_L2),
              'neg_indexes_L3', len(neg_indexes_L3))
        print('labels_all:', labels_all)
        print('neg_labels:', neg_labels, 'neg_labels_shape:', neg_labels.shape)
        print('<<<fg neg labels>>>>', neg_labels[neg_indexes_L2])
        print('fg neg labels sum', np.sum(neg_labels[neg_indexes_L2]))
        print('neg labels sum', np.sum(neg_labels))
        print('over_laps:', overlaps)
        print('neg_fg_over_laps:', overlaps[neg_indexes_L2])
        print('<<<<<<<<<<<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    return rois, labels, neg_labels, bbox_targets, bbox_weights
def sample_rois(self, rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                labels=None, overlaps=None, bbox_targets=None, gt_boxes=None, gt_masks=None):
    """
    generate random sample of ROIs comprising foreground and background examples,
    together with a mask regression target for every sampled foreground ROI
    :param rois: e2e rois [n, 5] with batch_index in the first column
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: number of classes
    :param cfg: config; TRAIN.FG_THRESH, TRAIN.BG_THRESH_LO/HI, TRAIN.IGNORE_GAP,
        TRAIN.GAP_SELECT_FROM_ALL and the TRAIN.BBOX_* normalization entries are read
    :param labels: maybe precomputed
    :param overlaps: maybe precomputed (max_overlaps); required when labels is given
    :param bbox_targets: maybe precomputed
    :param gt_boxes: [n, 5] (x1, y1, x2, y2, cls)
    :param gt_masks: ground truth masks, indexed like gt_boxes
    :return: (rois, labels, bbox_targets, bbox_weights, mask_reg_targets);
        mask_reg_targets has shape (rois kept, 1, mask_size, mask_size) and is -1
        for every non-foreground roi
    """
    gt_assignment = None
    if labels is None:
        # np.float64 is what the removed np.float alias stood for
        overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64), gt_boxes[:, :4].astype(np.float64))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]
    elif gt_boxes is not None:
        # labels were supplied by the caller; the roi -> gt assignment is still
        # needed below for the regression targets and the mask targets
        gt_assignment = bbox_overlaps(rois[:, 1:].astype(np.float64),
                                      gt_boxes[:, :4].astype(np.float64)).argmax(axis=1)

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    if cfg.TRAIN.IGNORE_GAP:
        keep_inds = remove_repetition(rois[fg_indexes, 1:])
        fg_indexes = fg_indexes[keep_inds]

    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = np.random.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    if cfg.TRAIN.IGNORE_GAP:
        keep_inds = remove_repetition(rois[bg_indexes, 1:])
        bg_indexes = bg_indexes[keep_inds]

    # Compute number of background RoIs to take from this image (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = np.random.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

    # indexes selected
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad more to ensure a fixed minibatch size
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
        if cfg.TRAIN.GAP_SELECT_FROM_ALL:
            gap_indexes = np.random.choice(range(len(rois)), size=gap, replace=False)
        else:
            # pad only with rois that were not sampled as foreground
            bg_full_indexes = list(set(range(len(rois))) - set(fg_indexes))
            gap_indexes = np.random.choice(bg_full_indexes, size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # select labels
    labels = labels[keep_indexes]
    # set labels of bg_rois to be 0
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS))
                       / np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    if cfg.TRAIN.IGNORE_GAP:
        # the padded rois are marked as ignored and get no regression weight
        valid_rois_per_this_image = fg_rois_per_this_image + bg_rois_per_this_image
        labels[valid_rois_per_this_image:] = -1
        bbox_weights[valid_rois_per_this_image:] = 0

    # masks
    mask_reg_targets = -np.ones((len(keep_indexes), 1, self._mask_size, self._mask_size))
    # fg_indexes are the leading entries of keep_indexes, so `idx` addresses the
    # sampled rois while `obj` addresses the original ones
    for idx, obj in enumerate(fg_indexes):
        gt_roi = np.round(gt_boxes[gt_assignment[obj], :-1]).astype(int)
        ex_roi = np.round(rois[idx, 1:]).astype(int)
        gt_mask = gt_masks[gt_assignment[obj]]
        mask_reg_target = intersect_box_mask(ex_roi, gt_roi, gt_mask)
        mask_reg_target = cv2.resize(mask_reg_target.astype(np.float64), (self._mask_size, self._mask_size))
        mask_reg_target = mask_reg_target >= self._binary_thresh
        mask_reg_targets[idx, ...] = mask_reg_target

    return rois, labels, bbox_targets, bbox_weights, mask_reg_targets
def assign_anchor(feat_shape_p2, feat_shape_p3, feat_shape_p4, feat_shape_p5, feat_shape_p6,
                  gt_boxes, im_info, cfg,
                  feat_stride_p2=4, scales_p2=(16, ), ratios_p2=(0.75, 1, 1.5),
                  feat_stride_p3=8, scales_p3=(16, ), ratios_p3=(0.75, 1, 1.5),
                  feat_stride_p4=16, scales_p4=(16, ), ratios_p4=(0.75, 1, 1.5),
                  feat_stride_p5=32, scales_p5=(16, ), ratios_p5=(0.75, 1, 1.5),
                  feat_stride_p6=64, scales_p6=(16, ), ratios_p6=(0.75, 1, 1.5),
                  allowed_border=1000):
    """
    assign ground truth boxes to anchor positions on five pyramid levels (p2..p6)
    :param feat_shape_p2 .. feat_shape_p6: per-level feature shapes; the last two
        entries are read as (feat_height, feat_width)
    :param gt_boxes: assign ground truth:[n, 5]
    :param im_info: accepted for interface compatibility; not used for filtering,
        every generated anchor is kept
    :param cfg: config; reads TRAIN.RPN_CLOBBER_POSITIVES, TRAIN.RPN_NEGATIVE_OVERLAP,
        TRAIN.RPN_POSITIVE_OVERLAP, TRAIN.RPN_FG_FRACTION, TRAIN.RPN_BATCH_SIZE and
        TRAIN.RPN_BBOX_WEIGHTS
    :param scales_p3: scales used to generate anchors on EVERY level
    :param feat_stride_pN, scales_p2/p4/p5/p6, ratios_pN, allowed_border: accepted
        for interface compatibility but ignored; strides are fixed to
        (4, 8, 16, 32, 64) and ratios to (0.5, 1, 2)
    :return: dict with, for each level name pN in p2..p6,
        'label/pN': of shape (1, A * feat_height * feat_width), 1 positive, 0 negative, -1 dont care
        'bbox_target/pN': of shape (1, A * 4, feat_height, feat_width)
        'bbox_weight/pN': of shape (1, A * 4, feat_height, feat_width)
        where A is the number of base anchors per location
    """
    level_names = ('p2', 'p3', 'p4', 'p5', 'p6')
    feat_shape = [feat_shape_p2, feat_shape_p3, feat_shape_p4, feat_shape_p5, feat_shape_p6]
    feat_stride = [4, 8, 16, 32, 64]
    ratios = [0.5, 1, 2]
    scales = np.array(scales_p3, dtype=np.float32)

    # Generate the shifted anchors of every level and remember where each
    # level starts/stops inside the concatenated anchor array.
    all_anchors_list = []
    level_bounds = [0]
    for shape, stride in zip(feat_shape, feat_stride):
        base_anchors = generate_anchors(base_size=stride, ratios=list(ratios), scales=scales)
        A = base_anchors.shape[0]
        feat_height, feat_width = shape[-2:]

        shift_x = np.arange(0, feat_width) * stride
        shift_y = np.arange(0, feat_height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        K = shifts.shape[0]
        level_anchors = base_anchors.reshape((1, A, 4)) + \
            shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        all_anchors_list.append(level_anchors.reshape((K * A, 4)))
        level_bounds.append(level_bounds[-1] + int(K * A))

    total_anchors = level_bounds[-1]
    all_anchors = np.vstack(all_anchors_list)  # [total_anchors, 4]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((total_anchors, ), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        # overlaps (ex, gt); np.float64 replaces the removed np.float alias
        overlaps = bbox_overlaps(all_anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(total_anchors), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1
        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(all_anchors, gt_boxes[argmax_overlaps, :4])

    bbox_weights = np.zeros((total_anchors, 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    # Split the flat per-anchor arrays back into levels. No anchor was filtered
    # out, so each level is a plain contiguous slice that only needs reshaping.
    label = {}
    for name, shape, start, stop in zip(level_names, feat_shape,
                                        level_bounds[:-1], level_bounds[1:]):
        feat_height, feat_width = shape[-2], shape[-1]
        level_label = labels[start:stop].reshape(
            (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        label['label/' + name] = level_label.reshape((1, A * feat_height * feat_width))
        label['bbox_target/' + name] = bbox_targets[start:stop].reshape(
            (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        label['bbox_weight/' + name] = bbox_weights[start:stop].reshape(
            (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
    return label
def sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                labels=None, overlaps=None, bbox_targets=None, gt_boxes=None):
    """
    generate random sample of ROIs comprising foreground and background examples
    :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: number of classes
    :param cfg: config; reads TRAIN.FG_THRESH, TRAIN.BG_THRESH_HI, TRAIN.BG_THRESH_LO,
        TRAIN.BBOX_NORMALIZATION_PRECOMPUTED, TRAIN.BBOX_MEANS and TRAIN.BBOX_STDS
    :param labels: maybe precomputed
    :param overlaps: maybe precomputed (max_overlaps)
    :param bbox_targets: maybe precomputed
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls)
    :return: (rois, labels, bbox_targets, bbox_weights)

    When labels are computed here, bbox_overlaps_py1 also returns two auxiliary
    overlap measures and a per-(roi, gt) box-centre flag. A RoI whose primary
    overlap is > 0.1, whose first auxiliary overlap is > 0.7 and whose flag for
    its assigned gt equals 1 is promoted to foreground even if it is below
    FG_THRESH. With precomputed labels those values do not exist, so the
    promotion step is skipped.
    """
    have_aux_overlaps = labels is None
    if have_aux_overlaps:
        # np.float64 replaces the np.float alias (removed in NumPy 1.24); same dtype.
        overlaps, overlaps1, overlaps2, tboxcenter_ins = bbox_overlaps_py1(
            rois[:, 1:].astype(np.float64), gt_boxes[:, :4].astype(np.float64))
        gt_assignment = overlaps.argmax(axis=1)
        # flag of each RoI with respect to the gt box it is assigned to
        boxcenter_ins = np.zeros(gt_assignment.shape[0])
        boxcenter_ins[:] = np.asarray(tboxcenter_ins)[
            np.arange(gt_assignment.shape[0]), gt_assignment]
        overlaps = overlaps.max(axis=1)
        overlaps1 = overlaps1.max(axis=1)
        overlaps2 = overlaps2.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

    if DEBUG:
        new_order = np.argsort(overlaps)
        print("overlaps:" + str(overlaps[new_order[-100:]]))
        if have_aux_overlaps:
            print("overlaps1:" + str(overlaps1[new_order[-100:]]))
            print("overlaps2:" + str(overlaps2[new_order[-100:]]))
            print("boxcenter_ins" + str(boxcenter_ins[new_order[-100:]]))

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    if DEBUG:
        print("fg_indexes:" + str(fg_indexes))

    if have_aux_overlaps:
        # promote additional RoIs; setdiff1d keeps ascending order and drops
        # the ones that are already foreground
        promoted = np.where((overlaps > 0.1) & (overlaps1 > 0.7) & (boxcenter_ins == 1))[0]
        fg_indexes = np.append(fg_indexes, np.setdiff1d(promoted, fg_indexes))

    if DEBUG:
        print("fg_indexes:" + str(fg_indexes))
        print("**********proposal-gt:" + str(len(fg_indexes) - gt_boxes.shape[0]))
        # context manager so the debug file handle is always closed
        with open('channels.txt') as f_chan:
            channels = f_chan.read().split(" ")
        for ii in range(len(fg_indexes) - gt_boxes.shape[0]):
            if fg_indexes[ii] < len(channels):
                print(channels[fg_indexes[ii]])
                print(labels[fg_indexes[ii]])

    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &
                          (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image
    # (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

    # indexes selected: foreground first, then background
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad more to ensure a fixed minibatch size
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(range(len(rois)), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # select labels
    labels = labels[keep_indexes]
    # set labels of bg_rois to be 0
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS)) /
                       np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    return rois, labels, bbox_targets, bbox_weights
def generate_proposals(self, cls_prob, bbox_pred, im_info):
    """
    Turn per-level scores and box deltas into one stacked proposal blob.

    :param cls_prob: per-level score arrays, indexed in the order of self.feat_stride
    :param bbox_pred: per-level box delta arrays, same indexing as cls_prob
    :param im_info: im_info[:2] is used for clipping, im_info[2] scales the min size
    :return: (blob, scores); blob rows are [self.batch_idx, box...] as float32,
        scores is the matching column of scores
    :raises ValueError: if the first score array has a leading dimension > 1
    """
    if cls_prob[0].shape[0] > 1:
        raise ValueError("Sorry, multiple images for each device is not implemented.")

    top_n_before_nms = self.rpn_pre_nms_top_n
    top_n_after_nms = self.rpn_post_nms_top_n
    size_floor = self.rpn_min_size

    boxes_per_level = []
    scores_per_level = []
    for level, raw_stride in enumerate(self.feat_stride):
        stride = int(raw_stride)
        base = generate_anchors(stride=stride,
                                sizes=self.scales * stride,
                                aspect_ratios=self.ratios)
        level_scores = cls_prob[level]
        level_deltas = bbox_pred[level]

        # grid size comes from the score array itself
        height, width = level_scores.shape[-3:-1]

        # enumerate all shifts of the base anchors over the grid
        grid_x, grid_y = np.meshgrid(np.arange(0, width) * stride,
                                     np.arange(0, height) * stride)
        shifts = np.vstack((grid_x.ravel(), grid_y.ravel(),
                            grid_x.ravel(), grid_y.ravel())).transpose()

        n_base = self.num_anchors
        n_cells = shifts.shape[0]
        shifted = base.reshape((1, n_base, 4)) + \
            shifts.reshape((1, n_cells, 4)).transpose((1, 0, 2))
        level_anchors = shifted.reshape((n_cells * n_base, 4))

        level_deltas = level_deltas.reshape((-1, 4))
        level_scores = level_scores.reshape((-1, 1))

        if self.individual_proposals:
            # order this level by descending score, optionally top-N only
            negated = -level_scores.squeeze()
            if top_n_before_nms <= 0 or top_n_before_nms >= len(level_scores):
                order = np.argsort(negated)
            else:
                inds = np.argpartition(negated, top_n_before_nms)[:top_n_before_nms]
                order = inds[np.argsort(-level_scores[inds].squeeze())]
            level_deltas = level_deltas[order, :]
            level_anchors = level_anchors[order, :]
            level_scores = level_scores[order]

        # anchors + deltas -> boxes, clipped to the image
        level_boxes = bbox_transform(level_anchors, level_deltas)
        level_boxes = clip_boxes(level_boxes, im_info[:2])

        # drop boxes below the minimum size (expressed at input image scale)
        keep = self._filter_boxes(level_boxes, size_floor * im_info[2])
        level_boxes = level_boxes[keep, :]
        level_scores = level_scores[keep]

        if self.individual_proposals:
            keep = self.nms_func(np.hstack((level_boxes, level_scores)).astype(np.float32))
            if top_n_after_nms > 0:
                keep = keep[:top_n_after_nms]
            level_boxes = level_boxes[keep, :]
            level_scores = level_scores[keep]

        boxes_per_level.append(level_boxes)
        scores_per_level.append(level_scores)

    proposals = np.vstack(boxes_per_level)
    scores = np.vstack(scores_per_level)
    batch_column = np.ones((proposals.shape[0], 1), dtype=np.float32) * self.batch_idx
    blob = np.hstack((batch_column, proposals.astype(np.float32, copy=False)))
    return blob, scores
def assign_pyramid_anchor(feat_shapes, gt_boxes, im_info, cfg,
                          feat_strides=(4, 8, 16, 16, 16), scales=(8, 8, 8, 16, 32),
                          ratios=(0.5, 1, 2), allowed_border=0, balance_scale_bg=False):
    """
    assign ground truth boxes to anchor positions over several feature levels
    :param feat_shapes: per-level shapes; feat_shapes[i][0][-2:] is read as
        (feat_height, feat_width)
    :param gt_boxes: assign ground truth:[n, 5]
    :param im_info: im_info[0] is read as (height, width, ...) to filter out
        anchors crossing the image border
    :param cfg: config; reads TRAIN.RPN_CLOBBER_POSITIVES, TRAIN.RPN_NEGATIVE_OVERLAP,
        TRAIN.RPN_POSITIVE_OVERLAP, TRAIN.RPN_BATCH_SIZE (-1 disables subsampling),
        TRAIN.RPN_FG_FRACTION and TRAIN.RPN_BBOX_WEIGHTS
    :param feat_strides: anchor position step of each level
    :param scales: one anchor scale per level
    :param ratios: aspect ratios of generated anchors (shared by all levels)
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :param balance_scale_bg: if True, cap the negatives of every level separately
        at an equal share of the background budget
    :return: dict of
        'label': of shape (1, sum over levels of A * h * w)
        'bbox_target': of shape (1, A * 4, sum over levels of h * w)
        'bbox_weight': of shape (1, A * 4, sum over levels of h * w)
    """
    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        if len(data.shape) == 1:
            ret = np.empty((count,), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    ratios = np.array(ratios, dtype=np.float32)

    fpn_args = []
    fpn_anchors_fid = np.zeros(0).astype(int)  # number of kept anchors per level
    fpn_anchors = np.zeros([0, 4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id in range(len(feat_strides)):
        stride = feat_strides[feat_id]
        base_anchors = generate_anchors(base_size=stride, ratios=ratios,
                                        scales=[scales[feat_id]])
        feat_height, feat_width = feat_shapes[feat_id][0][-2:]

        shift_x = np.arange(0, feat_width) * stride
        shift_y = np.arange(0, feat_height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # (1, A, 4) base anchors + (K, 1, 4) shifts -> (K*A, 4) shifted anchors
        A = base_anchors.shape[0]
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + \
            shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                               (all_anchors[:, 1] >= -allowed_border) &
                               (all_anchors[:, 2] < im_info[1] + allowed_border) &
                               (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
        anchors = all_anchors[inds_inside, :]

        # label: 1 is positive, 0 is negative, -1 is dont care
        labels = np.empty((len(inds_inside),), dtype=np.float32)
        labels.fill(-1)

        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_labels = np.hstack((fpn_labels, labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])

    if gt_boxes.size > 0:
        # overlaps (ex, gt); np.float64 replaces the removed np.float alias
        overlaps = bbox_overlaps(fpn_anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        fpn_labels[gt_argmax_overlaps] = 1
        fpn_labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        fpn_labels[:] = 0

    # subsample positive labels if we have too many
    if cfg.TRAIN.RPN_BATCH_SIZE == -1:
        num_fg = fpn_labels.shape[0]
    else:
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(fpn_labels >= 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        fpn_labels[disable_inds] = -1

    # subsample negative labels if we have too many
    if cfg.TRAIN.RPN_BATCH_SIZE == -1:
        num_bg = fpn_labels.shape[0]
    else:
        num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(fpn_labels >= 1)
    bg_inds = np.where(fpn_labels == 0)[0]

    # turn per-level counts into [start, stop) offsets into the flat arrays
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

    if balance_scale_bg:
        # floor division: the budget is used as a sample size and must stay an
        # integer on Python 3 as well
        num_bg_scale = num_bg // len(feat_strides)
        for feat_id in range(len(feat_strides)):
            bg_ind_scale = bg_inds[(bg_inds >= fpn_anchors_fid[feat_id]) &
                                   (bg_inds < fpn_anchors_fid[feat_id + 1])]
            if len(bg_ind_scale) > num_bg_scale:
                disable_inds = npr.choice(bg_ind_scale,
                                          size=(len(bg_ind_scale) - num_bg_scale),
                                          replace=False)
                fpn_labels[disable_inds] = -1
    elif len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        fpn_labels[disable_inds] = -1

    # regression targets and weights only for positive anchors
    fg_mask = fpn_labels >= 1
    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        fpn_bbox_targets[fg_mask, :] = bbox_transform(
            fpn_anchors[fg_mask, :], gt_boxes[argmax_overlaps[fg_mask], :4])
    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    fpn_bbox_weights[fg_mask, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id in range(len(feat_strides)):
        feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
        start, stop = fpn_anchors_fid[feat_id], fpn_anchors_fid[feat_id + 1]
        inds = fpn_inds_inside[feat_id]

        # map up to original set of anchors of this level
        labels = _unmap(fpn_labels[start:stop], total_anchors, inds, fill=-1)
        bbox_targets = _unmap(fpn_bbox_targets[start:stop], total_anchors, inds, fill=0)
        bbox_weights = _unmap(fpn_bbox_weights[start:stop], total_anchors, inds, fill=0)

        labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))
        bbox_targets = bbox_targets.reshape(
            (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape(
            (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)

    label = {
        'label': np.concatenate(label_list, axis=1),
        'bbox_target': np.concatenate(bbox_target_list, axis=2),
        'bbox_weight': np.concatenate(bbox_weight_list, axis=2)
    }
    return label
def assign_anchor(feat_shape_p4, feat_shape_p5, feat_shape_p6, feat_shape_p7,
                  gt_boxes, im_info, cfg,
                  feat_stride_p4=16, scales_p4=(8, ), ratios_p4=(0.75, 1, 1.5),
                  feat_stride_p5=32, scales_p5=(8, ), ratios_p5=(0.75, 1, 1.5),
                  feat_stride_p6=64, scales_p6=(8, ), ratios_p6=(0.75, 1, 1.5),
                  feat_stride_p7=128, scales_p7=(8, ), ratios_p7=(0.75, 1, 1.5),
                  allowed_border=0):
    """
    assign ground truth boxes to anchor positions on four pyramid levels (p4..p7)
    :param feat_shape_p4 .. feat_shape_p7: per-level feature shapes; the last two
        entries are read as (feat_height, feat_width)
    :param gt_boxes: assign ground truth:[n, 5]; column 4 is used as the label value
    :param im_info: accepted for interface compatibility; every anchor is kept,
        no border filtering is applied
    :param cfg: config; reads TRAIN.RPN_POSITIVE_OVERLAP, TRAIN.RPN_NEGATIVE_OVERLAP
        and TRAIN.RPN_BBOX_WEIGHTS
    :param scales_p5: anchor scales; 1-D = shared by all levels, 2-D = one row per level
    :param ratios_p5: anchor aspect ratios; must be 2-D when scales_p5 is 2-D
    :param feat_stride_pN, scales_p4/p6/p7, ratios_p4/p6/p7, allowed_border: accepted
        for interface compatibility but ignored; strides are fixed to (16, 32, 64, 128)
    :return: dict of
        'label': of shape (1, sum over levels of A * h * w); gt_boxes column 4 where
            the best overlap reaches RPN_POSITIVE_OVERLAP, 0 below
            RPN_NEGATIVE_OVERLAP, -1 in between
        'bbox_target': of shape (1, A * 4, sum over levels of h * w)
        'bbox_weight': of shape (1, A * 4, sum over levels of h * w)
    No foreground/background subsampling is performed here.
    """
    feat_shapes = [feat_shape_p4, feat_shape_p5, feat_shape_p6, feat_shape_p7]
    feat_strides = [16, 32, 64, 128]
    scales = np.array(scales_p5)
    ratios = np.array(ratios_p5)

    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        if len(data.shape) == 1:
            ret = np.empty((count, ), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    im_info = im_info[0]

    fpn_args = []
    fpn_anchors_fid = np.zeros(0).astype(int)  # number of anchors per level
    fpn_anchors = np.zeros([0, 4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id in range(len(feat_strides)):
        stride = feat_strides[feat_id]
        # len(scales.shape) == 1 just for backward compatibility
        if len(scales.shape) == 1:
            base_anchors = generate_anchors(base_size=stride, ratios=ratios, scales=scales)
        else:
            assert len(scales.shape) == len(ratios.shape) == 2
            base_anchors = generate_anchors(base_size=stride, ratios=ratios[feat_id],
                                            scales=scales[feat_id])
        feat_height, feat_width = feat_shapes[feat_id][-2:]

        shift_x = np.arange(0, feat_width) * stride
        shift_y = np.arange(0, feat_height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = base_anchors.shape[0]
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + \
            shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # every anchor is kept; np.arange replaces the Python-2-only xrange
        inds_inside = np.arange(total_anchors)
        anchors = all_anchors[inds_inside, :]

        # label: >= 1 is positive (class), 0 is negative, -1 is dont care
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_labels = np.hstack((fpn_labels, labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])

    if gt_boxes.size > 0:
        # overlaps (ex, gt); np.float64 replaces the removed np.float alias
        overlaps = bbox_overlaps(fpn_anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)), argmax_overlaps]
        # start from the class of the best-matching gt box, then demote:
        # first to dont-care, then (lower threshold) to background
        labels = gt_boxes[argmax_overlaps, 4]
        labels[max_overlaps < cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        fpn_labels = labels
    else:
        fpn_labels[:] = 0

    # turn per-level counts into [start, stop) offsets into the flat arrays
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

    # targets are computed for every anchor; the weights select the positives
    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        fpn_bbox_targets[:] = bbox_transform(fpn_anchors, gt_boxes[argmax_overlaps, :4])
    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    fpn_bbox_weights[fpn_labels >= 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id in range(len(feat_strides)):
        feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
        start, stop = fpn_anchors_fid[feat_id], fpn_anchors_fid[feat_id + 1]
        inds = fpn_inds_inside[feat_id]

        # map up to original set of anchors of this level
        labels = _unmap(fpn_labels[start:stop], total_anchors, inds, fill=-1)
        bbox_targets = _unmap(fpn_bbox_targets[start:stop], total_anchors, inds, fill=0)
        bbox_weights = _unmap(fpn_bbox_weights[start:stop], total_anchors, inds, fill=0)

        labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))
        bbox_targets = bbox_targets.reshape(
            (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape(
            (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)

    label = {
        'label': np.concatenate(label_list, axis=1),
        'bbox_target': np.concatenate(bbox_target_list, axis=2),
        'bbox_weight': np.concatenate(bbox_weight_list, axis=2)
    }
    return label
def sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes,
                labels=None, overlaps=None, bbox_targets=None, gt_boxes=None, gt_kps=None):
    """
    generate random sample of ROIs comprising foreground and background examples,
    optionally with keypoint targets for the sampled foreground ROIs

    Thresholds and keypoint settings are read from the module-level ``cfg``.
    ``gt_assignment`` is only computed when ``labels`` is None, so precomputed
    ``labels`` must come with precomputed ``bbox_targets`` and without ``gt_kps``.

    :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: number of classes
    :param labels: maybe precomputed
    :param overlaps: maybe precomputed (max_overlaps)
    :param bbox_targets: maybe precomputed
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls)
    :param gt_kps: optional for e2e [n, num_kps*3] (x1, y1, v1, ...)
    :return: dict with 'rois_output', 'label', 'bbox_target', 'bbox_weight' and,
             when gt_kps is given, 'kps_label', 'kps_target', 'kps_weight'
    """
    if labels is None:
        # np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased
        overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64),
                                 gt_boxes[:, :4].astype(np.float64))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)

    # select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &
                          (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # number of background RoIs to take (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
    # sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

    # indexes selected: foreground first, then background
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad with randomly drawn rois to ensure a fixed minibatch size
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(range(len(rois)), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # select labels; everything after the foreground part (bg and padding) becomes 0
    labels = labels[keep_indexes]
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS))
                       / np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    res = {
        'rois_output': rois,
        'label': labels,
        'bbox_target': bbox_targets,
        'bbox_weight': bbox_weights,
    }

    if gt_kps is not None:
        keep_kps = gt_kps[gt_assignment[keep_indexes]]
        n_keep = keep_kps.shape[0]
        K = cfg.dataset.NUM_KEYPOINTS
        assert gt_kps.shape[1] == K*3
        G = cfg.network.KEYPOINTS_POOLED_SIZE

        # -1 label / zero target / zero weight for every roi that is not foreground
        kps_labels = np.empty([n_keep, K], dtype=np.float32)
        kps_labels.fill(-1)
        kps_targets = np.zeros([n_keep, K, G, G, 2], dtype=np.float32)
        kps_weights = kps_targets.copy()

        num_fg = fg_indexes.size
        assert num_fg > 0, 'need at least one roi'

        # assign kp targets; foreground rois occupy the first num_fg rows
        fg_kps_label, fg_kps_target, fg_kps_weight = assign_keypoints(
            rois[:num_fg, 1:], keep_kps[:num_fg], pooled_size=G)
        kps_labels[:num_fg] = fg_kps_label
        kps_targets[:num_fg] = fg_kps_target
        # weights are scaled by (roughly) 1 / num_fg
        normalizer = 1.0 / (num_fg + 1e-3)
        kps_weights[:num_fg] = fg_kps_weight * normalizer

        res['kps_label'] = kps_labels.reshape([-1])
        # move the trailing size-2 axis next to the keypoint axis, then merge the two
        res['kps_target'] = kps_targets.transpose([0, 1, 4, 2, 3]).reshape([n_keep, -1, G, G])
        res['kps_weight'] = kps_weights.transpose([0, 1, 4, 2, 3]).reshape([n_keep, -1, G, G])

    return res
def assign_anchor(feat_shape_p3, feat_shape_p4, feat_shape_p5, feat_shape_p6,
                  gt_boxes, im_info, cfg,
                  feat_stride_p3=4, scales_p3=(8, ), ratios_p3=(0.75, 1, 1.5),
                  feat_stride_p4=8, scales_p4=(8, ), ratios_p4=(0.75, 1, 1.5),
                  feat_stride_p5=16, scales_p5=(8, ), ratios_p5=(0.75, 1, 1.5),
                  feat_stride_p6=4, scales_p6=(8, ), ratios_p6=(0.75, 1, 1.5),
                  allowed_border=1):
    """
    assign ground truth boxes to anchor positions on four pyramid levels (p3..p6)

    NOTE: the per-level ``feat_stride_*`` / ``scales_*`` / ``ratios_*`` arguments,
    ``im_info`` and ``allowed_border`` are accepted for signature compatibility but
    are not used: strides (8, 16, 32, 64), scales (8, 10, 12) and ratios (0.5, 1, 2)
    are hard-coded below and no anchor is filtered by the image border.

    :param feat_shape_p3: infer output shape of p3 (last two dims are height, width)
    :param feat_shape_p4: infer output shape of p4
    :param feat_shape_p5: infer output shape of p5
    :param feat_shape_p6: infer output shape of p6
    :param gt_boxes: assign ground truth:[n, 5], last column is the class label
    :param im_info: unused
    :param cfg: config providing TRAIN.RPN_NEGATIVE_OVERLAP, TRAIN.RPN_POSITIVE_OVERLAP
                and TRAIN.RPN_BBOX_WEIGHTS
    :return: dict with, for each level px in (p3, p4, p5, p6):
    'label/px': of shape (1, num_anchors * feat_height * feat_width); class label of the
                best-overlapping gt, 0 for background, -1 for dont care
    'bbox_target/px': of shape (1, num_anchors * 4, feat_height, feat_width)
    'bbox_weight/px': of shape (1, num_anchors * 4, feat_height, feat_width)
    """
    level_names = ('p3', 'p4', 'p5', 'p6')
    feat_shape = [feat_shape_p3, feat_shape_p4, feat_shape_p5, feat_shape_p6]
    feat_stride = [8, 16, 32, 64]
    scales = (8, 10, 12)
    ratios = (0.5, 1, 2)

    def _unmap(data, count, inds, fill=0):
        """unmap a subset inds of data into original data of size count"""
        ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
        ret.fill(fill)
        ret[inds] = data
        return ret

    scales = np.array(scales, dtype=np.float32)

    # the original `assert ('...')` asserted a non-empty string and could never fail
    if len(feat_stride) != len(feat_shape):
        raise ValueError('length of feat_stride is not equal to length of feat_shape')

    labels_list = []
    bbox_targets_list = []
    bbox_weights_list = []
    for stride, shape in zip(feat_stride, feat_shape):
        base_anchors = generate_anchors(base_size=stride, ratios=list(ratios), scales=scales)
        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = shape[-2:]

        # 1. generate shifted anchors over the feature grid
        shift_x = np.arange(0, feat_width) * stride
        shift_y = np.arange(0, feat_height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = num_anchors
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + \
            shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))

        # every anchor is kept (no border filtering at this point)
        anchors = all_anchors
        total_anchors = len(anchors)

        # label: >0 is the class of the matched gt, 0 is negative, -1 is dont care
        labels = np.empty((total_anchors, ), dtype=np.float32)
        labels.fill(-1)

        if gt_boxes.size > 0:
            # np.float / np.int were removed in NumPy 1.24; use the types they aliased
            overlaps = bbox_overlaps(anchors.astype(np.float64), gt_boxes.astype(np.float64))
            argmax_overlaps = overlaps.argmax(axis=1)
            max_overlaps = overlaps[np.arange(total_anchors), argmax_overlaps]
            # class label of the best-overlapping gt box, gathered directly instead of
            # through a (total_anchors x num_gt) integer matrix
            labels = gt_boxes[argmax_overlaps, -1].astype(int)
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
            labels[(max_overlaps >= cfg.TRAIN.RPN_NEGATIVE_OVERLAP) &
                   (max_overlaps < cfg.TRAIN.RPN_POSITIVE_OVERLAP)] = -1
        else:
            labels[:] = 0

        bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
        if gt_boxes.size > 0:
            bbox_targets[:] = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4])

        bbox_weights = np.zeros((total_anchors, 4), dtype=np.float32)
        bbox_weights[labels > 0, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

        # map up to original set of anchors (all anchors are kept, so this is a float32 copy)
        all_inds = np.arange(total_anchors)
        labels = _unmap(labels, int(K * A), all_inds, fill=-1)
        bbox_targets = _unmap(bbox_targets, int(K * A), all_inds, fill=0)
        bbox_weights = _unmap(bbox_weights, int(K * A), all_inds, fill=0)

        labels = labels.reshape((1, A * feat_height * feat_width))
        bbox_targets = bbox_targets.reshape(
            (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_weights = bbox_weights.reshape(
            (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

        labels_list.append(labels)
        bbox_targets_list.append(bbox_targets)
        bbox_weights_list.append(bbox_weights)

    # keys are emitted as label/p3..p6, bbox_target/p3..p6, bbox_weight/p3..p6
    label = {}
    for prefix, per_level in (('label', labels_list),
                              ('bbox_target', bbox_targets_list),
                              ('bbox_weight', bbox_weights_list)):
        for level_name, value in zip(level_names, per_level):
            label['%s/%s' % (prefix, level_name)] = value
    return label
def assign_quadrangle_anchor(feat_shape, gt_boxes, im_info, cfg, feat_strides=[64, 32, 16, 8, 4],
                             scales=(8, 16, 32), ratios=(0.5, 1, 2), allowed_border=0):
    """
    assign quadrangle ground truth boxes to anchor positions over several feature levels

    Each gt quadrangle is reduced to its axis-aligned enclosing box before matching.

    :param feat_shape: list of infer output shapes, one per entry of feat_strides
                       (last two dims of each are height, width)
    :param gt_boxes: assign ground truth; the first 8 columns are read as
                     (x1, y1, x2, y2, x3, y3, x4, y4)
    :param im_info: filter out anchors overlapped with edges; im_info[0] is read as
                    (height, width, ...)
    :param cfg: config providing the TRAIN.RPN_* settings
    :param feat_strides: anchor position step per level (never mutated here)
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :return: dict of label
    'label': of shape (1, sum over levels of num_anchors * feat_height * feat_width);
             1 is positive, 0 is negative, -1 is dont care
    'bbox_target': of shape (1, num_anchors * 4, sum over levels of feat_height * feat_width)
    'bbox_weight': same shape as 'bbox_target'
    """
    def _unmap(data, count, inds, fill=0):
        """unmap a subset inds of data into original data of size count"""
        ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
        ret.fill(fill)
        ret[inds] = data
        return ret

    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)

    anchors_list = []
    anchors_num_list = []   # number of anchors (before border filtering) per level
    level_offsets = []      # index of each level's first anchor in the concatenated set
    inds_inside_list = []
    feat_infos = []
    A_list = []
    offset = 0
    for i, feat_stride in enumerate(feat_strides):
        base_anchors = generate_anchors(base_size=feat_stride, ratios=list(ratios), scales=scales)
        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = feat_shape[i][-2:]
        feat_infos.append([feat_height, feat_width])

        shift_x = np.arange(0, feat_width) * feat_stride
        shift_y = np.arange(0, feat_height) * feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        A = num_anchors
        A_list.append(A)
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + \
            shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)
        anchors_num_list.append(total_anchors)
        level_offsets.append(offset)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -allowed_border) &
            (all_anchors[:, 1] >= -allowed_border) &
            (all_anchors[:, 2] < im_info[1] + allowed_border) &
            (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
        if DEBUG:
            print('total_anchors', total_anchors)
            print('inds_inside', len(inds_inside))

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
        if DEBUG:
            print('anchors shape', anchors.shape)

        anchors_list.append(anchors)
        # shift the per-level indices into the concatenated index space
        inds_inside_list.append(inds_inside + offset)
        offset += total_anchors

    anchors = np.concatenate(anchors_list)
    inds_inside = np.concatenate(inds_inside_list)
    total_anchors = sum(anchors_num_list)

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        # axis-aligned box enclosing the four corners of each quadrangle; computed only
        # when there are gt boxes so an empty gt array is never indexed by column
        quad_x = gt_boxes[:, 0:8:2]
        quad_y = gt_boxes[:, 1:8:2]
        gt_boxes_bbox = np.zeros((gt_boxes.shape[0], 4), dtype=gt_boxes.dtype)
        gt_boxes_bbox[:, 0] = np.amin(quad_x, axis=1)
        gt_boxes_bbox[:, 1] = np.amin(quad_y, axis=1)
        gt_boxes_bbox[:, 2] = np.amax(quad_x, axis=1)
        gt_boxes_bbox[:, 3] = np.amax(quad_y, axis=1)

        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased
        overlaps = bbox_overlaps(anchors.astype(np.float64), gt_boxes_bbox.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap with some gt
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            # deterministic choice while debugging
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        if DEBUG:
            disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
        labels[disable_inds] = -1

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(anchors, gt_boxes_bbox[argmax_overlaps, :4])

    bbox_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    if DEBUG:
        _sums = bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums = (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts = np.sum(labels == 1)
        means = _sums / (_counts + 1e-14)
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means', means)
        print('stdevs', stds)

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        if gt_boxes.size > 0:
            # max_overlaps only exists when there were gt boxes to match against
            print('rpn: max max_overlaps', np.max(max_overlaps))
        print('rpn: num_positives', np.sum(labels == 1))
        print('rpn: num_negatives', np.sum(labels == 0))
        _fg_sum = np.sum(labels == 1)
        _bg_sum = np.sum(labels == 0)
        _count = 1
        print('rpn: num_positive avg', _fg_sum / _count)
        print('rpn: num_negative avg', _bg_sum / _count)

    # reshape: split the flat arrays back into levels, lay each out, then concatenate
    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for i in range(len(feat_strides)):
        feat_height, feat_width = feat_infos[i]
        A = A_list[i]
        start = level_offsets[i]
        stop = start + anchors_num_list[i]

        label = labels[start:stop]
        bbox_target = bbox_targets[start:stop]
        bbox_weight = bbox_weights[start:stop]

        label = label.reshape(
            (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        label = label.reshape((1, A * feat_height * feat_width))
        bbox_target = bbox_target.reshape(
            (1, feat_height * feat_width, A * 4)).transpose(0, 2, 1)
        bbox_weight = bbox_weight.reshape(
            (1, feat_height * feat_width, A * 4)).transpose((0, 2, 1))

        label_list.append(label)
        bbox_target_list.append(bbox_target)
        bbox_weight_list.append(bbox_weight)

    label_concat = np.concatenate(label_list, axis=1)
    bbox_target_concat = np.concatenate(bbox_target_list, axis=2)
    bbox_weight_concat = np.concatenate(bbox_weight_list, axis=2)

    label = {
        'label': label_concat,
        'bbox_target': bbox_target_concat,
        'bbox_weight': bbox_weight_concat
    }
    return label
def sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                labels=None, overlaps=None, bbox_targets=None, gt_boxes=None):
    """
    generate random sample of ROIs comprising foreground and background examples

    ``gt_assignment`` is only computed when ``labels`` is None, so precomputed
    ``labels`` must come with precomputed ``bbox_targets``.

    :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: number of classes
    :param cfg: config providing TRAIN.FG_THRESH, TRAIN.BG_THRESH_HI/LO and the
                TRAIN.BBOX_* normalization settings
    :param labels: maybe precomputed
    :param overlaps: maybe precomputed (max_overlaps)
    :param bbox_targets: maybe precomputed
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls)
    :return: (rois, labels, bbox_targets, bbox_weights)
    """
    if labels is None:
        # np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased
        overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64),
                                 gt_boxes[:, :4].astype(np.float64))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)

    # select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &
                          (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # number of background RoIs to take (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
    # sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

    # indexes selected: foreground first, then background
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad with randomly drawn rois to ensure a fixed minibatch size
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(range(len(rois)), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # select labels; everything after the foreground part (bg and padding) becomes 0
    labels = labels[keep_indexes]
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS))
                       / np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    return rois, labels, bbox_targets, bbox_weights
def assign_anchor(feat_shape, gt_boxes, im_info, cfg, feat_stride=16,
                  scales=(8, 16, 32), ratios=(0.5, 1, 2), allowed_border=0):
    """
    assign ground truth boxes to anchor positions

    :param feat_shape: infer output shape (last two dims are height, width)
    :param gt_boxes: assign ground truth
    :param im_info: filter out anchors overlapped with edges; im_info[0] is read as
                    (height, width, ...)
    :param cfg: config providing the TRAIN.RPN_* settings
    :param feat_stride: anchor position step
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :return: dict of label
    'label': of shape (1, num_anchors * feat_height * feat_width);
             1 is positive, 0 is negative, -1 is dont care
    'bbox_target': of shape (1, num_anchors * 4, feat_height, feat_width)
    'bbox_weight': of shape (1, num_anchors * 4, feat_height, feat_width)
    """
    def _unmap(data, count, inds, fill=0):
        """unmap a subset inds of data into original data of size count"""
        ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
        ret.fill(fill)
        ret[inds] = data
        return ret

    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    base_anchors = generate_anchors(base_size=feat_stride, ratios=list(ratios), scales=scales)
    num_anchors = base_anchors.shape[0]
    feat_height, feat_width = feat_shape[-2:]

    if DEBUG:
        print('anchors:')
        print(base_anchors)
        print('anchor shapes:')
        print(
            np.hstack((base_anchors[:, 2::4] - base_anchors[:, 0::4],
                       base_anchors[:, 3::4] - base_anchors[:, 1::4])))
        print('im_info', im_info)
        print('height', feat_height, 'width', feat_width)
        print('gt_boxes shape', gt_boxes.shape)
        print('gt_boxes', gt_boxes)

    # 1. generate shifted anchors over the feature grid
    shift_x = np.arange(0, feat_width) * feat_stride
    shift_y = np.arange(0, feat_height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = num_anchors
    K = shifts.shape[0]
    all_anchors = base_anchors.reshape((1, A, 4)) + \
        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                           (all_anchors[:, 1] >= -allowed_border) &
                           (all_anchors[:, 2] < im_info[1] + allowed_border) &
                           (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', len(inds_inside))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors shape', anchors.shape)

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased
        overlaps = bbox_overlaps(anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap with some gt
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            # deterministic choice while debugging
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        if DEBUG:
            disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
        labels[disable_inds] = -1

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4])

    bbox_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    if DEBUG:
        _sums = bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums = (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts = np.sum(labels == 1)
        means = _sums / (_counts + 1e-14)
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means', means)
        print('stdevs', stds)

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        if gt_boxes.size > 0:
            # max_overlaps only exists when there were gt boxes to match against
            print('rpn: max max_overlaps', np.max(max_overlaps))
        print('rpn: num_positives', np.sum(labels == 1))
        print('rpn: num_negatives', np.sum(labels == 0))
        _fg_sum = np.sum(labels == 1)
        _bg_sum = np.sum(labels == 0)
        _count = 1
        print('rpn: num_positive avg', _fg_sum / _count)
        print('rpn: num_negative avg', _bg_sum / _count)

    # lay labels out anchor-major, then flatten
    labels = labels.reshape(
        (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, A * feat_height * feat_width))
    bbox_targets = bbox_targets.reshape(
        (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
    bbox_weights = bbox_weights.reshape(
        (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

    label = {
        'label': labels,
        'bbox_target': bbox_targets,
        'bbox_weight': bbox_weights
    }
    return label
def assign_anchor(feat_shape, gt_boxes, im_info, cfg, feat_stride=16,
                  scales=(8, 16, 32), ratios=(0.5, 1, 2), allowed_border=0,
                  valid_ranges=None, invalid_anchor_threshold=0.3):
    """
    assign ground truth boxes to anchor positions
    :param feat_shape: infer output shape; only the last two entries
        (feat_height, feat_width) are read
    :param gt_boxes: ground truth boxes; the first four columns are used as
        (x1, y1, x2, y2)
    :param im_info: only im_info[0] is read; its entries [0] and [1] bound the
        anchors in y and x, and entry [2] scales the returned valid ranges
        (presumably the image scale -- confirm against the caller)
    :param cfg: config; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES,
        RPN_NEGATIVE_OVERLAP, RPN_POSITIVE_OVERLAP, RPN_FG_FRACTION,
        RPN_BATCH_SIZE and RPN_BBOX_WEIGHTS
    :param feat_stride: anchor position step
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :param valid_ranges: optional iterable of (min_side, max_side) pairs; when
        given, one label / weight set is produced per range and anchors that
        overlap an out-of-range gt box are ignored
    :param invalid_anchor_threshold: overlap with an out-of-range gt box above
        which an anchor is set to -1 (dont care)
    :return: dict with
        'label': float32, (R, num_anchors * feat_height * feat_width)
        'bbox_target': float32, (R, num_anchors * 4, feat_height, feat_width)
        'bbox_weight': float32, (R, num_anchors * 4, feat_height, feat_width)
        'valid_ranges': only present when valid_ranges is not None
        where R is 1 when valid_ranges is None, else len(valid_ranges)
    """
    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
        ret.fill(fill)
        ret[inds] = data
        return ret

    def _subsample(lbls):
        """ set surplus fg, then surplus bg, labels to -1 (in place) """
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
        fg_inds = np.where(lbls == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            lbls[disable_inds] = -1

        # the bg budget is whatever the kept positives leave over
        num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(lbls == 1)
        bg_inds = np.where(lbls == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            lbls[disable_inds] = -1

    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    base_anchors = generate_anchors(base_size=feat_stride, ratios=list(ratios), scales=scales)
    num_anchors = base_anchors.shape[0]
    feat_height, feat_width = feat_shape[-2:]

    # 1. lay the base anchors out over every feature map cell
    shift_x = np.arange(0, feat_width) * feat_stride
    shift_y = np.arange(0, feat_height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = num_anchors
    K = shifts.shape[0]
    all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                           (all_anchors[:, 1] >= -allowed_border) &
                           (all_anchors[:, 2] < im_info[1] + allowed_border) &
                           (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
    anchors = all_anchors[inds_inside, :]

    def _pack(lbls):
        """ turn per-inside-anchor labels into (label, bbox_weight) blobs """
        weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        weights[lbls == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

        # map up to original set of anchors
        lbls = _unmap(lbls, total_anchors, inds_inside, fill=-1)
        weights = _unmap(weights, total_anchors, inds_inside, fill=0)

        lbls = lbls.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        lbls = lbls.reshape((1, A * feat_height * feat_width))
        weights = weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        return lbls, weights

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 replaces the np.float alias removed in NumPy 1.24
        overlaps = bbox_overlaps(anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # keep every anchor that ties for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # regression targets do not depend on the sampled labels, so build them once
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4])
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)

    if valid_ranges is None:
        _subsample(labels)
        labels, bbox_weights = _pack(labels)
        return {'label': labels,
                'bbox_target': bbox_targets,
                'bbox_weight': bbox_weights}

    if gt_boxes.size > 0:
        # box area from (x1, y1, x2, y2); the columns match the [:, :4] slice
        # passed to bbox_transform above
        gt_boxes_sizes = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1.) * \
                         (gt_boxes[:, 3] - gt_boxes[:, 1] + 1.)

    all_labels, all_bbox_targets, all_bbox_weights = [], [], []
    for valid_range in valid_ranges:
        cls_labels = labels.copy()
        if gt_boxes.size > 0:
            # gt boxes whose area falls outside [min_side^2, max_side^2]
            invalid_inds = np.where((gt_boxes_sizes < valid_range[0] ** 2) |
                                    (gt_boxes_sizes > valid_range[1] ** 2))[0]
            if len(invalid_inds) > 0:
                invalid_overlaps = bbox_overlaps(anchors.astype(np.float64),
                                                 gt_boxes[invalid_inds, :].astype(np.float64))
                invalid_max_overlaps = invalid_overlaps.max(axis=1)
                # anchors sitting on an out-of-range gt box are ignored
                cls_labels[invalid_max_overlaps > invalid_anchor_threshold] = -1

        _subsample(cls_labels)
        cls_labels, bbox_weights = _pack(cls_labels)

        all_labels.append(cls_labels)
        all_bbox_targets.append(bbox_targets)
        all_bbox_weights.append(bbox_weights)

    all_labels = np.vstack(all_labels)
    all_bbox_targets = np.vstack(all_bbox_targets)
    all_bbox_weights = np.vstack(all_bbox_weights)

    # NOTE(review): the returned ranges are hard-coded and ignore the
    # valid_ranges argument used above -- confirm this is intended.
    out_ranges = np.array([[0, 90], [30, 160], [90, -1]], dtype=np.float32).reshape(-1, 2)
    out_ranges *= im_info[2]
    # a negative upper bound is replaced by the longer image side
    inds = np.where(out_ranges[:, 1] < 0)[0]
    out_ranges[inds, 1] = max(im_info[0], im_info[1])

    return {'label': all_labels,
            'bbox_target': all_bbox_targets,
            'bbox_weight': all_bbox_weights,
            'valid_ranges': out_ranges}