def cpu_mask_voting(masks, boxes, scores, num_classes, max_per_image, im_width, im_height,
                    nms_thresh, merge_thresh, binary_thresh=0.4):
    """
    Wrapper function for mask voting, note we already know the class of boxes and masks.

    For every foreground class the boxes are filtered by NMS and by a global
    score threshold; each surviving box is then merged with every input box
    overlapping it by at least ``merge_thresh`` through ``mask_aggregation``,
    using the normalised class scores as weights.

    :param masks: array read as ``masks[i, 0]`` (2-D mask of detection i); the
        size of its last axis is used as the output mask side length
    :param boxes: [num_detect, 4] boxes shared by all classes
        (presumably x1, y1, x2, y2 in pixels -- TODO confirm)
    :param scores: [num_detect, num_classes] scores, column 0 is skipped
    :param num_classes: number of classes including class 0
    :param max_per_image: cap on kept detections, applied per class after NMS
        and again to derive the global score threshold
    :param im_width: forwarded to ``mask_aggregation``
    :param im_height: forwarded to ``mask_aggregation``
    :param nms_thresh: threshold handed to ``py_nms_wrapper``
    :param merge_thresh: minimum overlap for a box to take part in the voting
    :param binary_thresh: forwarded to ``mask_aggregation``
    :return: (list_result_mask, list_result_box), two lists of length
        num_classes; entry c (c >= 1) holds an array of shape
        (n_c, 1, mask_size, mask_size) and an array of shape (n_c, 5)
        (4 box values followed by the score); entry 0 stays an empty list
    """
    masks = masks.astype(np.float32)
    mask_size = masks.shape[-1]
    nms = py_nms_wrapper(nms_thresh)

    # Stage 1: per-class NMS, keeping at most max_per_image boxes per class.
    t_boxes = [[] for _ in range(num_classes)]
    t_scores = [[] for _ in range(num_classes)]
    t_all_scores = []
    for i in range(1, num_classes):
        dets = np.hstack((boxes.astype(np.float32), scores[:, i:i + 1]))
        inds = nms(dets)
        num_keep = min(len(inds), max_per_image)
        inds = inds[:num_keep]
        t_boxes[i] = boxes[inds]
        t_scores[i] = scores[inds, i]
        t_all_scores.extend(scores[inds, i])

    # Stage 2: global score threshold so that roughly max_per_image
    # detections remain across all classes (never below 1e-3).
    sorted_scores = np.sort(t_all_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    if len(sorted_scores) > 0:
        thresh = max(sorted_scores[num_keep - 1], 1e-3)
    else:
        # Nothing survived NMS (or there is no foreground class): indexing
        # the empty array would raise IndexError, so fall back to the floor.
        thresh = 1e-3

    for i in range(1, num_classes):
        keep = np.where(t_scores[i] >= thresh)
        t_boxes[i] = t_boxes[i][keep]
        t_scores[i] = t_scores[i][keep]

    # Stage 3: resize every input mask to the pixel size of its own box.
    num_detect = boxes.shape[0]
    res_mask = [[] for _ in range(num_detect)]
    for i in range(num_detect):
        box = np.round(boxes[i]).astype(int)
        res_mask[i] = cv2.resize(masks[i, 0].astype(np.float32),
                                 (box[2] - box[0] + 1, box[3] - box[1] + 1))

    # Stage 4: vote. np.float64 replaces the removed ``np.float`` alias (same dtype).
    boxes_f64 = boxes.astype(np.float64)
    list_result_box = [[] for _ in range(num_classes)]
    list_result_mask = [[] for _ in range(num_classes)]
    for c in range(1, num_classes):
        num_boxes = len(t_boxes[c])
        masks_ar = np.zeros((num_boxes, 1, mask_size, mask_size))
        boxes_ar = np.zeros((num_boxes, 4))
        for i in range(num_boxes):
            # Get weights according to their segmentation scores
            cur_ov = bbox_overlaps(boxes_f64, t_boxes[c][i, np.newaxis].astype(np.float64))
            cur_inds = np.where(cur_ov >= merge_thresh)[0]
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / sum(cur_weights)
            # Re-format mask when passing it to mask_aggregation
            p_mask = [res_mask[j] for j in list(cur_inds)]
            # do mask aggregation
            orig_mask, boxes_ar[i] = mask_aggregation(boxes[cur_inds], p_mask, cur_weights,
                                                      im_width, im_height, binary_thresh)
            masks_ar[i, 0] = cv2.resize(orig_mask.astype(np.float32), (mask_size, mask_size))
        boxes_scored_ar = np.hstack((boxes_ar, t_scores[c][:, np.newaxis]))
        list_result_box[c] = boxes_scored_ar
        list_result_mask[c] = masks_ar
    return list_result_mask, list_result_box
def create_roidb_from_box_list(self, box_list, mapping_list, gt_roidb):
    """
    given ground truth, prepare roidb

    :param box_list: [image_index] ndarray of boxes with 4 columns, or 5
        columns where the last one is the proposal score (coordinate order
        must be whatever ``bbox_overlaps`` expects -- presumably
        x1, y1, x2, y2, TODO confirm)
    :param mapping_list: not used by this method
    :param gt_roidb: [image_index] dict; the keys 'image', 'height', 'width',
        'boxes' and 'gt_classes' are read
    :return: roidb: [image_index] dict with 'image', 'height', 'width',
        'boxes', 'gt_classes', 'gt_overlaps', 'max_classes', 'max_overlaps',
        'flipped' and 'proposal_scores' ('proposal_scores' is None when the
        box matrix carries no score column)

    Side effect: sets ``self.num_images`` to ``len(gt_roidb)``.
    """
    self.num_images = len(gt_roidb)
    assert len(box_list) == self.num_images, 'number of boxes matrix must match number of images'
    roidb = []
    for i in range(self.num_images):
        gt_rec = gt_roidb[i]
        roi_rec = dict()
        roi_rec['image'] = gt_rec['image']
        roi_rec['height'] = gt_rec['height']
        roi_rec['width'] = gt_rec['width']

        boxes = box_list[i]
        # Reset per image: previously `scores` was unbound (or silently kept
        # the previous image's values) when no score column was present.
        scores = None
        if boxes.shape[1] == 5:
            scores = boxes[:, -1]
            boxes = boxes[:, :4]
        num_boxes = boxes.shape[0]

        overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)
        if gt_rec['boxes'].size > 0:
            gt_boxes = gt_rec['boxes']
            gt_classes = gt_rec['gt_classes']
            # np.float64 replaces the removed ``np.float`` alias (same dtype).
            gt_overlaps = bbox_overlaps(boxes.astype(np.float64), gt_boxes.astype(np.float64))
            # for each box in n boxes, select only maximum overlap (must be greater than zero)
            argmaxes = gt_overlaps.argmax(axis=1)
            maxes = gt_overlaps.max(axis=1)
            matched = np.where(maxes > 0)[0]
            overlaps[matched, gt_classes[argmaxes[matched]]] = maxes[matched]

        roi_rec.update({'boxes': boxes,
                        'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
                        'gt_overlaps': overlaps,
                        'max_classes': overlaps.argmax(axis=1),
                        'max_overlaps': overlaps.max(axis=1),
                        'flipped': False,
                        'proposal_scores': scores})

        # background roi => background class
        zero_indexes = np.where(roi_rec['max_overlaps'] == 0)[0]
        assert all(roi_rec['max_classes'][zero_indexes] == 0)
        # foreground roi => foreground class
        nonzero_indexes = np.where(roi_rec['max_overlaps'] > 0)[0]
        assert all(roi_rec['max_classes'][nonzero_indexes] != 0)

        roidb.append(roi_rec)
    return roidb
def get_scores_per_class(bbox_per_class, gt_box_per_class, score_per_class):
    """
    Spread the scores of one class over its ground-truth boxes, once per IoU threshold.

    ``self._target_thresh`` is read from the enclosing scope (``self`` is not
    a parameter of this function).

    :param bbox_per_class: bbox [FIRST_N, 4]
    :param gt_box_per_class: gt_box [num_valid_gt, C]; the last column is
        dropped before the overlap computation
    :param score_per_class: score, must be 2-dim [FIRST_N, 1] so it can be tiled
    :return: list with one [FIRST_N, num_valid_gt] array per threshold; an
        entry keeps the score of a box only in the column of the gt box it
        overlaps most, and only if that overlap exceeds the threshold.
        Empty list when there is no gt box.
    """
    num_valid_gt = len(gt_box_per_class)
    if num_valid_gt == 0:
        return []

    # np.float64 replaces the removed ``np.float`` alias (same dtype).
    overlap_mat = bbox_overlaps(
        bbox_per_class.astype(np.float64),
        gt_box_per_class[:, :-1].astype(np.float64))

    # One-hot row per box selecting its best-overlapping gt box.
    # [first_n, num_valid_gt]; independent of the threshold, so built once.
    max_overlap_mask = np.eye(num_valid_gt)[np.argmax(overlap_mat, axis=1)]

    output_list_per_class = []
    for thresh in self._target_thresh:
        # following mAP metric
        overlap_mask = (overlap_mat > thresh)
        # require score be 2-dim
        # [first_n, num_valid_gt]
        overlap_score = np.tile(score_per_class, (1, num_valid_gt))
        overlap_score *= overlap_mask
        overlap_score *= max_overlap_mask
        output_list_per_class.append(overlap_score)
    return output_list_per_class
def forward(self, is_train, req, in_data, out_data, aux):
    """
    Mark, per class and per IoU threshold, the best-scoring box of every gt box.

    Inputs are read through ``asnumpy()``:
      in_data[0]  bbox,   [first_n, num_fg_classes, 4]
      in_data[1]  gt_box, [1, num_gt, 5]; the last column is the class id,
                  compared against ``cls_idx + 1``
      in_data[2]  score,  [first_n, num_fg_classes]

    The result, a float32 array of shape
    [first_n, num_fg_classes, number of thresholds], is written to
    ``out_data[0]`` via ``self.assign``. An entry is 1 when the box is the
    highest-scoring box assigned to some gt box of that class and overlaps a
    gt box of that class above the threshold, else 0.

    ``is_train`` and ``aux`` are not used.

    :raises AssertionError: if the batch size is not 1, the gt code size is
        not 5, score is not 2-dim, or bbox/score disagree on the class count
    """
    # bbox, [first_n, num_fg_classes, 4]
    bbox = in_data[0].asnumpy()
    num_boxes = bbox.shape[0]
    num_fg_classes = bbox.shape[1]
    gt_box = in_data[1].asnumpy()
    # score, [first_n, num_fg_classes]
    score = in_data[2].asnumpy()

    batch_image, _, code_size = gt_box.shape
    # The message used to format num_gt here, reporting the wrong number.
    assert batch_image == 1, 'only support batch_image=1, but receive %d' % batch_image
    assert code_size == 5, 'code_size of gt should be 5, but receive %d' % code_size
    assert len(score.shape) == 2, 'shape of score is %d instead of 2.' % len(score.shape)
    assert score.shape[1] == num_fg_classes, \
        'number of fg classes should be same for boxes and scores'

    output_list = []
    for cls_idx in range(0, num_fg_classes):
        valid_gt_mask = (gt_box[0, :, -1].astype(np.int32) == (cls_idx + 1))
        valid_gt_box = gt_box[0, valid_gt_mask, :]
        num_valid_gt = len(valid_gt_box)

        if num_valid_gt == 0:
            # No gt of this class: nothing can be a positive.
            output_list.append(
                np.zeros(shape=(num_boxes, self._num_thresh), dtype=np.float32))
            continue

        bbox_per_class = bbox[:, cls_idx, :]
        score_per_class = score[:, cls_idx:cls_idx + 1]
        # np.float64 replaces the removed ``np.float`` alias (same dtype).
        overlap_mat = bbox_overlaps(
            bbox_per_class.astype(np.float64),
            valid_gt_box[:, :-1].astype(np.float64))

        # One-hot row per box selecting its best-overlapping gt box;
        # independent of the threshold, so built once.
        max_overlap_mask = np.eye(num_valid_gt)[np.argmax(overlap_mat, axis=1)]

        output_list_per_class = []
        for thresh in self._target_thresh:
            # following mAP metric
            overlap_mask = (overlap_mat > thresh)
            valid_bbox_indices = np.where(overlap_mask)[0]
            # require score be 2-dim
            overlap_score = np.tile(score_per_class, (1, num_valid_gt))
            overlap_score *= overlap_mask
            overlap_score *= max_overlap_mask
            max_score_indices = np.argmax(overlap_score, axis=0)
            output = np.zeros((num_boxes, ))
            # argmax returns index 0 for an all-zero column, so intersect
            # with the boxes that really pass the threshold.
            output[np.intersect1d(max_score_indices, valid_bbox_indices)] = 1
            output_list_per_class.append(output)
        output_list.append(np.stack(output_list_per_class, axis=-1))

    blob = np.stack(output_list, axis=1).astype(np.float32, copy=False)
    self.assign(out_data[0], req[0], blob)
def check_rois(rois, gt_boxes):
    '''
    Print and return how many rois overlap some gt box with IoU >= 0.5.

    :param rois: numpy, (128, 5); column 0 is skipped, columns 1: are the box
    :param gt_boxes: numpy; only the first 4 columns are used
    :return: num of fg_rois with iou >= 0.5
    '''
    # np.float64 replaces the removed ``np.float`` alias (same dtype).
    overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64),
                             gt_boxes[:, :4].astype(np.float64))
    max_overlaps = overlaps.max(axis=1)
    fg_indexes = np.where(max_overlaps >= 0.5)[0]
    print('check proposals: {}'.format(fg_indexes.shape))
    # The docstring always promised this count; it was never returned.
    return fg_indexes.size
def resample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                  labels=None, overlaps=None, bbox_targets=None, gt_boxes=None):
    """
    generate random sample of ROIs comprising foreground and background examples

    :param rois: all_rois; column 0 is skipped when overlaps are computed
        here (e2e: [n, 5] with batch_index)
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: not used by this function
    :param cfg: config; TRAIN.FG_THRESH, TRAIN.BG_THRESH_HI and
        TRAIN.BG_THRESH_LO are read
    :param labels: when None, overlaps are computed from ``rois`` and
        ``gt_boxes``; otherwise ``overlaps`` must be supplied
    :param overlaps: maybe precomputed (max_overlaps), one value per roi
    :param bbox_targets: not used by this function
    :param gt_boxes: [n, 5] (x1, y1, x2, y2, cls); required when labels is None
    :return: keep_indexes, 1-D array of ``rois_per_image`` roi indexes:
        sampled foreground first, then sampled background, then random
        padding (padding may repeat already selected rois)
    :raises ValueError: if padding is required but ``rois`` is empty
    """
    if labels is None:
        # np.float64 replaces the removed ``np.float`` alias (same dtype).
        overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64),
                                 gt_boxes[:, :4].astype(np.float64))
        overlaps = overlaps.max(axis=1)

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &
                          (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image
    # (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

    # indexes selected
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad more to ensure a fixed minibatch size
    num_rois = len(rois)
    if num_rois == 0 and keep_indexes.shape[0] < rois_per_image:
        # With nothing to draw from, the loop below could never make
        # progress (endless loop on recent NumPy), so fail loudly instead.
        raise ValueError('cannot pad the minibatch: rois is empty')
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(num_rois, rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(num_rois, size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)
    return keep_indexes
def sample_rois_v2(rois, num_classes, cfg, labels=None, overlaps=None, bbox_targets=None, gt_boxes=None):
    """
    Label every roi and build its regression targets (no sub-sampling is done here).

    :param rois: all_rois; column 0 is skipped for overlap/target
        computation (e2e: [n, 5] with batch_index)
    :param num_classes: number of classes, forwarded to
        ``expand_bbox_regression_targets``
    :param cfg: config; TRAIN.BG_THRESH_HI,
        TRAIN.BBOX_NORMALIZATION_PRECOMPUTED, TRAIN.BBOX_MEANS and
        TRAIN.BBOX_STDS are read
    :param labels: maybe precomputed; when given, ``overlaps`` must be given
        too. NOTE: a supplied array is modified in place (background rois
        are set to 0)
    :param overlaps: maybe precomputed (max_overlaps)
    :param bbox_targets: maybe precomputed
    :param gt_boxes: [n, 5] (x1, y1, x2, y2, cls); required unless both
        ``labels`` and ``bbox_targets`` are precomputed
    :return: (rois, labels, bbox_targets, bbox_weights)
    :raises ValueError: if targets must be computed but ``gt_boxes`` is None
    """
    gt_assignment = None
    if labels is None:
        # np.float64 replaces the removed ``np.float`` alias (same dtype).
        overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64),
                                 gt_boxes[:, :4].astype(np.float64))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

    # set labels of bg_rois to be 0
    bg_ind = np.where(overlaps < cfg.TRAIN.BG_THRESH_HI)[0]
    labels[bg_ind] = 0

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets
    else:
        if gt_assignment is None:
            # labels were precomputed, so the roi -> gt assignment was never
            # derived above; this path used to die with a NameError.
            if gt_boxes is None:
                raise ValueError('gt_boxes is required to compute bbox_targets')
            gt_assignment = bbox_overlaps(rois[:, 1:].astype(np.float64),
                                          gt_boxes[:, :4].astype(np.float64)).argmax(axis=1)
        targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment, :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS))
                       / np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    return rois, labels, bbox_targets, bbox_weights
def get_scores(bbox, gt_box, score):
    """
    Per class and per IoU threshold, spread box scores over the gt boxes of that class.

    ``num_fg_classes`` and ``self._target_thresh`` are read from the
    enclosing scope; they are not parameters of this function.

    :param bbox: [first_n, num_fg_classes, 4]
    :param gt_box: [1, num_gt, 5]; the last column is the class id, compared
        against ``cls_idx + 1``
    :param score: [first_n, num_fg_classes]
    :return: list with one entry per class: an empty list when the class has
        no gt box, else a list with one [first_n, num_valid_gt] array per
        threshold. An array entry keeps the score of a box only in the
        column of the gt box it overlaps most, and only if that overlap
        exceeds the threshold.
    """
    output_list = []
    for cls_idx in range(0, num_fg_classes):
        valid_gt_mask = (gt_box[0, :, -1].astype(np.int32) == (cls_idx + 1))
        # [num_valid_gt, 5]
        valid_gt_box = gt_box[0, valid_gt_mask, :]
        num_valid_gt = len(valid_gt_box)

        if num_valid_gt == 0:
            output_list.append([])
            continue

        bbox_per_class = bbox[:, cls_idx, :]
        # score_per_class, [first_n, 1]
        score_per_class = score[:, cls_idx:cls_idx + 1]
        # [first_n, num_valid_gt]
        # np.float64 replaces the removed ``np.float`` alias (same dtype).
        overlap_mat = bbox_overlaps(
            bbox_per_class.astype(np.float64),
            valid_gt_box[:, :-1].astype(np.float64))

        # One-hot row per box selecting its best-overlapping gt box.
        # [first_n, num_valid_gt]; independent of the threshold, so built once.
        max_overlap_mask = np.eye(num_valid_gt)[np.argmax(overlap_mat, axis=1)]

        output_list_per_class = []
        for thresh in self._target_thresh:
            # following mAP metric
            overlap_mask = (overlap_mat > thresh)
            # require score be 2-dim
            # [first_n, num_valid_gt]
            overlap_score = np.tile(score_per_class, (1, num_valid_gt))
            overlap_score *= overlap_mask
            overlap_score *= max_overlap_mask
            output_list_per_class.append(overlap_score)
        output_list.append(output_list_per_class)
    return output_list
def assign_bbox(gt_bbox, det_bbox, frame_seg_id, traj_id):
    """
    Match detections to ground-truth boxes for one frame and record the result.

    Matching is done with ``linear_sum_assignment`` on the negated overlap
    matrix; pairs whose overlap is below ``overlap_thresh`` are dropped.
    ``overlap_thresh``, ``det_traj_frame`` and ``vid_traj`` come from the
    enclosing scope. For every accepted pair:
      * ``det_traj_frame[trajectory]`` gets
        ``[frame_seg_id, err_x, err_y, err_r, err_s]`` appended (errors from
        ``get_stability_err``),
      * ``vid_traj[frame_seg_id]`` gets ``[trajectory] + first 4 values of
        the detection`` appended.

    Returns None; does nothing when either box set is empty.
    """
    if not (len(gt_bbox) and len(det_bbox)):
        return

    iou = bbox_overlaps(gt_bbox, det_bbox)
    gt_rows, det_cols = linear_sum_assignment(-iou)

    for gt_idx, det_idx in zip(gt_rows, det_cols):
        if iou[gt_idx, det_idx] < overlap_thresh:
            continue

        trajectory = traj_id[gt_idx]
        det_box = det_bbox[det_idx, :]
        err_x, err_y, err_r, err_s = get_stability_err(gt_bbox[gt_idx, :], det_box)

        frame_record = [frame_seg_id, err_x, err_y, err_r, err_s]
        det_traj_frame.setdefault(trajectory, []).append(frame_record)

        traj_record = [trajectory] + list(det_box[:4])
        vid_traj.setdefault(frame_seg_id, []).append(traj_record)
def assign_quadrangle_anchor(feat_shape, gt_boxes, im_info, cfg, feat_strides=(64, 32, 16, 8, 4),
                             scales=(8, 16, 32), ratios=(0.5, 1, 2), allowed_border=0):
    """
    assign ground truth boxes to anchor positions

    Anchors of every pyramid level are generated, those crossing the image
    border are dropped, and the rest are labelled against the axis-aligned
    bounding boxes of the ground-truth quadrangles.

    :param feat_shape: one shape per entry of feat_strides; the last two
        dims of each are read as (feat_height, feat_width)
    :param gt_boxes: ground truth; columns 0..7 are read as
        (x1, y1, x2, y2, x3, y3, x4, y4) and reduced to their min/max box
    :param im_info: im_info[0][0] / im_info[0][1] bound the anchor y / x
        coordinates (presumably image height / width -- TODO confirm)
    :param cfg: config; TRAIN.RPN_CLOBBER_POSITIVES, RPN_NEGATIVE_OVERLAP,
        RPN_POSITIVE_OVERLAP, RPN_FG_FRACTION, RPN_BATCH_SIZE and
        RPN_BBOX_WEIGHTS are read
    :param feat_strides: anchor position step of every level
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :return: dict of label (A = anchors per location, sums run over levels)
        'label': of shape (1, sum(A * feat_height * feat_width));
            1 is positive, 0 is negative, -1 is dont care
        'bbox_target': of shape (1, A * 4, sum(feat_height * feat_width))
        'bbox_weight': of shape (1, A * 4, sum(feat_height * feat_width)),
            cfg.TRAIN.RPN_BBOX_WEIGHTS on positive anchors, 0 elsewhere
    """
    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        if len(data.shape) == 1:
            ret = np.empty((count, ), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    # Debug prints use print('... %s' % (value,)) so that they emit the same
    # text on Python 2 and Python 3 (the old print statements were a
    # SyntaxError on Python 3).
    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)

    anchors_list = []
    anchors_num_list = []
    inds_inside_list = []
    feat_infos = []
    A_list = []
    level_offset = 0  # number of anchors generated by the previous levels
    for i in range(len(feat_strides)):
        feat_stride = feat_strides[i]
        base_anchors = generate_anchors(base_size=feat_stride, ratios=list(ratios), scales=scales)
        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = feat_shape[i][-2:]
        feat_infos.append([feat_height, feat_width])

        shift_x = np.arange(0, feat_width) * feat_stride
        shift_y = np.arange(0, feat_height) * feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to K shifts (K, 1, 4) -> (K, A, 4) -> (K*A, 4)
        A = num_anchors
        A_list.append(A)
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)
        anchors_num_list.append(total_anchors)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -allowed_border) &
            (all_anchors[:, 1] >= -allowed_border) &
            (all_anchors[:, 2] < im_info[1] + allowed_border) &
            (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
        if DEBUG:
            print('total_anchors %s' % (total_anchors,))
            print('inds_inside %s' % (len(inds_inside),))

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
        if DEBUG:
            print('anchors shape %s' % (anchors.shape,))
        anchors_list.append(anchors)
        # store indexes relative to the concatenation of all levels
        inds_inside_list.append(inds_inside + level_offset)
        level_offset += total_anchors

    anchors = np.concatenate(anchors_list)
    inds_inside = np.concatenate(inds_inside_list)
    total_anchors = sum(anchors_num_list)

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # axis-aligned bounding box of every ground-truth quadrangle
    gt_boxes_bbox = np.zeros((gt_boxes.shape[0], 4), dtype=gt_boxes.dtype)
    ex_x = np.vstack(
        (gt_boxes[:, 0], gt_boxes[:, 2], gt_boxes[:, 4], gt_boxes[:, 6]))
    ex_y = np.vstack(
        (gt_boxes[:, 1], gt_boxes[:, 3], gt_boxes[:, 5], gt_boxes[:, 7]))
    gt_boxes_bbox[:, 0] = np.amin(ex_x, axis=0)
    gt_boxes_bbox[:, 1] = np.amin(ex_y, axis=0)
    gt_boxes_bbox[:, 2] = np.amax(ex_x, axis=0)
    gt_boxes_bbox[:, 3] = np.amax(ex_y, axis=0)

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 replaces the removed ``np.float`` alias (same dtype).
        overlaps = bbox_overlaps(anchors.astype(np.float64),
                                 gt_boxes_bbox.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties with the best overlap of some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        if DEBUG:
            disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
        labels[disable_inds] = -1

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(anchors, gt_boxes_bbox[argmax_overlaps, :4])

    bbox_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    if DEBUG:
        _sums = bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums = (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts = np.sum(labels == 1)
        means = _sums / (_counts + 1e-14)
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means %s' % (means,))
        print('stdevs %s' % (stds,))

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        print('rpn: max max_overlaps %s' % (np.max(max_overlaps),))
        print('rpn: num_positives %s' % (np.sum(labels == 1),))
        print('rpn: num_negatives %s' % (np.sum(labels == 0),))
        _fg_sum = np.sum(labels == 1)
        _bg_sum = np.sum(labels == 0)
        _count = 1
        print('rpn: num_positive avg %s' % (_fg_sum / _count,))
        print('rpn: num_negative avg %s' % (_bg_sum / _count,))

    # reshape: cut the flat arrays back into per-level blocks
    label_list = list()
    bbox_target_list = list()
    bbox_weight_list = list()
    start = 0
    for i in range(len(feat_strides)):
        feat_height, feat_width = feat_infos[i]
        A = A_list[i]
        stop = start + anchors_num_list[i]
        label = labels[start:stop]
        bbox_target = bbox_targets[start:stop]
        bbox_weight = bbox_weights[start:stop]
        start = stop

        label = label.reshape(
            (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        label = label.reshape((1, A * feat_height * feat_width))
        bbox_target = bbox_target.reshape(
            (1, feat_height * feat_width, A * 4)).transpose(0, 2, 1)
        bbox_weight = bbox_weight.reshape(
            (1, feat_height * feat_width, A * 4)).transpose((0, 2, 1))

        label_list.append(label)
        bbox_target_list.append(bbox_target)
        bbox_weight_list.append(bbox_weight)

    label_concat = np.concatenate(label_list, axis=1)
    bbox_target_concat = np.concatenate(bbox_target_list, axis=2)
    bbox_weight_concat = np.concatenate(bbox_weight_list, axis=2)

    label = {
        'label': label_concat,
        'bbox_target': bbox_target_concat,
        'bbox_weight': bbox_weight_concat
    }
    return label
def assign_pyramid_anchor(feat_shapes, gt_boxes, im_info, cfg, feat_strides=(4, 8, 16, 32, 64),
                          scales=(8,), ratios=(0.5, 1, 2), allowed_border=0, balance_scale_bg=False,):
    """
    assign ground truth boxes to anchor positions

    :param feat_shapes: one entry per feature level; the last two dims of
        ``feat_shapes[level][0]`` are read as (feat_height, feat_width)
    :param gt_boxes: ground truth; the first 4 columns are used as the
        regression target box
    :param im_info: im_info[0][0] / im_info[0][1] bound the anchor y / x
        coordinates (presumably image height / width -- TODO confirm)
    :param cfg: config; TRAIN.RPN_CLOBBER_POSITIVES, RPN_NEGATIVE_OVERLAP,
        RPN_POSITIVE_OVERLAP, RPN_FG_FRACTION, RPN_BATCH_SIZE (-1 disables
        sub-sampling) and RPN_BBOX_WEIGHTS are read
    :param feat_strides: anchor position step of every level
    :param scales: used to generate anchors, affects num_anchors (per
        location); 1-dim is shared by all levels, 2-dim is indexed per level
    :param ratios: aspect ratios of generated anchors (2-dim when scales is 2-dim)
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :param balance_scale_bg: restrict the background samples for each pyramid level
    :return: dict of label (A = anchors per location, sums run over levels)
        'label': of shape (1, sum(A * feat_height * feat_width));
            1 is positive, 0 is negative, -1 is dont care
        'bbox_target': of shape (1, A * 4, sum(feat_height * feat_width))
        'bbox_weight': of shape (1, A * 4, sum(feat_height * feat_width)),
            cfg.TRAIN.RPN_BBOX_WEIGHTS on positive anchors, 0 elsewhere
    """
    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        if len(data.shape) == 1:
            ret = np.empty((count,), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    ratios = np.array(ratios, dtype=np.float32)
    assert(len(feat_shapes) == len(feat_strides))

    fpn_args = []
    fpn_anchors_fid = np.zeros(0).astype(int)  # inside-anchor count per level
    fpn_anchors = np.zeros([0, 4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id in range(len(feat_strides)):
        stride = feat_strides[feat_id]
        # len(scales.shape) == 1 just for backward compatibility, will remove in the future
        if len(scales.shape) == 1:
            base_anchors = generate_anchors(base_size=stride, ratios=ratios, scales=scales)
        else:
            assert len(scales.shape) == len(ratios.shape) == 2
            base_anchors = generate_anchors(base_size=stride, ratios=ratios[feat_id],
                                            scales=scales[feat_id])
        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = feat_shapes[feat_id][0][-2:]

        # 1. generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, feat_width) * stride
        shift_y = np.arange(0, feat_height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = num_anchors
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                               (all_anchors[:, 1] >= -allowed_border) &
                               (all_anchors[:, 2] < im_info[1] + allowed_border) &
                               (all_anchors[:, 3] < im_info[0] + allowed_border))[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # label: 1 is positive, 0 is negative, -1 is dont care
        # for sigmoid classifier, ignore the 'background' class
        labels = np.empty((len(inds_inside),), dtype=np.float32)
        labels.fill(-1)

        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_labels = np.hstack((fpn_labels, labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 replaces the removed ``np.float`` alias (same dtype).
        overlaps = bbox_overlaps(fpn_anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties with the best overlap of some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        # fg label: for each gt, anchor with highest overlap
        fpn_labels[gt_argmax_overlaps] = 1
        # fg label: above threshold IoU
        fpn_labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        fpn_labels[:] = 0

    # subsample positive labels if we have too many
    if cfg.TRAIN.RPN_BATCH_SIZE == -1:
        num_fg = fpn_labels.shape[0]
    else:
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(fpn_labels >= 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        fpn_labels[disable_inds] = -1

    # subsample negative labels if we have too many
    if cfg.TRAIN.RPN_BATCH_SIZE == -1:
        num_bg = fpn_labels.shape[0]
    else:
        num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(fpn_labels >= 1)
    bg_inds = np.where(fpn_labels == 0)[0]
    # turn the per-level counts into [start, stop) boundaries of every level
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

    if balance_scale_bg:
        # Floor division keeps the per-level quota an integer on Python 3
        # as well; true division produced a float that npr.choice rejects
        # as a size.
        num_bg_scale = num_bg // len(feat_strides)
        for feat_id in range(0, len(feat_strides)):
            bg_ind_scale = bg_inds[(bg_inds >= fpn_anchors_fid[feat_id]) &
                                   (bg_inds < fpn_anchors_fid[feat_id + 1])]
            if len(bg_ind_scale) > num_bg_scale:
                disable_inds = npr.choice(bg_ind_scale,
                                          size=(len(bg_ind_scale) - num_bg_scale), replace=False)
                fpn_labels[disable_inds] = -1
    else:
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            if DEBUG:
                disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
            fpn_labels[disable_inds] = -1

    # regression targets and weights are only set on positive anchors
    fg_mask = fpn_labels >= 1
    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        fpn_bbox_targets[fg_mask, :] = bbox_transform(fpn_anchors[fg_mask, :],
                                                      gt_boxes[argmax_overlaps[fg_mask], :4])

    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    fpn_bbox_weights[fg_mask, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id in range(0, len(feat_strides)):
        feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
        lo = fpn_anchors_fid[feat_id]
        hi = fpn_anchors_fid[feat_id + 1]
        inside = fpn_inds_inside[feat_id]
        # map up to original set of anchors
        labels = _unmap(fpn_labels[lo:hi], total_anchors, inside, fill=-1)
        bbox_targets = _unmap(fpn_bbox_targets[lo:hi], total_anchors, inside, fill=0)
        bbox_weights = _unmap(fpn_bbox_weights[lo:hi], total_anchors, inside, fill=0)

        labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))

        bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)

    label = {
        'label': np.concatenate(label_list, axis=1),
        'bbox_target': np.concatenate(bbox_target_list, axis=2),
        'bbox_weight': np.concatenate(bbox_weight_list, axis=2)
    }

    return label
def assign_anchor(feat_shape, gt_boxes, im_info, cfg, feat_stride=16,
                  scales=(8, 16, 32), ratios=(0.5, 1, 2), allowed_border=0):
    """
    Assign ground truth boxes to anchor positions of a single feature map.

    :param feat_shape: infer output shape; only the last two entries
        (feat_height, feat_width) are read
    :param gt_boxes: ground truth boxes, one row per box; the first four
        columns are used as the regression target box
    :param im_info: sequence whose first element starts with (height, width);
        used to filter out anchors that cross the image border
    :param cfg: config object, the TRAIN.RPN_* entries are read
    :param feat_stride: anchor position step
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :return: dict of float32 arrays (A = anchors per location)
        'label': shape (1, A * feat_height * feat_width);
            1 is positive, 0 is negative, -1 is dont care
        'bbox_target': shape (1, A * 4, feat_height, feat_width)
        'bbox_weight': shape (1, A * 4, feat_height, feat_width), non-zero
            (cfg.TRAIN.RPN_BBOX_WEIGHTS) only for positive anchors
    """
    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        if len(data.shape) == 1:
            ret = np.empty((count,), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    base_anchors = generate_anchors(base_size=feat_stride, ratios=list(ratios), scales=scales)
    num_anchors = base_anchors.shape[0]
    feat_height, feat_width = feat_shape[-2:]

    # Debug output is formatted into one string per call so that the text is
    # the same whether print is a statement (Python 2) or a function.
    if DEBUG:
        print('anchors:')
        print(base_anchors)
        print('anchor shapes:')
        print(np.hstack((base_anchors[:, 2::4] - base_anchors[:, 0::4],
                         base_anchors[:, 3::4] - base_anchors[:, 1::4])))
        print('im_info {}'.format(im_info))
        print('height {} width {}'.format(feat_height, feat_width))
        print('gt_boxes shape {}'.format(gt_boxes.shape))
        print('gt_boxes {}'.format(gt_boxes))

    # 1. generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, feat_width) * feat_stride
    shift_y = np.arange(0, feat_height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = num_anchors
    K = shifts.shape[0]
    all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                           (all_anchors[:, 1] >= -allowed_border) &
                           (all_anchors[:, 2] < im_info[1] + allowed_border) &
                           (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
    if DEBUG:
        print('total_anchors {}'.format(total_anchors))
        print('inds_inside {}'.format(len(inds_inside)))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors shape {}'.format(anchors.shape))

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # builtin float replaces the np.float alias removed from recent NumPy
        overlaps = bbox_overlaps(anchors.astype(float), gt_boxes.astype(float))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            # deterministic choice while debugging
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        if DEBUG:
            disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
        labels[disable_inds] = -1

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4])

    bbox_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    if DEBUG:
        _sums = bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums = (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
        _counts = np.sum(labels == 1)
        means = _sums / (_counts + 1e-14)
        stds = np.sqrt(_squared_sums / _counts - means ** 2)
        print('means {}'.format(means))
        print('stdevs {}'.format(stds))

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        # max_overlaps only exists when there was at least one gt box
        if gt_boxes.size > 0:
            print('rpn: max max_overlaps {}'.format(np.max(max_overlaps)))
        print('rpn: num_positives {}'.format(np.sum(labels == 1)))
        print('rpn: num_negatives {}'.format(np.sum(labels == 0)))
        _fg_sum = np.sum(labels == 1)
        _bg_sum = np.sum(labels == 0)
        _count = 1
        print('rpn: num_positive avg {}'.format(_fg_sum / _count))
        print('rpn: num_negative avg {}'.format(_bg_sum / _count))

    # anchors are laid out (H, W, A); move A in front of the spatial axes
    labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, A * feat_height * feat_width))
    bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
    bbox_weights = bbox_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

    label = {'label': labels,
             'bbox_target': bbox_targets,
             'bbox_weight': bbox_weights}
    return label
def sample_xyhs_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                     labels=None, overlaps=None, dbbox_targets=None, gt_boxes=None):
    """
    Sample a fixed-size minibatch of foreground / background RoIs and build
    their regression targets against the ground-truth boxes.

    :param rois: all_rois [n, 4]; e2e [n, 5] with batch_index
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: number of classes
    :param cfg: config object, the TRAIN.* thresholds are read
    :param labels: maybe precomputed
    :param overlaps: maybe precomputed (max overlap per roi)
    :param dbbox_targets: maybe precomputed
    :param gt_boxes: optional for e2e [n, 9] (x1, y1, ..., x4, y4, cls)
    :return: (rois, labels, bbox_targets, bbox_weights)

    NOTE: the gt assignment is only computed when ``labels`` is None, so a
    caller that passes precomputed ``labels`` must also pass ``overlaps`` and
    ``dbbox_targets``.
    """
    if labels is None:
        # horizontal boxes of the gt polygons; column 4 is read as the class
        hgt_boxes = bbox_poly2hbb(gt_boxes)
        # rois: (xmin, ymin, xmax, ymax) after the leading batch index
        # builtin float replaces the np.float alias removed from recent NumPy
        overlaps = bbox_overlaps(rois[:, 1:].astype(float), hgt_boxes[:, :4].astype(float))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = hgt_boxes[gt_assignment, 4]

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &
                          (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image
    # (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

    # indexes selected
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad more to ensure a fixed minibatch size
    if keep_indexes.shape[0] < rois_per_image and len(rois) == 0:
        # with no RoIs every padding round adds nothing and the loop below
        # would never terminate
        raise ValueError('cannot pad the minibatch: rois is empty')
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(len(rois), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # select labels (fancy indexing copies, the caller's array is untouched)
    labels = labels[keep_indexes]
    # set labels of bg_rois to be 0
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox_target
    if dbbox_targets is not None:
        bbox_target_data = dbbox_targets[keep_indexes, :]
    else:
        targets = dbboxtransform3_warp(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :8])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS))
                       / np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets_base(bbox_target_data, num_classes, cfg)

    return rois, labels, bbox_targets, bbox_weights
def sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                labels=None, overlaps=None, bbox_targets=None, gt_boxes=None):
    """
    generate random sample of ROIs comprising foreground and background examples

    :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: number of classes
    :param cfg: config object, the TRAIN.* thresholds are read
    :param labels: maybe precomputed
    :param overlaps: maybe precomputed (max_overlaps)
    :param bbox_targets: maybe precomputed
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls)
    :return: (rois, labels, neg_labels, bbox_targets, bbox_weights);
        neg_labels is zero except for sampled RoIs whose max overlap lies in
        [0.2, 0.3), where it holds the class of the best-overlapping gt box

    NOTE: the gt assignment is only computed when ``labels`` is None, so a
    caller that passes precomputed ``labels`` must also pass ``overlaps`` and
    ``bbox_targets``.
    """
    if labels is None:
        # builtin float replaces the np.float alias removed from recent NumPy
        overlaps = bbox_overlaps(rois[:, 1:].astype(float), gt_boxes[:, :4].astype(float))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)

    debug = False
    if debug:
        print('fg_indexes size:', fg_indexes.size, 'fg_rois_per_image:', fg_rois_per_image, 'fg_rois_per_this_image:', fg_rois_per_this_image)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &
                          (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image
    # (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

    # indexes selected
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad more to ensure a fixed minibatch size
    if keep_indexes.shape[0] < rois_per_image and len(rois) == 0:
        # with no RoIs every padding round adds nothing and the loop below
        # would never terminate
        raise ValueError('cannot pad the minibatch: rois is empty')
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(len(rois), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # select labels (fancy indexing copies, the caller's array is untouched)
    labels = labels[keep_indexes]
    # keep the gt class of every sampled roi before background is zeroed
    labels_all = labels.copy()
    # set labels of bg_rois to be 0
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS))
                       / np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    # bucket the sampled rois by their max overlap
    overlaps = overlaps[keep_indexes]
    neg_low = 0.0
    neg_middle = 0.2
    neg_high = 0.3
    neg_indexes_L2 = np.where((overlaps < neg_high) & (overlaps >= neg_middle))[0]
    neg_labels = np.zeros(labels.shape)
    neg_labels[neg_indexes_L2] = labels_all[neg_indexes_L2]

    if debug:
        # the other two buckets are only reported, never used
        neg_indexes_L1 = np.where((overlaps < neg_middle) & (overlaps >= neg_low))[0]
        neg_indexes_L3 = np.where(overlaps >= neg_high)[0]
        print('neg_indexes_L1:', len(neg_indexes_L1), 'neg_indexes_L2:', len(neg_indexes_L2), 'neg_indexes_L3', len(neg_indexes_L3))
        print('labels_all:', labels_all)
        print('neg_labels:', neg_labels, 'neg_labels_shape:', neg_labels.shape)
        print('<<<fg neg labels>>>>', neg_labels[neg_indexes_L2])
        print('fg neg labels sum', np.sum(neg_labels[neg_indexes_L2]))
        print('neg labels sum', np.sum(neg_labels))
        print('over_laps:', overlaps)
        print('neg_fg_over_laps:', overlaps[neg_indexes_L2])
        print('<<<<<<<<<<<<<<<<<<<<<<<<<>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>')

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    return rois, labels, neg_labels, bbox_targets, bbox_weights
def assign_pyramid_anchor(feat_shapes, gt_boxes, im_info, cfg, feat_strides=(4, 8, 16, 16, 16),
                          scales=(8, 8, 8, 16, 32), ratios=(0.5, 1, 2), allowed_border=0,
                          balance_scale_bg=False):
    """
    Assign ground truth boxes to the anchors of every pyramid level at once.

    Anchors of all levels are pooled, labelled and subsampled together, then
    split back per level and concatenated into one set of flat arrays.

    :param feat_shapes: one entry per level; feat_shapes[i][0][-2:] is read as
        (feat_height, feat_width) of level i
    :param gt_boxes: ground truth boxes, one row per box; the first four
        columns are used as the regression target box
    :param im_info: sequence whose first element starts with (height, width);
        used to filter out anchors that cross the image border
    :param cfg: config object, the TRAIN.RPN_* entries are read
        (RPN_BATCH_SIZE == -1 disables subsampling)
    :param feat_strides: anchor position step of each level
    :param scales: a single anchor scale per level (scales[i] goes with
        feat_strides[i])
    :param ratios: aspect ratios of generated anchors, shared by all levels
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :param balance_scale_bg: when true, background anchors are subsampled per
        level with an equal share of the background budget
    :return: dict of float32 arrays
        'label': shape (1, A*h1*w1 + A*h2*w2 + ...)
        'bbox_target': shape (1, 4A, h1*w1 + h2*w2 + ...)
        'bbox_weight': shape (1, 4A, h1*w1 + h2*w2 + ...)
    """
    def _unmap(data, count, inds, fill=0):
        """ unmap a subset inds of data into original data of size count """
        if len(data.shape) == 1:
            ret = np.empty((count,), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    ratios = np.array(ratios, dtype=np.float32)

    fpn_args = []
    # number of inside anchors per level (turned into offsets further down)
    fpn_anchors_fid = np.zeros(0).astype(int)
    fpn_anchors = np.zeros([0, 4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id in range(len(feat_strides)):
        base_anchors = generate_anchors(base_size=feat_strides[feat_id], ratios=ratios,
                                        scales=[scales[feat_id]])
        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = feat_shapes[feat_id][0][-2:]

        shift_x = np.arange(0, feat_width) * feat_strides[feat_id]
        shift_y = np.arange(0, feat_height) * feat_strides[feat_id]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        A = num_anchors
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                               (all_anchors[:, 1] >= -allowed_border) &
                               (all_anchors[:, 2] < im_info[1] + allowed_border) &
                               (all_anchors[:, 3] < im_info[0] + allowed_border))[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # label: 1 is positive, 0 is negative, -1 is dont care
        labels = np.empty((len(inds_inside),), dtype=np.float32)
        labels.fill(-1)

        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_labels = np.hstack((fpn_labels, labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])

    if gt_boxes.size > 0:
        # overlaps (ex, gt)
        # builtin float replaces the np.float alias removed from recent NumPy
        overlaps = bbox_overlaps(fpn_anchors.astype(float), gt_boxes.astype(float))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        # fg label: for each gt, anchor with highest overlap
        fpn_labels[gt_argmax_overlaps] = 1
        # fg label: above threshold IoU
        fpn_labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        fpn_labels[:] = 0

    # subsample positive labels if we have too many
    if cfg.TRAIN.RPN_BATCH_SIZE == -1:
        num_fg = fpn_labels.shape[0]
    else:
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(fpn_labels >= 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            # deterministic choice while debugging
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        fpn_labels[disable_inds] = -1

    # subsample negative labels if we have too many
    if cfg.TRAIN.RPN_BATCH_SIZE == -1:
        num_bg = fpn_labels.shape[0]
    else:
        num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(fpn_labels >= 1)
    bg_inds = np.where(fpn_labels == 0)[0]
    # per-level counts -> start/end offsets into the pooled arrays
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

    if balance_scale_bg:
        # floor division keeps the per-level budget an integer; true division
        # would hand npr.choice a float size
        num_bg_scale = num_bg // len(feat_strides)
        for feat_id in range(0, len(feat_strides)):
            bg_ind_scale = bg_inds[(bg_inds >= fpn_anchors_fid[feat_id]) &
                                   (bg_inds < fpn_anchors_fid[feat_id + 1])]
            if len(bg_ind_scale) > num_bg_scale:
                disable_inds = npr.choice(bg_ind_scale,
                                          size=(len(bg_ind_scale) - num_bg_scale),
                                          replace=False)
                fpn_labels[disable_inds] = -1
    else:
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            if DEBUG:
                disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
            fpn_labels[disable_inds] = -1

    # regression targets only for positive anchors
    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        fpn_bbox_targets[fpn_labels >= 1, :] = bbox_transform(
            fpn_anchors[fpn_labels >= 1, :], gt_boxes[argmax_overlaps[fpn_labels >= 1], :4])

    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    fpn_bbox_weights[fpn_labels >= 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id in range(0, len(feat_strides)):
        feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
        level = slice(fpn_anchors_fid[feat_id], fpn_anchors_fid[feat_id + 1])

        # map up to original set of anchors
        labels = _unmap(fpn_labels[level], total_anchors, fpn_inds_inside[feat_id], fill=-1)
        bbox_targets = _unmap(fpn_bbox_targets[level], total_anchors,
                              fpn_inds_inside[feat_id], fill=0)
        bbox_weights = _unmap(fpn_bbox_weights[level], total_anchors,
                              fpn_inds_inside[feat_id], fill=0)

        # anchors are laid out (H, W, A); move A in front of the spatial axes
        labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))

        bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)

    label = {
        'label': np.concatenate(label_list, axis=1),
        'bbox_target': np.concatenate(bbox_target_list, axis=2),
        'bbox_weight': np.concatenate(bbox_weight_list, axis=2)
    }
    return label
def gpu_mask_voting(masks, boxes, scores, num_classes, max_per_image,
                    im_width, im_height, nms_thresh, merge_thresh,
                    binary_thresh=0.4, device_id=0):
    """
    A wrapper function for GPU mask voting, note we already know the class of
    boxes and masks.

    Per class, NMS picks the surviving boxes; every box overlapping a survivor
    by at least merge_thresh contributes its mask, weighted by its normalised
    class score, to the merged mask produced by mask_voting_kernel.

    :param masks: masks, indexed along the first axis like boxes
    :param boxes: candidate boxes, one row per detection
    :param scores: per-class scores, scores[:, c] belongs to class c
    :param num_classes: number of classes; class 0 is skipped
    :param max_per_image: cap on detections kept per class and over all classes
    :param im_width: image width, forwarded to mask_voting_kernel
    :param im_height: image height, forwarded to mask_voting_kernel
    :param nms_thresh: threshold handed to gpu_nms_wrapper
    :param merge_thresh: minimum overlap for a box to vote for a kept box
    :param binary_thresh: forwarded to mask_voting_kernel
    :param device_id: GPU used for nms and the voting kernel
    :return: (list_result_mask, list_result_box), both indexed by class;
        entry 0 stays an empty list, result boxes carry the score as last column
    """
    nms = gpu_nms_wrapper(nms_thresh, device_id)
    # Intermediate results
    t_boxes = [[] for _ in range(num_classes)]
    t_scores = [[] for _ in range(num_classes)]
    t_all_scores = []
    for i in range(1, num_classes):
        dets = np.hstack((boxes.astype(np.float32), scores[:, i:i + 1]))
        inds = nms(dets)
        num_keep = min(len(inds), max_per_image)
        inds = inds[:num_keep]
        t_boxes[i] = boxes[inds]
        t_scores[i] = scores[inds, i]
        t_all_scores.extend(scores[inds, i])

    # score of the max_per_image-th best detection over all classes
    sorted_scores = np.sort(t_all_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    thresh = max(sorted_scores[num_keep - 1], 1e-3)

    # inds array to record which mask should be aggregated together
    candidate_inds = []
    # weight for each element in the candidate inds
    candidate_weights = []
    # start position for candidate array
    candidate_start = []
    candidate_scores = []
    # class_bar[c] is the end offset of class c in the flat result arrays
    class_bar = [[] for _ in range(num_classes)]

    for i in range(1, num_classes):
        keep = np.where(t_scores[i] >= thresh)
        t_boxes[i] = t_boxes[i][keep]
        t_scores[i] = t_scores[i][keep]

    # organize helper variable for gpu mask voting
    # builtin float replaces the np.float alias removed from recent NumPy;
    # the cast of all boxes does not depend on the loop, so do it once
    all_boxes = boxes.astype(float)
    for c in range(1, num_classes):
        num_boxes = len(t_boxes[c])
        for i in range(num_boxes):
            cur_ov = bbox_overlaps(all_boxes, t_boxes[c][i, np.newaxis].astype(float))
            cur_inds = np.where(cur_ov >= merge_thresh)[0]
            candidate_inds.extend(cur_inds)
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / sum(cur_weights)
            candidate_weights.extend(cur_weights)
            candidate_start.append(len(candidate_inds))
        candidate_scores.extend(t_scores[c])
        class_bar[c] = len(candidate_scores)

    candidate_inds = np.array(candidate_inds, dtype=np.int32)
    candidate_weights = np.array(candidate_weights, dtype=np.float32)
    candidate_start = np.array(candidate_start, dtype=np.int32)
    candidate_scores = np.array(candidate_scores, dtype=np.float32)

    # the input masks/boxes are relatively large
    # select only a subset of them are useful for mask merge
    unique_inds = np.unique(candidate_inds)
    # unique_inds is sorted and contains every candidate, so the position
    # found by searchsorted is the candidate's row in the reduced arrays
    candidate_inds = np.searchsorted(unique_inds, candidate_inds).astype(np.int32)
    boxes = boxes[unique_inds, ...]
    masks = masks[unique_inds, ...]

    boxes = np.round(boxes)
    result_mask, result_box = mask_voting_kernel(boxes, masks, candidate_inds, candidate_start,
                                                 candidate_weights, binary_thresh,
                                                 im_height, im_width, device_id)
    result_box = np.hstack((result_box, candidate_scores[:, np.newaxis]))

    list_result_box = [[] for _ in range(num_classes)]
    list_result_mask = [[] for _ in range(num_classes)]
    cls_start = 0
    for i in range(1, num_classes):
        cls_end = class_bar[i]
        cls_box = result_box[cls_start:cls_end, :]
        cls_mask = result_mask[cls_start:cls_end, :]
        # drop merged boxes that collapsed to zero width or height
        valid_ind = np.where((cls_box[:, 2] > cls_box[:, 0]) &
                             (cls_box[:, 3] > cls_box[:, 1]))[0]
        list_result_box[i] = cls_box[valid_ind, :]
        list_result_mask[i] = cls_mask[valid_ind, :]
        cls_start = cls_end

    return list_result_mask, list_result_box
def get_target(bbox, gt_box, score, ref_bbox, ref_gt_box, ref_score):
    """
    Build paired 0/1 targets for the boxes of a frame and of a reference frame.

    For every foreground class, threshold and ground-truth box, the candidate
    (current box, reference box) pairs are ranked by how similar their
    translation_dist to their own gt box is; among the closest 10% of pairs
    the one with the largest summed overlap score is marked 1 in both outputs.

    Reads the module-level names ``num_fg_classes`` and ``score_rank_max``
    (a two-element counter that is updated in place).

    NOTE(review): the threshold loop reads ``self._target_thresh`` although
    this function has no ``self`` parameter; it only works if a module-level
    ``self`` exists. Left unchanged -- confirm where the thresholds should
    come from.

    :param bbox: boxes of the current frame, indexed [box, class, coord]
    :param gt_box: ground truth of the current frame, indexed [0, gt, :] with
        the class id in the last column
    :param score: scores of the current frame, indexed [box, class]
    :param ref_bbox: boxes of the reference frame, same layout as bbox
    :param ref_gt_box: ground truth of the reference frame, same layout as gt_box
    :param ref_score: scores of the reference frame, same layout as score
    :return: (blob, ref_blob), float32, [num_boxes, num_fg_classes, num_thresh]
    """
    global num_of_is_full_max
    num_boxes = bbox.shape[0]
    ref_num_boxes = ref_bbox.shape[0]
    score_list = get_scores(bbox, gt_box, score)
    ref_score_list = get_scores(ref_bbox, ref_gt_box, ref_score)

    output_list = []
    ref_output_list = []
    for cls_idx in range(0, num_fg_classes):
        valid_gt_mask = (gt_box[0, :, -1].astype(np.int32) == (cls_idx + 1))
        valid_gt_box = gt_box[0, valid_gt_mask, :]
        num_valid_gt = len(valid_gt_box)

        ref_valid_gt_mask = (ref_gt_box[0, :, -1].astype(np.int32) == (cls_idx + 1))
        ref_valid_gt_box = ref_gt_box[0, ref_valid_gt_mask, :]
        ref_num_valid_gt = len(ref_valid_gt_box)

        score_list_per_class = score_list[cls_idx]
        ref_score_list_per_class = ref_score_list[cls_idx]

        bbox_per_class = bbox[:, cls_idx, :]
        ref_bbox_per_class = ref_bbox[:, cls_idx, :]

        # Make both frames hold the same number of gt boxes of this class by
        # dropping, from the longer side, the boxes with the smallest total
        # overlap with the other side.
        if num_valid_gt != ref_num_valid_gt:
            if ref_num_valid_gt > num_valid_gt:
                num_rm = ref_num_valid_gt - num_valid_gt
                ref_num_valid_gt = num_valid_gt
                # builtin float replaces the np.float alias removed from recent NumPy
                gt_overlap_mat = bbox_overlaps(ref_valid_gt_box.astype(float),
                                               valid_gt_box.astype(float))
                rm_indices = np.argsort(np.sum(gt_overlap_mat, axis=1))[:num_rm]
                ref_valid_gt_box = np.delete(ref_valid_gt_box, rm_indices, axis=0)
                # update ref_score_list_per_class
                ref_score_list_per_class = get_scores_per_class(
                    ref_bbox_per_class, ref_valid_gt_box,
                    ref_score[:, cls_idx:cls_idx + 1])
                assert ref_valid_gt_box.shape == valid_gt_box.shape, "failed remove ref, {} -> {}".format(
                    ref_valid_gt_box.shape[0], valid_gt_box.shape[0])
                print("success remove ref")
            else:
                num_rm = num_valid_gt - ref_num_valid_gt
                num_valid_gt = ref_num_valid_gt
                gt_overlap_mat = bbox_overlaps(valid_gt_box.astype(float),
                                               ref_valid_gt_box.astype(float))
                rm_indices = np.argsort(np.sum(gt_overlap_mat, axis=1))[:num_rm]
                valid_gt_box = np.delete(valid_gt_box, rm_indices, axis=0)
                # update score_list_per_class
                score_list_per_class = get_scores_per_class(
                    bbox_per_class, valid_gt_box, score[:, cls_idx:cls_idx + 1])
                assert ref_valid_gt_box.shape == valid_gt_box.shape, "failed remove, {} -> {}".format(
                    ref_valid_gt_box.shape[0], valid_gt_box.shape[0])
                print("success remove")

        assert num_valid_gt == ref_num_valid_gt, "gt num are not the same"

        if len(score_list_per_class) == 0 or len(ref_score_list_per_class) == 0:
            output_list.append(get_max_socre_bboxes(score_list_per_class, num_boxes))
            ref_output_list.append(get_max_socre_bboxes(ref_score_list_per_class, ref_num_boxes))
        else:
            output_list_per_class = []
            ref_output_list_per_class = []

            for i in range(len(self._target_thresh)):
                overlap_score = score_list_per_class[i]
                ref_overlap_score = ref_score_list_per_class[i]
                output = np.zeros((overlap_score.shape[0], ))
                ref_output = np.zeros((ref_overlap_score.shape[0], ))
                if np.count_nonzero(overlap_score) == 0 or np.count_nonzero(ref_overlap_score) == 0:
                    # no candidate on one side: all-zero targets for this threshold
                    output_list_per_class.append(output)
                    ref_output_list_per_class.append(ref_output)
                    continue
                for x in range(num_valid_gt):
                    overlap_score_per_gt = overlap_score[:, x]
                    ref_overlap_score_per_gt = ref_overlap_score[:, x]
                    # boxes with a non-zero score for this gt are the candidates
                    valid_bbox_indices = np.where(overlap_score_per_gt)[0]
                    ref_valid_bbox_indices = np.where(ref_overlap_score_per_gt)[0]
                    target_gt_box = valid_gt_box[x:x + 1, :-1]
                    ref_target_gt_box = ref_valid_gt_box[x:x + 1, :-1]
                    if len(valid_bbox_indices) == 0 or len(ref_valid_bbox_indices) == 0:
                        continue

                    dist_mat = translation_dist(
                        bbox_per_class[valid_bbox_indices], target_gt_box)[:, 0, :]
                    ref_dist_mat = translation_dist(
                        ref_bbox_per_class[ref_valid_bbox_indices],
                        ref_target_gt_box)[:, 0, :]
                    # squared difference between every (cur, ref) pair of offsets
                    bbox_dist_mat = np.sum(
                        (dist_mat[:, np.newaxis, :] - ref_dist_mat[np.newaxis, :, :]) ** 2,
                        axis=2)
                    assert bbox_dist_mat.shape == (
                        len(bbox_per_class[valid_bbox_indices]),
                        len(ref_bbox_per_class[ref_valid_bbox_indices]))

                    # keep the closest 10% of pairs, at least one
                    num_pairs = bbox_dist_mat.size
                    top_k = int(0.1 * num_pairs + 0.5)
                    top_k = max(1, top_k)
                    top_k = min(top_k, num_pairs)
                    print("{} of out {} stable pair".format(top_k, num_pairs))
                    ind_list, ref_ind_list = np.unravel_index(
                        np.argsort(bbox_dist_mat, axis=None)[:top_k],
                        bbox_dist_mat.shape)

                    score_sum_list = []
                    rank_sum_list = []
                    for ind, ref_ind in zip(ind_list, ref_ind_list):
                        score_sum = (overlap_score_per_gt[valid_bbox_indices[ind]]
                                     + ref_overlap_score_per_gt[ref_valid_bbox_indices[ref_ind]])
                        rank_sum = valid_bbox_indices[ind] + ref_valid_bbox_indices[ref_ind]
                        score_sum_list.append(score_sum)
                        rank_sum_list.append(rank_sum)
                    score_max_idx = np.argmax(np.array(score_sum_list))
                    rank_max_idx = np.argmin(np.array(rank_sum_list))
                    # [0] counts agreements of the two criteria, [1] counts all pairs
                    if score_max_idx == rank_max_idx:
                        score_rank_max[0] += 1
                    score_rank_max[1] += 1

                    max_idx = score_max_idx
                    ind = ind_list[max_idx]
                    ref_ind = ref_ind_list[max_idx]
                    if ind == np.argmax(overlap_score_per_gt[valid_bbox_indices]):
                        print('cur takes the max')
                    if ref_ind == np.argmax(ref_overlap_score_per_gt[ref_valid_bbox_indices]):
                        print('ref takes the max')

                    output[valid_bbox_indices[ind]] = 1
                    ref_output[ref_valid_bbox_indices[ref_ind]] = 1
                output_list_per_class.append(output)
                ref_output_list_per_class.append(ref_output)

            output_per_class = np.stack(output_list_per_class, axis=-1)
            ref_output_per_class = np.stack(ref_output_list_per_class, axis=-1)
            output_list.append(output_per_class)
            ref_output_list.append(ref_output_per_class)

    # [num_boxes, num_fg_classes, num_thresh]
    blob = np.stack(output_list, axis=1).astype(np.float32, copy=False)
    ref_blob = np.stack(ref_output_list, axis=1).astype(np.float32, copy=False)
    return blob, ref_blob
def assign_anchor(feat_shape_p3, feat_shape_p4, feat_shape_p5, feat_shape_p6,
                  gt_boxes, im_info, cfg,
                  feat_stride_p3=4, scales_p3=(8, ), ratios_p3=(0.75, 1, 1.5),
                  feat_stride_p4=8, scales_p4=(8, ), ratios_p4=(0.75, 1, 1.5),
                  feat_stride_p5=16, scales_p5=(8, ), ratios_p5=(0.75, 1, 1.5),
                  feat_stride_p6=4, scales_p6=(8, ), ratios_p6=(0.75, 1, 1.5),
                  allowed_border=1):
    """
    Assign ground truth boxes to the anchor positions of four pyramid levels.

    Every anchor is kept (there is no image-border filtering) and positive
    anchors are labelled with the class id of their best-overlapping gt box
    instead of a plain 1.

    NOTE: the per-level ``feat_stride_*``, ``scales_*`` and ``ratios_*``
    arguments and ``allowed_border`` are accepted for interface compatibility
    but not used; strides (8, 16, 32, 64), scales (8, 10, 12) and ratios
    (0.5, 1, 2) are fixed inside the function. ``im_info`` is not used either.

    :param feat_shape_p3: infer output shape of level p3; the last two entries
        are read as (feat_height, feat_width). Same for p4, p5 and p6.
    :param gt_boxes: assign ground truth: [n, 5]; the first four columns are
        the box, the last column is the class id
    :param im_info: unused
    :param cfg: config object; TRAIN.RPN_NEGATIVE_OVERLAP,
        TRAIN.RPN_POSITIVE_OVERLAP and TRAIN.RPN_BBOX_WEIGHTS are read
    :return: dict of float32 arrays, one entry per level L in (p3, p4, p5, p6)
        'label/L': shape (1, A * feat_height * feat_width); class id for
            positives, 0 for negatives, -1 for dont care
        'bbox_target/L': shape (1, A * 4, feat_height, feat_width)
        'bbox_weight/L': shape (1, A * 4, feat_height, feat_width)
    """
    feat_shape = [feat_shape_p3, feat_shape_p4, feat_shape_p5, feat_shape_p6]
    feat_stride = [8, 16, 32, 64]
    scales = (8, 10, 12)
    ratios = (0.5, 1, 2)

    scales = np.array(scales, dtype=np.float32)

    # a bare assert on a non-empty string is always true; compare the lengths
    assert len(feat_stride) == len(feat_shape), \
        'length of feat_stride is not equal to length of feat_shape'

    labels_list = []
    bbox_targets_list = []
    bbox_weights_list = []
    for i in range(len(feat_shape)):
        base_anchors = generate_anchors(base_size=feat_stride[i], ratios=list(ratios), scales=scales)
        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = (feat_shape[i])[-2:]

        # 1. generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, feat_width) * feat_stride[i]
        shift_y = np.arange(0, feat_height) * feat_stride[i]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = num_anchors
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))

        # every anchor is kept, including those crossing the image border
        anchors = all_anchors
        total_anchors = len(anchors)

        # label: > 0 is positive (class id), 0 is negative, -1 is dont care
        labels = np.empty((total_anchors, ), dtype=np.float32)
        labels.fill(-1)

        if gt_boxes.size > 0:
            # builtin float/int replace the np.float/np.int aliases removed
            # from recent NumPy
            overlaps = bbox_overlaps(anchors.astype(float), gt_boxes.astype(float))
            argmax_overlaps = overlaps.argmax(axis=1)
            # class id of the best-overlapping gt box for every anchor
            labels = gt_boxes[:, -1].astype(int)[argmax_overlaps]
            max_overlaps = overlaps[np.arange(total_anchors), argmax_overlaps]

            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
            labels[(max_overlaps >= cfg.TRAIN.RPN_NEGATIVE_OVERLAP) &
                   (max_overlaps < cfg.TRAIN.RPN_POSITIVE_OVERLAP)] = -1
        else:
            labels[:] = 0

        bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
        if gt_boxes.size > 0:
            bbox_targets[:] = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4])

        bbox_weights = np.zeros((total_anchors, 4), dtype=np.float32)
        bbox_weights[labels > 0, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

        # No anchor was filtered out, so mapping back to the full anchor set
        # reduces to the float32 cast of the labels.
        labels = labels.astype(np.float32)

        # NOTE(review): labels stay in (H, W, A) order while the targets and
        # weights below are moved to (A * 4, H, W) -- confirm this matches the
        # layout of the classification output.
        labels = labels.reshape((1, A * feat_height * feat_width))
        bbox_targets = bbox_targets.reshape(
            (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_weights = bbox_weights.reshape(
            (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

        labels_list.append(labels)
        bbox_targets_list.append(bbox_targets)
        bbox_weights_list.append(bbox_weights)

    label = {
        'label/p3': labels_list[0],
        'label/p4': labels_list[1],
        'label/p5': labels_list[2],
        'label/p6': labels_list[3],
        'bbox_target/p3': bbox_targets_list[0],
        'bbox_target/p4': bbox_targets_list[1],
        'bbox_target/p5': bbox_targets_list[2],
        'bbox_target/p6': bbox_targets_list[3],
        'bbox_weight/p3': bbox_weights_list[0],
        'bbox_weight/p4': bbox_weights_list[1],
        'bbox_weight/p5': bbox_weights_list[2],
        'bbox_weight/p6': bbox_weights_list[3]
    }
    return label
def sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                labels=None, overlaps=None, bbox_targets=None, gt_boxes=None):
    """
    Generate a random sample of RoIs comprising foreground and background examples.

    :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index
                 (box coordinates are read from columns 1: when overlaps/targets are computed here)
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: number of classes
    :param cfg: config object; reads TRAIN.FG_THRESH, TRAIN.BG_THRESH_HI, TRAIN.BG_THRESH_LO,
                TRAIN.BBOX_NORMALIZATION_PRECOMPUTED, TRAIN.BBOX_MEANS and TRAIN.BBOX_STDS
    :param labels: maybe precomputed
    :param overlaps: maybe precomputed (max_overlaps); required when labels is given
    :param bbox_targets: maybe precomputed
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls)
    :return: (rois, labels, bbox_targets, bbox_weights)
    :raises ValueError: if padding is needed but there are no RoIs to draw from, or if
                        bbox targets must be computed while labels were precomputed
                        (no RoI-to-gt assignment is available in that case)
    """
    gt_assignment = None
    if labels is None:
        # `float` is what the removed `np.float` alias used to point to.
        overlaps = bbox_overlaps(rois[:, 1:].astype(float), gt_boxes[:, :4].astype(float))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &
                          (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image
    # (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

    # indexes selected: foreground first, then background
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad more to ensure a fixed minibatch size
    if keep_indexes.shape[0] < rois_per_image and len(rois) == 0:
        # Drawing zero samples from an empty population never grows keep_indexes,
        # so the loop below would not terminate.
        raise ValueError('cannot pad the minibatch: no RoIs to sample from')
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(range(len(rois)), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # select labels (fancy indexing copies, so the caller's array is not modified)
    labels = labels[keep_indexes]
    # everything after the sampled foreground block (background and padding) becomes class 0
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        if gt_assignment is None:
            raise ValueError('bbox_targets must be given when labels are precomputed')
        targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS)) /
                       np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    return rois, labels, bbox_targets, bbox_weights
def forward(self, is_train, req, in_data, out_data, aux):
    """
    Decode refined proposals from the RoIs and bbox deltas, relabel them against the
    ground truth, and optionally resample them.

    Inputs read from ``in_data``: [0] rois, [1] cls_prob, [2] bbox deltas,
    [3] im_info (only row 0 is used), [4] gt_boxes (last column is the class).

    Outputs written with ``self.assign``: ``out_data[0]`` receives the RoI blob
    (column 0 is a zero batch index) and ``out_data[1]`` the labels. When
    ``self._resample == -1`` every decoded proposal is emitted; otherwise the
    ground-truth boxes are prepended and the sampler named by ``cfg.DCR.sample``
    picks the rows, with padded rows labelled -1.

    :raises ValueError: if ``cfg.DCR.sample`` is neither 'DCRV1' nor 'RANDOM'
    """
    rois = in_data[0].asnumpy()
    cls_prob = in_data[1].asnumpy()
    if self._cfg.CLASS_AGNOSTIC:
        bbox_deltas = in_data[2].asnumpy()[:, 4:8]
    else:
        fg_cls_prob = cls_prob[:, 1:]
        # builtin int is what the removed `np.int` alias used to point to
        fg_cls_idx = np.argmax(fg_cls_prob, axis=1).astype(int)
        batch_idx_array = np.arange(fg_cls_idx.shape[0], dtype=int)
        # NOTE(review): fg_cls_idx indexes cls_prob[:, 1:], so class c maps to c - 1, yet it
        # is used directly as the 4-column group index below. If in_data[2] also carries a
        # background group at columns 0:4 (as the class-agnostic 4:8 slice suggests), this
        # selects the deltas of class c - 1 -- confirm before changing.
        in_data2 = in_data[2].asnumpy()
        bbox_deltas = np.hstack(
            [in_data2[batch_idx_array, fg_cls_idx * 4 + k].reshape(-1, 1) for k in range(4)])
    im_info = in_data[3].asnumpy()[0, :]
    gt_boxes = in_data[4].asnumpy()

    # post processing: undo the target normalisation before decoding
    if self._cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
        bbox_deltas = bbox_deltas * np.array(self._cfg.TRAIN.BBOX_STDS) \
            + np.array(self._cfg.TRAIN.BBOX_MEANS)

    proposals = bbox_pred(rois[:, 1:], bbox_deltas)
    proposals = clip_boxes(proposals, im_info[:2])

    # only support single batch
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    # reassign label
    gt_classes = gt_boxes[:, -1].astype(int)
    overlaps = np.zeros((blob.shape[0], self._cfg.dataset.NUM_CLASSES), dtype=np.float32)
    # n boxes and k gt_boxes => n * k overlap
    gt_overlaps = bbox_overlaps(blob[:, 1:].astype(float), gt_boxes[:, :-1].astype(float))
    # for each box in n boxes, select only maximum overlap (must be greater than zero)
    argmaxes = gt_overlaps.argmax(axis=1)
    maxes = gt_overlaps.max(axis=1)
    matched = np.where(maxes > 0)[0]
    overlaps[matched, gt_classes[argmaxes[matched]]] = maxes[matched]

    roi_max_classes = overlaps.argmax(axis=1)
    roi_max_overlaps = overlaps.max(axis=1)
    # assign bg labels
    below_fg = np.where(roi_max_overlaps < self._cfg.TRAIN.FG_THRESH)
    roi_max_classes[below_fg] = 0
    assert (roi_max_classes[below_fg] == 0).all()

    if self._resample == -1:
        self.assign(out_data[0], req[0], blob)
        self.assign(out_data[1], req[1], roi_max_classes)
    else:
        # Include ground-truth boxes in the set of candidate rois
        batch_inds = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
        all_rois = np.vstack((np.hstack((batch_inds, gt_boxes[:, :-1])), blob))

        # gt boxes: treated as perfectly scored, perfectly overlapping predictions
        pred_classes = gt_boxes[:, -1]
        pred_scores = np.ones_like(pred_classes)
        max_classes = pred_classes.copy()
        max_overlaps = np.ones_like(max_classes)
        # predicted boxes
        roi_pred_classes = cls_prob.argmax(axis=1)
        roi_pred_scores = cls_prob.max(axis=1)

        # per-roi record, gt rows first so indices line up with all_rois
        roi_rec = {
            'pred_classes': np.append(pred_classes, roi_pred_classes),
            'scores': np.append(pred_scores, roi_pred_scores),
            'max_classes': np.append(max_classes, roi_max_classes),
            'max_overlaps': np.append(max_overlaps, roi_max_overlaps),
        }

        if self._cfg.DCR.sample == 'DCRV1':
            keep_indexes, pad_indexes = sample_rois_fg_bg(roi_rec, self._cfg, self._resample)
        elif self._cfg.DCR.sample == 'RANDOM':
            keep_indexes, pad_indexes = sample_rois_random(roi_rec, self._cfg, self._resample)
        else:
            raise ValueError('Undefined sampling method: %s' % self._cfg.DCR.sample)

        resampled_blob = np.vstack((all_rois[keep_indexes, :], all_rois[pad_indexes, :]))
        # assign bg classes
        assert (roi_rec['max_classes'][np.where(
            roi_rec['max_overlaps'] < self._cfg.TRAIN.FG_THRESH)] == 0).all()
        # padded rows get label -1
        resampled_label = np.append(roi_rec['max_classes'][keep_indexes],
                                    -1 * np.ones(len(pad_indexes)))

        self.assign(out_data[0], req[0], resampled_blob)
        self.assign(out_data[1], req[1], resampled_label)
def gpu_mask_voting(masks, boxes, scores, num_classes, max_per_image, im_width, im_height,
                    nms_thresh, merge_thresh, binary_thresh=0.4, device_id=0):
    """
    A wrapper function for GPU mask voting, note we already know the class of boxes and masks.

    Per class, NMS picks the boxes to keep; for every kept box, all input boxes whose
    overlap with it is at least ``merge_thresh`` are handed to ``mask_voting_kernel``
    together with score-proportional weights.

    :param masks: per-detection masks, indexed along axis 0 in step with ``boxes``
    :param boxes: per-detection boxes (stacked with one score column for NMS)
    :param scores: per-detection class scores; column ``c`` is class ``c``
    :param num_classes: number of classes; class 0 is skipped
    :param max_per_image: cap on detections kept per class and overall
    :param im_width: image width, forwarded to the kernel
    :param im_height: image height, forwarded to the kernel
    :param nms_thresh: threshold passed to ``gpu_nms_wrapper``
    :param merge_thresh: minimum overlap for a box to take part in a vote
    :param binary_thresh: forwarded to the kernel
    :param device_id: GPU id used for NMS and the voting kernel
    :return: (list_result_mask, list_result_box), both indexed by class; entry 0 stays
             an empty list, box rows carry the score as last column
    """
    nms = gpu_nms_wrapper(nms_thresh, device_id)

    # Intermediate results
    t_boxes = [[] for _ in range(num_classes)]
    t_scores = [[] for _ in range(num_classes)]
    t_all_scores = []
    for i in range(1, num_classes):
        dets = np.hstack((boxes.astype(np.float32), scores[:, i:i + 1]))
        inds = nms(dets)
        num_keep = min(len(inds), max_per_image)
        inds = inds[:num_keep]
        t_boxes[i] = boxes[inds]
        t_scores[i] = scores[inds, i]
        t_all_scores.extend(scores[inds, i])

    # score of the max_per_image-th best detection over all classes, floored at 1e-3
    # NOTE(review): raises IndexError if NMS kept nothing for every class -- confirm
    # whether callers can pass an empty detection set.
    sorted_scores = np.sort(t_all_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    thresh = max(sorted_scores[num_keep - 1], 1e-3)

    # inds array to record which mask should be aggregated together
    candidate_inds = []
    # weight for each element in the candidate inds
    candidate_weights = []
    # end offset into candidate_inds after each voted box
    candidate_start = []
    candidate_scores = []
    # class_bar[c]: number of voted boxes accumulated up to and including class c
    class_bar = [[] for _ in range(num_classes)]

    for i in range(1, num_classes):
        keep = np.where(t_scores[i] >= thresh)
        t_boxes[i] = t_boxes[i][keep]
        t_scores[i] = t_scores[i][keep]

    # organize helper variable for gpu mask voting
    boxes_f = boxes.astype(float)  # loop-invariant; `float` replaces the removed np.float
    for c in range(1, num_classes):
        for i in range(len(t_boxes[c])):
            cur_ov = bbox_overlaps(boxes_f, t_boxes[c][i, np.newaxis].astype(float))
            cur_inds = np.where(cur_ov >= merge_thresh)[0]
            candidate_inds.extend(cur_inds)
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / sum(cur_weights)
            candidate_weights.extend(cur_weights)
            candidate_start.append(len(candidate_inds))
        candidate_scores.extend(t_scores[c])
        class_bar[c] = len(candidate_scores)

    candidate_inds = np.array(candidate_inds, dtype=np.int32)
    candidate_weights = np.array(candidate_weights, dtype=np.float32)
    candidate_start = np.array(candidate_start, dtype=np.int32)
    candidate_scores = np.array(candidate_scores, dtype=np.float32)

    # the input masks/boxes are relatively large; keep only the ones that take part in
    # a vote and renumber candidate_inds to index into that compacted subset
    unique_inds, remapped = np.unique(candidate_inds, return_inverse=True)
    candidate_inds = remapped.astype(np.int32)
    boxes = boxes[unique_inds, ...]
    masks = masks[unique_inds, ...]

    boxes = np.round(boxes)
    result_mask, result_box = mask_voting_kernel(boxes, masks, candidate_inds, candidate_start,
                                                 candidate_weights, binary_thresh,
                                                 im_height, im_width, device_id)
    result_box = np.hstack((result_box, candidate_scores[:, np.newaxis]))

    list_result_box = [[] for _ in range(num_classes)]
    list_result_mask = [[] for _ in range(num_classes)]
    cls_start = 0
    for i in range(1, num_classes):
        cls_end = class_bar[i]
        cls_box = result_box[cls_start:cls_end, :]
        cls_mask = result_mask[cls_start:cls_end, :]
        # drop degenerate boxes (non-positive width or height)
        valid_ind = np.where((cls_box[:, 2] > cls_box[:, 0]) &
                             (cls_box[:, 3] > cls_box[:, 1]))[0]
        # An optional second greedy NMS pass (IoU 0.3) over the voted boxes was prototyped
        # here and left disabled; at low thresholds it discarded detections. If it is
        # re-enabled, the two assignments below must be replaced by its result.
        list_result_box[i] = cls_box[valid_ind, :]
        list_result_mask[i] = cls_mask[valid_ind, :]
        cls_start = cls_end

    return list_result_mask, list_result_box
def pred_double_eval(predictor, test_data, imdb, cfg, vis=False, thresh=1e-3, logger=None,
                     ignore_cache=True, show_gt=False):
    """
    wrapper for calculating offline validation for faster data analysis
    in this example, all threshold are set by hand

    Runs ``im_double_detect`` over ``test_data`` and post-processes the detections of
    the current frame and of the reference frame in the same way (score threshold,
    NMS or learned-NMS passthrough, per-image cap), collecting them in ``all_boxes``
    and ``ref_all_boxes``.

    :param predictor: Predictor
    :param test_data: data iterator, must be non-shuffle
    :param imdb: image database
    :param cfg: config; reads TEST.SOFTNMS, TEST.NMS, TEST.max_per_image, TEST.LEARN_NMS,
                TEST.FIRST_N and CLASS_AGNOSTIC
    :param vis: controls visualization
    :param thresh: valid detection threshold
    :param logger: optional logger for per-batch timing
    :param ignore_cache: when False and a cached detection file exists, evaluate it and return
    :param show_gt: when visualizing, also draw ground-truth (and learned-NMS target) boxes
    :return: None
    """
    # Only read below; declared once up front instead of in the middle of the batch loop.
    global num_of_is_full_max

    det_file = os.path.join(imdb.result_path, imdb.name + '_detections.pkl')
    if os.path.exists(det_file) and not ignore_cache:
        # NOTE: unpickling executes arbitrary code; only load cache files this project wrote.
        with open(det_file, 'rb') as fid:
            all_boxes = cPickle.load(fid)
        info_str = imdb.evaluate_detections(all_boxes)
        if logger:
            logger.info('evaluate detections: \n{}'.format(info_str))
        return

    assert vis or not test_data.shuffle
    data_names = [k[0] for k in test_data.provide_data[0]]
    num_images = test_data.size

    if not isinstance(test_data, PrefetchingIter):
        test_data = PrefetchingIter(test_data)

    if cfg.TEST.SOFTNMS:
        nms = py_softnms_wrapper(cfg.TEST.NMS)
    else:
        nms = py_nms_wrapper(cfg.TEST.NMS)

    # limit detections to max_per_image over all classes
    max_per_image = cfg.TEST.max_per_image

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    # NOTE(review): nothing below evaluates or returns all_boxes / ref_all_boxes -- confirm
    # whether a final evaluation step is missing.
    all_boxes = [[[] for _ in range(num_images)] for _ in range(imdb.num_classes)]
    ref_all_boxes = [[[] for _ in range(num_images)] for _ in range(imdb.num_classes)]
    valid_tally = 0
    valid_sum = 0

    idx = 0
    t = time.time()
    inference_count = 0
    all_inference_time = []
    post_processing_time = []
    nms_full_count = []
    nms_pos_count = []
    is_max_count = []
    all_count = []
    for im_info, data_batch in test_data:
        t1 = time.time() - t
        t = time.time()

        scales = [iim_info[0, 2] for iim_info in im_info]
        scores_all, boxes_all, ref_scores_all, ref_boxes_all, data_dict_all = im_double_detect(
            predictor, data_batch, data_names, scales, cfg)

        t2 = time.time() - t
        t = time.time()
        nms_full_count_per_batch = 0
        nms_pos_count_per_batch = 0
        is_max_count_per_batch = num_of_is_full_max[0]
        all_count_per_batch = 0
        for delta, (scores, boxes, ref_scores, ref_boxes, data_dict) in enumerate(
                zip(scores_all, boxes_all, ref_scores_all, ref_boxes_all, data_dict_all)):
            # ---------------- current frame ----------------
            if cfg.TEST.LEARN_NMS:
                for j in range(1, imdb.num_classes):
                    indexes = np.where(scores[:, j - 1, 0] > thresh)[0]
                    cls_scores = scores[indexes, j - 1, :]
                    cls_boxes = boxes[indexes, j - 1, :]
                    cls_dets = np.hstack((cls_boxes, cls_scores))
                    # count the valid ground truth
                    if len(cls_scores) > 0:
                        valid_tally += len(cls_scores)
                        valid_sum += len(scores)
                    all_boxes[j][idx + delta] = cls_dets

                    if DEBUG:
                        # compare plain NMS output against the learned-NMS targets
                        keep = nms(cls_dets)
                        nms_cls_dets = cls_dets[keep, :]
                        target = data_dict['nms_multi_target']
                        target_indices = np.where(target[:, 4] == j - 1)
                        target = target[target_indices]
                        nms_full_count_per_batch += bbox_equal_count(nms_cls_dets, target)

                        gt_boxes = data_dict['gt_boxes'][0].asnumpy()
                        gt_boxes = gt_boxes[np.where(gt_boxes[:, 4] == j)[0], :4]
                        gt_boxes /= scales[delta]

                        if len(cls_boxes) != 0 and len(gt_boxes) != 0:
                            overlap_mat = bbox_overlaps(cls_boxes.astype(float),
                                                        gt_boxes.astype(float))
                            keep = nms(cls_dets[np.where(overlap_mat > 0.5)[0]])
                            nms_cls_dets = cls_dets[np.where(overlap_mat > 0.5)[0]][keep]
                            nms_pos_count_per_batch += bbox_equal_count(nms_cls_dets, target)
                        all_count_per_batch += len(target)
            else:
                for j in range(1, imdb.num_classes):
                    indexes = np.where(scores[:, j] > thresh)[0]
                    if cfg.TEST.FIRST_N > 0:
                        # todo: check whether the order affects the result
                        sort_indices = np.argsort(scores[:, j])[-cfg.TEST.FIRST_N:]
                        indexes = np.intersect1d(sort_indices, indexes)

                    cls_scores = scores[indexes, j, np.newaxis]
                    cls_boxes = boxes[indexes, 4:8] if cfg.CLASS_AGNOSTIC \
                        else boxes[indexes, j * 4:(j + 1) * 4]
                    # count the valid ground truth
                    if len(cls_scores) > 0:
                        valid_tally += len(cls_scores)
                        valid_sum += len(scores)
                    cls_dets = np.hstack((cls_boxes, cls_scores))
                    if cfg.TEST.SOFTNMS:
                        all_boxes[j][idx + delta] = nms(cls_dets)
                    else:
                        keep = nms(cls_dets)
                        all_boxes[j][idx + delta] = cls_dets[keep, :]

            if max_per_image > 0:
                image_scores = np.hstack([all_boxes[j][idx + delta][:, -1]
                                          for j in range(1, imdb.num_classes)])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, imdb.num_classes):
                        keep = np.where(all_boxes[j][idx + delta][:, -1] >= image_thresh)[0]
                        all_boxes[j][idx + delta] = all_boxes[j][idx + delta][keep, :]

            if vis:
                boxes_this_image = [[]] + [all_boxes[j][idx + delta]
                                           for j in range(1, imdb.num_classes)]
                if show_gt:
                    gt_boxes = data_dict['gt_boxes'][0]
                    for gt_box in gt_boxes:
                        gt_box = gt_box.asnumpy()
                        gt_cls = int(gt_box[4])
                        gt_box = gt_box / scales[delta]
                        gt_box[4] = 1
                        if cfg.TEST.LEARN_NMS:
                            gt_box = np.append(gt_box, 1)
                        boxes_this_image[gt_cls] = np.vstack((boxes_this_image[gt_cls], gt_box))

                    # NOTE(review): assumed to be nested under show_gt -- confirm.
                    if cfg.TEST.LEARN_NMS:
                        target_boxes = data_dict['nms_multi_target']
                        for target_box in target_boxes:
                            print("cur", target_box * scales[delta])
                            target_cls = int(target_box[4]) + 1
                            target_box[4] = 2 + target_box[5]
                            target_box[5] = target_box[6]
                            target_box = target_box[:6]
                            boxes_this_image[target_cls] = np.vstack(
                                (boxes_this_image[target_cls], target_box))

            # ---------------- reference frame ----------------
            if cfg.TEST.LEARN_NMS:
                for j in range(1, imdb.num_classes):
                    indexes = np.where(ref_scores[:, j - 1, 0] > thresh)[0]
                    cls_scores = ref_scores[indexes, j - 1, :]
                    cls_boxes = ref_boxes[indexes, j - 1, :]
                    cls_dets = np.hstack((cls_boxes, cls_scores))
                    # count the valid ground truth
                    if len(cls_scores) > 0:
                        valid_tally += len(cls_scores)
                        valid_sum += len(ref_scores)
                    ref_all_boxes[j][idx + delta] = cls_dets

                    # NOTE(review): the whole comparison is assumed to sit under DEBUG, as
                    # in the current-frame branch -- confirm.
                    if DEBUG:
                        keep = nms(cls_dets)
                        nms_cls_dets = cls_dets[keep, :]
                        target = data_dict['ref_nms_multi_target']
                        target_indices = np.where(target[:, 4] == j - 1)
                        target = target[target_indices]
                        nms_full_count_per_batch += bbox_equal_count(nms_cls_dets, target)

                        gt_boxes = data_dict['ref_gt_boxes'][0].asnumpy()
                        gt_boxes = gt_boxes[np.where(gt_boxes[:, 4] == j)[0], :4]
                        gt_boxes /= scales[delta]

                        if len(cls_boxes) != 0 and len(gt_boxes) != 0:
                            overlap_mat = bbox_overlaps(cls_boxes.astype(float),
                                                        gt_boxes.astype(float))
                            keep = nms(cls_dets[np.where(overlap_mat > 0.5)[0]])
                            nms_cls_dets = cls_dets[np.where(overlap_mat > 0.5)[0]][keep]
                            nms_pos_count_per_batch += bbox_equal_count(nms_cls_dets, target)
                        all_count_per_batch += len(target)
            else:
                for j in range(1, imdb.num_classes):
                    indexes = np.where(ref_scores[:, j] > thresh)[0]
                    if cfg.TEST.FIRST_N > 0:
                        # todo: check whether the order affects the result
                        sort_indices = np.argsort(ref_scores[:, j])[-cfg.TEST.FIRST_N:]
                        indexes = np.intersect1d(sort_indices, indexes)

                    cls_scores = ref_scores[indexes, j, np.newaxis]
                    cls_boxes = ref_boxes[indexes, 4:8] if cfg.CLASS_AGNOSTIC \
                        else ref_boxes[indexes, j * 4:(j + 1) * 4]
                    # count the valid ground truth
                    if len(cls_scores) > 0:
                        valid_tally += len(cls_scores)
                        valid_sum += len(ref_scores)
                    cls_dets = np.hstack((cls_boxes, cls_scores))
                    if cfg.TEST.SOFTNMS:
                        ref_all_boxes[j][idx + delta] = nms(cls_dets)
                    else:
                        keep = nms(cls_dets)
                        ref_all_boxes[j][idx + delta] = cls_dets[keep, :]

            if max_per_image > 0:
                image_scores = np.hstack([ref_all_boxes[j][idx + delta][:, -1]
                                          for j in range(1, imdb.num_classes)])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, imdb.num_classes):
                        keep = np.where(ref_all_boxes[j][idx + delta][:, -1] >= image_thresh)[0]
                        ref_all_boxes[j][idx + delta] = ref_all_boxes[j][idx + delta][keep, :]

            if vis:
                ref_boxes_this_image = [[]] + [ref_all_boxes[j][idx + delta]
                                               for j in range(1, imdb.num_classes)]
                if show_gt:
                    gt_boxes = data_dict['ref_gt_boxes'][0]
                    for gt_box in gt_boxes:
                        gt_box = gt_box.asnumpy()
                        gt_cls = int(gt_box[4])
                        gt_box = gt_box / scales[delta]
                        gt_box[4] = 1
                        if cfg.TEST.LEARN_NMS:
                            gt_box = np.append(gt_box, 1)
                        ref_boxes_this_image[gt_cls] = np.vstack(
                            (ref_boxes_this_image[gt_cls], gt_box))

                    # NOTE(review): assumed to be nested under show_gt -- confirm.
                    if cfg.TEST.LEARN_NMS:
                        target_boxes = data_dict['ref_nms_multi_target']
                        for target_box in target_boxes:
                            print("ref", target_box * scales[delta])
                            target_cls = int(target_box[4]) + 1
                            target_box[4] = 2 + target_box[5]
                            target_box[5] = target_box[6]
                            target_box = target_box[:6]
                            ref_boxes_this_image[target_cls] = np.vstack(
                                (ref_boxes_this_image[target_cls], target_box))
                # data_dict['data'] holds both frames: row 0 current, row 1 reference
                vis_double_all_detection(data_dict['data'][0:1].asnumpy(), boxes_this_image,
                                         data_dict['data'][1:2].asnumpy(), ref_boxes_this_image,
                                         imdb.classes, scales[delta], cfg)

        if DEBUG:
            nms_full_count.append(nms_full_count_per_batch)
            nms_pos_count.append(nms_pos_count_per_batch)
            is_max_count.append(is_max_count_per_batch)
            all_count.append(all_count_per_batch)
            print("full:{} pos:{} max:{}".format(
                1.0 * sum(nms_full_count) / sum(all_count),
                1.0 * sum(nms_pos_count) / sum(all_count),
                1.0 * sum(is_max_count) / sum(all_count)))

        idx += test_data.batch_size
        t3 = time.time() - t
        t = time.time()
        post_processing_time.append(t3)
        all_inference_time.append(t1 + t2 + t3)
        inference_count += 1
        if inference_count % 200 == 0:
            # average over at most the last 500 batches
            valid_count = 500 if inference_count > 500 else inference_count
            print("--->> running-average inference time per batch: {}".format(
                float(sum(all_inference_time[-valid_count:])) / valid_count))
            print("--->> running-average post processing time per batch: {}".format(
                float(sum(post_processing_time[-valid_count:])) / valid_count))
        # Call form prints the same text under Python 2 and is also valid Python 3.
        print('testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
            idx, num_images, t1, t2, t3))
        if logger:
            logger.info('testing {}/{} data {:.4f}s net {:.4f}s post {:.4f}s'.format(
                idx, num_images, t1, t2, t3))
def sample_rois(rois, fg_rois_per_image, rois_per_image, num_classes,
                labels=None, overlaps=None, bbox_targets=None, gt_boxes=None, gt_kps=None):
    """
    Generate a random sample of RoIs comprising foreground and background examples,
    optionally with keypoint targets for the foreground RoIs.

    Thresholds and normalisation settings come from the module-level ``cfg``.

    :param rois: all_rois [n, 4]; e2e: [n, 5] with batch_index
                 (box coordinates are read from columns 1: when overlaps/targets are computed here)
    :param fg_rois_per_image: foreground roi number
    :param rois_per_image: total roi number
    :param num_classes: number of classes
    :param labels: maybe precomputed
    :param overlaps: maybe precomputed (max_overlaps); required when labels is given
    :param bbox_targets: maybe precomputed
    :param gt_boxes: optional for e2e [n, 5] (x1, y1, x2, y2, cls)
    :param gt_kps: optional for e2e [n, num_kps*3] (x1, y1, v1, ...)
    :return: dict with keys 'rois_output', 'label', 'bbox_target', 'bbox_weight' and, when
             gt_kps is given, 'kps_label', 'kps_target', 'kps_weight'
    :raises ValueError: if padding is needed but there are no RoIs to draw from, or if
                        the RoI-to-gt assignment is needed while labels were precomputed
    """
    gt_assignment = None
    if labels is None:
        # `float` is what the removed `np.float` alias used to point to.
        overlaps = bbox_overlaps(rois[:, 1:].astype(float), gt_boxes[:, :4].astype(float))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = npr.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &
                          (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image
    # (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = npr.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

    # indexes selected: foreground first, then background
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad more to ensure a fixed minibatch size
    if keep_indexes.shape[0] < rois_per_image and len(rois) == 0:
        # Drawing zero samples from an empty population never grows keep_indexes,
        # so the loop below would not terminate.
        raise ValueError('cannot pad the minibatch: no RoIs to sample from')
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
        gap_indexes = npr.choice(range(len(rois)), size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # select labels (fancy indexing copies, so the caller's array is not modified)
    labels = labels[keep_indexes]
    # everything after the sampled foreground block (background and padding) becomes class 0
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox_target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        if gt_assignment is None:
            raise ValueError('bbox_targets must be given when labels are precomputed')
        targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS)) /
                       np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = \
        expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    res = {
        'rois_output': rois,
        'label': labels,
        'bbox_target': bbox_targets,
        'bbox_weight': bbox_weights,
    }

    if gt_kps is not None:
        if gt_assignment is None:
            raise ValueError('keypoint targets cannot be built when labels are precomputed')
        keep_kps = gt_kps[gt_assignment[keep_indexes]]
        n_keep = keep_kps.shape[0]
        K = cfg.dataset.NUM_KEYPOINTS
        assert gt_kps.shape[1] == K*3
        G = cfg.network.KEYPOINTS_POOLED_SIZE
        # label -1 and zero weight for every RoI that gets no keypoint target
        kps_labels = np.empty([n_keep, K], dtype=np.float32)
        kps_labels.fill(-1)
        kps_targets = np.zeros([n_keep, K, G, G, 2], dtype=np.float32)
        kps_weights = kps_targets.copy()
        num_fg = fg_indexes.size
        assert num_fg > 0, 'need at least one roi'
        # assign kp targets; foreground RoIs occupy the first num_fg rows
        fg_kps_label, fg_kps_target, fg_kps_weight = assign_keypoints(
            rois[:num_fg, 1:], keep_kps[:num_fg], pooled_size=G)
        kps_labels[:num_fg] = fg_kps_label
        kps_targets[:num_fg] = fg_kps_target
        normalizer = 1.0 / (num_fg + 1e-3)
        kps_weights[:num_fg] = fg_kps_weight * normalizer
        res['kps_label'] = kps_labels.reshape([-1])
        # move the trailing 2-vector next to the keypoint axis, then fold both into channels
        res['kps_target'] = kps_targets.transpose([0, 1, 4, 2, 3]).reshape([n_keep, -1, G, G])
        res['kps_weight'] = kps_weights.transpose([0, 1, 4, 2, 3]).reshape([n_keep, -1, G, G])

    return res
def evaluate_recall(self, roidb, candidate_boxes=None, thresholds=None):
    """
    evaluate detection proposal recall metrics
    record max overlap value for each gt box; return vector of overlap values

    Prints, for several box-area ranges, the share of proposals in each range, the
    average number of proposals per image, and the average recall plus the recall at
    each IoU threshold.

    :param roidb: used to evaluate
    :param candidate_boxes: if not given, use roidb's non-gt boxes
    :param thresholds: array-like recall threshold; defaults to 0.5:0.05:0.95
    :return: all printed messages concatenated into one string (no separators are added)
    """
    all_log_info = ''
    area_names = ['all', '0-25', '25-50', '50-100',
                  '100-200', '200-300', '300-inf']
    area_ranges = [[0**2, 1e5**2], [0**2, 25**2], [25**2, 50**2], [50**2, 100**2],
                   [100**2, 200**2], [200**2, 300**2], [300**2, 1e5**2]]

    # proposal counts per area range (the catch-all 'all' range is skipped)
    area_counts = []
    for area_range in area_ranges[1:]:
        area_count = 0
        for i in range(self.num_images):
            if candidate_boxes is None:
                # default is use the non-gt boxes from roidb
                non_gt_inds = np.where(roidb[i]['gt_classes'] == 0)[0]
                boxes = roidb[i]['boxes'][non_gt_inds, :]
            else:
                boxes = candidate_boxes[i]
            boxes_areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
            valid_range_inds = np.where((boxes_areas >= area_range[0]) &
                                        (boxes_areas < area_range[1]))[0]
            area_count += len(valid_range_inds)
        area_counts.append(area_count)
    total_counts = float(sum(area_counts))
    for area_name, area_count in zip(area_names[1:], area_counts):
        log_info = 'percentage of {} {}'.format(area_name, area_count / total_counts)
        print(log_info)
        all_log_info += log_info
    log_info = 'average number of proposal {}'.format(total_counts / self.num_images)
    print(log_info)
    all_log_info += log_info

    for area_name, area_range in zip(area_names, area_ranges):
        gt_overlaps = np.zeros(0)
        num_pos = 0
        for i in range(self.num_images):
            # check for max_overlaps == 1 avoids including crowd annotations
            max_gt_overlaps = roidb[i]['gt_overlaps'].max(axis=1)
            gt_inds = np.where((roidb[i]['gt_classes'] > 0) & (max_gt_overlaps == 1))[0]
            gt_boxes = roidb[i]['boxes'][gt_inds, :]
            gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * (gt_boxes[:, 3] - gt_boxes[:, 1] + 1)
            valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                                     (gt_areas < area_range[1]))[0]
            gt_boxes = gt_boxes[valid_gt_inds, :]
            num_pos += len(valid_gt_inds)

            if candidate_boxes is None:
                # default is use the non-gt boxes from roidb
                non_gt_inds = np.where(roidb[i]['gt_classes'] == 0)[0]
                boxes = roidb[i]['boxes'][non_gt_inds, :]
            else:
                boxes = candidate_boxes[i]
            if boxes.shape[0] == 0:
                # gt boxes of this image still count in num_pos, i.e. as misses
                continue

            # `float` is what the removed `np.float` alias used to point to
            overlaps = bbox_overlaps(boxes.astype(float), gt_boxes.astype(float))

            _gt_overlaps = np.zeros((gt_boxes.shape[0]))
            # choose whatever is smaller to iterate
            rounds = min(boxes.shape[0], gt_boxes.shape[0])
            for j in range(rounds):
                # find which proposal maximally covers each gt box
                argmax_overlaps = overlaps.argmax(axis=0)
                # get the IoU amount of coverage for each gt box
                max_overlaps = overlaps.max(axis=0)
                # find which gt box is covered by most IoU
                gt_ind = max_overlaps.argmax()
                gt_ovr = max_overlaps.max()
                assert (gt_ovr >= 0), '%s\n%s\n%s' % (boxes, gt_boxes, overlaps)
                # find the proposal box that covers the best covered gt box
                box_ind = argmax_overlaps[gt_ind]
                # record the IoU coverage of this gt box
                _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                assert (_gt_overlaps[j] == gt_ovr)
                # mark the proposal box and the gt box as used
                overlaps[box_ind, :] = -1
                overlaps[:, gt_ind] = -1
            # append recorded IoU coverage level
            gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

        gt_overlaps = np.sort(gt_overlaps)
        if thresholds is None:
            step = 0.05
            thresholds = np.arange(0.5, 0.95 + 1e-5, step)
        # Always float: an integer-typed thresholds argument would otherwise make
        # zeros_like() return an int array and truncate every recall to 0.
        recalls = np.zeros(len(thresholds), dtype=np.float64)

        # compute recall for each IoU threshold
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
        ar = recalls.mean()

        # print results
        log_info = 'average recall for {}: {:.3f}'.format(area_name, ar)
        print(log_info)
        all_log_info += log_info
        for threshold, recall in zip(thresholds, recalls):
            log_info = 'recall @{:.2f}: {:.3f}'.format(threshold, recall)
            print(log_info)
            all_log_info += log_info

    return all_log_info
def sample_rois(self, rois, fg_rois_per_image, rois_per_image, num_classes, cfg,
                labels=None, overlaps=None, bbox_targets=None, gt_boxes=None, gt_masks=None):
    """
    Sample foreground/background RoIs into a fixed-size batch and build the
    bbox-regression and mask targets for the sampled RoIs.

    :param rois: RoI array; columns 1: are used as box coordinates
                 (column 0 is presumably a batch index -- TODO confirm against caller)
    :param fg_rois_per_image: upper bound on the number of foreground RoIs sampled
    :param rois_per_image: number of RoIs returned (the batch is padded up to this size)
    :param num_classes: forwarded to expand_bbox_regression_targets
    :param cfg: config; reads cfg.TRAIN.FG_THRESH, BG_THRESH_HI, BG_THRESH_LO, IGNORE_GAP,
                GAP_SELECT_FROM_ALL, BBOX_NORMALIZATION_PRECOMPUTED (and BBOX_MEANS / BBOX_STDS
                when that flag is set)
    :param labels: optional per-RoI labels; when None, labels and overlaps are derived from gt_boxes
    :param overlaps: per-RoI max overlap; must be supplied together with labels
    :param bbox_targets: optional precomputed bbox target data, indexed by RoI
    :param gt_boxes: ground truth, columns :4 are the box and column 4 the class label
    :param gt_masks: ground-truth masks, indexed by ground-truth id
    :return: (rois, labels, bbox_targets, bbox_weights, mask_reg_targets); mask_reg_targets has shape
             (rois_per_image, 1, mask_size, mask_size) and is -1 for every non-foreground RoI
    """
    if labels is None:
        overlaps = bbox_overlaps(rois[:, 1:].astype(np.float64), gt_boxes[:, :4].astype(np.float64))
        gt_assignment = overlaps.argmax(axis=1)
        overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]
    # NOTE(review): gt_assignment is only bound in the branch above. When labels is passed in,
    # the target computation (bbox_targets is None) and the mask loop below raise
    # UnboundLocalError as soon as they touch it -- confirm callers never hit that path.

    # foreground RoI with FG_THRESH overlap
    fg_indexes = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
    if cfg.TRAIN.IGNORE_GAP:
        keep_inds = remove_repetition(rois[fg_indexes, 1:])
        fg_indexes = fg_indexes[keep_inds]

    # guard against the case when an image has fewer than fg_rois_per_image foreground RoIs
    fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_indexes.size)
    # Sample foreground regions without replacement
    if len(fg_indexes) > fg_rois_per_this_image:
        fg_indexes = np.random.choice(fg_indexes, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_indexes = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &
                          (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    if cfg.TRAIN.IGNORE_GAP:
        keep_inds = remove_repetition(rois[bg_indexes, 1:])
        bg_indexes = bg_indexes[keep_inds]

    # Compute number of background RoIs to take from this image
    # (guarding against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_indexes.size)
    # Sample background regions without replacement
    if len(bg_indexes) > bg_rois_per_this_image:
        bg_indexes = np.random.choice(bg_indexes, size=bg_rois_per_this_image, replace=False)

    # indexes selected: foreground first, then background
    keep_indexes = np.append(fg_indexes, bg_indexes)

    # pad more to ensure a fixed minibatch size
    while keep_indexes.shape[0] < rois_per_image:
        gap = np.minimum(len(rois), rois_per_image - keep_indexes.shape[0])
        if cfg.TRAIN.GAP_SELECT_FROM_ALL:
            gap_indexes = np.random.choice(range(len(rois)), size=gap, replace=False)
        else:
            # pad only with RoIs that were not sampled as foreground
            bg_full_indexes = list(set(range(len(rois))) - set(fg_indexes))
            gap_indexes = np.random.choice(bg_full_indexes, size=gap, replace=False)
        keep_indexes = np.append(keep_indexes, gap_indexes)

    # select labels
    labels = labels[keep_indexes]
    # set labels of bg_rois to be 0 (everything after the foreground prefix)
    labels[fg_rois_per_this_image:] = 0
    rois = rois[keep_indexes]

    # load or compute bbox target
    if bbox_targets is not None:
        bbox_target_data = bbox_targets[keep_indexes, :]
    else:
        targets = bbox_transform(rois[:, 1:], gt_boxes[gt_assignment[keep_indexes], :4])
        if cfg.TRAIN.BBOX_NORMALIZATION_PRECOMPUTED:
            targets = ((targets - np.array(cfg.TRAIN.BBOX_MEANS)) /
                       np.array(cfg.TRAIN.BBOX_STDS))
        bbox_target_data = np.hstack((labels[:, np.newaxis], targets))

    bbox_targets, bbox_weights = expand_bbox_regression_targets(bbox_target_data, num_classes, cfg)

    if cfg.TRAIN.IGNORE_GAP:
        # RoIs that were only added as padding are ignored by both losses
        valid_rois_per_this_image = fg_rois_per_this_image + bg_rois_per_this_image
        labels[valid_rois_per_this_image:] = -1
        bbox_weights[valid_rois_per_this_image:] = 0

    # masks: -1 everywhere except for the foreground RoIs filled in below
    mask_reg_targets = -np.ones((len(keep_indexes), 1, self._mask_size, self._mask_size))
    for idx, obj in enumerate(fg_indexes):
        # idx addresses the re-ordered rois (foreground prefix), obj the original RoI index
        gt_roi = np.round(gt_boxes[gt_assignment[obj], :-1]).astype(int)
        ex_roi = np.round(rois[idx, 1:]).astype(int)
        gt_mask = gt_masks[gt_assignment[obj]]
        mask_reg_target = intersect_box_mask(ex_roi, gt_roi, gt_mask)
        mask_reg_target = cv2.resize(mask_reg_target.astype(np.float64),
                                     (self._mask_size, self._mask_size))
        mask_reg_target = mask_reg_target >= self._binary_thresh
        mask_reg_targets[idx, ...] = mask_reg_target

    return rois, labels, bbox_targets, bbox_weights, mask_reg_targets
def cpu_mask_voting(masks, boxes, scores, num_classes, max_per_image, im_width, im_height,
                    nms_thresh, merge_thresh, binary_thresh=0.4):
    """
    Wrapper function for mask voting, note we already know the class of boxes and masks

    Per foreground class, NMS picks the surviving boxes; each survivor's mask is then
    replaced by a score-weighted aggregation of the masks of all detections that overlap
    it by at least merge_thresh.

    :param masks: per-detection masks; masks[i, 0] is the mask of detection i and the last
                  axis gives the mask size
    :param boxes: per-detection boxes, four coordinates per row
    :param scores: per-detection class scores, column c is the score of class c
    :param num_classes: number of classes; class 0 is skipped
    :param max_per_image: cap on kept detections, applied per class and again over all classes
    :param im_width: forwarded to mask_aggregation
    :param im_height: forwarded to mask_aggregation
    :param nms_thresh: threshold handed to py_nms_wrapper
    :param merge_thresh: minimum overlap for a detection to take part in the voting
    :param binary_thresh: forwarded to mask_aggregation
    :return: (list_result_mask, list_result_box), both indexed by class id (entry 0 stays an
             empty list); masks are (n, 1, mask_size, mask_size), boxes are (n, 5) with the
             score in the last column
    """
    masks = masks.astype(np.float32)
    mask_size = masks.shape[-1]
    nms = py_nms_wrapper(nms_thresh)
    # loop-invariant casts, hoisted out of the per-class / per-box loops
    boxes_f32 = boxes.astype(np.float32)
    boxes_f64 = boxes.astype(np.float64)

    # apply nms and sort to get first images according to their scores
    # Intermediate results
    t_boxes = [[] for _ in range(num_classes)]
    t_scores = [[] for _ in range(num_classes)]
    t_all_scores = []
    for i in range(1, num_classes):
        dets = np.hstack((boxes_f32, scores[:, i:i + 1]))
        inds = nms(dets)
        num_keep = min(len(inds), max_per_image)
        inds = inds[:num_keep]
        t_boxes[i] = boxes[inds]
        t_scores[i] = scores[inds, i]
        t_all_scores.extend(scores[inds, i])

    # global score threshold so that at most max_per_image detections survive overall
    sorted_scores = np.sort(t_all_scores)[::-1]
    num_keep = min(len(sorted_scores), max_per_image)
    if len(sorted_scores) > 0:
        thresh = max(sorted_scores[num_keep - 1], 1e-3)
    else:
        # nothing survived NMS: fall back to the floor instead of indexing an empty array
        thresh = 1e-3

    for i in range(1, num_classes):
        keep = np.where(t_scores[i] >= thresh)
        t_boxes[i] = t_boxes[i][keep]
        t_scores[i] = t_scores[i][keep]

    # resize every mask to the pixel size of its (rounded) box
    num_detect = boxes.shape[0]
    res_mask = [[] for _ in range(num_detect)]
    for i in range(num_detect):
        box = np.round(boxes[i]).astype(int)
        res_mask[i] = cv2.resize(masks[i, 0].astype(np.float32),
                                 (box[2] - box[0] + 1, box[3] - box[1] + 1))

    list_result_box = [[] for _ in range(num_classes)]
    list_result_mask = [[] for _ in range(num_classes)]
    for c in range(1, num_classes):
        num_boxes = len(t_boxes[c])
        masks_ar = np.zeros((num_boxes, 1, mask_size, mask_size))
        boxes_ar = np.zeros((num_boxes, 4))
        for i in range(num_boxes):
            # Get weights according to their segmentation scores
            cur_ov = bbox_overlaps(boxes_f64, t_boxes[c][i, np.newaxis].astype(np.float64))
            cur_inds = np.where(cur_ov >= merge_thresh)[0]
            cur_weights = scores[cur_inds, c]
            cur_weights = cur_weights / sum(cur_weights)
            # Re-format mask when passing it to mask_aggregation
            p_mask = [res_mask[j] for j in list(cur_inds)]
            # do mask aggregation
            orig_mask, boxes_ar[i] = mask_aggregation(boxes[cur_inds], p_mask, cur_weights,
                                                      im_width, im_height, binary_thresh)
            masks_ar[i, 0] = cv2.resize(orig_mask.astype(np.float32), (mask_size, mask_size))
        boxes_scored_ar = np.hstack((boxes_ar, t_scores[c][:, np.newaxis]))
        # NOTE: a second per-class NMS pass over the voted boxes (threshold 0.3) was tried
        # here and is disabled; at low thresholds it discarded some of the voted boxes.
        list_result_box[c] = boxes_scored_ar
        list_result_mask[c] = masks_ar

    return list_result_mask, list_result_box
def assign_anchor(feat_shape_p4, feat_shape_p5, feat_shape_p6, feat_shape_p7,
                  gt_boxes, im_info, cfg,
                  feat_stride_p4=16, scales_p4=(8, ), ratios_p4=(0.75, 1, 1.5),
                  feat_stride_p5=32, scales_p5=(8, ), ratios_p5=(0.75, 1, 1.5),
                  feat_stride_p6=64, scales_p6=(8, ), ratios_p6=(0.75, 1, 1.5),
                  feat_stride_p7=128, scales_p7=(8, ), ratios_p7=(0.75, 1, 1.5),
                  allowed_border=0):
    """
    assign ground truth boxes to anchor positions on four pyramid levels (P4..P7)

    Every anchor is kept (no border filtering) and no fg/bg subsampling is done: each anchor
    receives the class label of its best-overlapping ground-truth box, -1 (ignore) or 0.

    :param feat_shape_p4: infer output shape of P4, last two entries are (height, width);
                          likewise for feat_shape_p5 / feat_shape_p6 / feat_shape_p7
    :param gt_boxes: assign ground truth:[n, 5], column 4 is the class label
    :param im_info: not used by this variant, kept for interface compatibility
    :param cfg: config; reads cfg.TRAIN.RPN_POSITIVE_OVERLAP, RPN_NEGATIVE_OVERLAP
                and RPN_BBOX_WEIGHTS
    :param feat_stride_p4: ignored, as are the other feat_stride_p* arguments; the strides
                           are fixed to (16, 32, 64, 128)
    :param scales_p5: used to generate anchors on every level (scales_p4/p6/p7 are ignored)
    :param ratios_p5: aspect ratios of generated anchors on every level
                      (ratios_p4/p6/p7 are ignored)
    :param allowed_border: ignored, no anchor is filtered out
    :return: dict of label
    'label': of shape (1, total number of anchors over all levels)
    'bbox_target': of shape (1, num_anchors * 4, sum of feat_height * feat_width)
    'bbox_weight': same shape as 'bbox_target', RPN_BBOX_WEIGHTS where label >= 1, else 0
    """
    feat_shapes = [feat_shape_p4, feat_shape_p5, feat_shape_p6, feat_shape_p7]
    feat_strides = [16, 32, 64, 128]
    scales = np.array(scales_p5)
    ratios = np.array(ratios_p5)

    def _unmap(data, count, inds, fill=0):
        """" unmap a subset inds of data into original data of size count """
        if len(data.shape) == 1:
            ret = np.empty((count, ), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    fpn_args = []
    fpn_anchors_fid = np.zeros(0).astype(int)
    fpn_anchors = np.zeros([0, 4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id in range(len(feat_strides)):
        # len(scales.shape) == 1 just for backward compatibility, will remove in the future
        if len(scales.shape) == 1:
            base_anchors = generate_anchors(base_size=feat_strides[feat_id],
                                            ratios=ratios, scales=scales)
        else:
            assert len(scales.shape) == len(ratios.shape) == 2
            base_anchors = generate_anchors(base_size=feat_strides[feat_id],
                                            ratios=ratios[feat_id], scales=scales[feat_id])
        feat_height, feat_width = feat_shapes[feat_id][-2:]

        # 1. generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, feat_width) * feat_strides[feat_id]
        shift_y = np.arange(0, feat_height) * feat_strides[feat_id]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = base_anchors.shape[0]
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # every anchor counts as "inside": this variant does not filter by image border
        inds_inside = np.arange(total_anchors)
        anchors = all_anchors[inds_inside, :]

        # label: >= 1 is positive (class id), 0 is negative, -1 is dont care
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_labels = np.hstack((fpn_labels, labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        overlaps = bbox_overlaps(fpn_anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)), argmax_overlaps]
        # start from the class of the best gt box, then demote by overlap:
        # below the positive threshold -> ignore, below the negative threshold -> background
        labels = gt_boxes[argmax_overlaps, 4]
        labels[max_overlaps < cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        fpn_labels = labels
    else:
        fpn_labels[:] = 0

    # turn per-level anchor counts into [start, end) offsets into the stacked arrays
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

    # regression targets are computed for every anchor; the weights below select positives
    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        fpn_bbox_targets[:] = bbox_transform(fpn_anchors, gt_boxes[argmax_overlaps, :4])
    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    fpn_bbox_weights[fpn_labels >= 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id in range(0, len(feat_strides)):
        feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
        start, end = fpn_anchors_fid[feat_id], fpn_anchors_fid[feat_id + 1]
        # map up to original set of anchors
        labels = _unmap(fpn_labels[start:end], total_anchors, fpn_inds_inside[feat_id], fill=-1)
        bbox_targets = _unmap(fpn_bbox_targets[start:end], total_anchors,
                              fpn_inds_inside[feat_id], fill=0)
        bbox_weights = _unmap(fpn_bbox_weights[start:end], total_anchors,
                              fpn_inds_inside[feat_id], fill=0)

        # (K*A, ...) -> anchor-major layout
        labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))
        bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)

    label = {
        'label': np.concatenate(label_list, axis=1),
        'bbox_target': np.concatenate(bbox_target_list, axis=2),
        'bbox_weight': np.concatenate(bbox_weight_list, axis=2)
    }
    return label
def assign_pyramid_anchor(
        feat_shapes,
        gt_boxes,
        im_info,
        cfg,
        feat_strides=(4, 8, 16, 32, 64),
        scales=(8, ),
        ratios=(0.5, 1, 2),
        allowed_border=0,
        balance_scale_bg=False,
):
    """
    assign ground truth boxes to anchor positions over a feature pyramid

    :param feat_shapes: infer output shapes, one per level; feat_shapes[i][0][-2:] is
                        read as (feat_height, feat_width)
    :param gt_boxes: assign ground truth, columns :4 are the box
    :param im_info: filter out anchors overlapped with edges; im_info[0] is read as
                    (height, width, ...)
    :param feat_strides: anchor position step, one per level
    :param scales: used to generate anchors, affects num_anchors (per location);
                   1-D is shared by all levels, 2-D is indexed per level
    :param ratios: aspect ratios of generated anchors (1-D shared or 2-D per level)
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :param balance_scale_bg: restrict the background samples for each pyramid level
    :param cfg: config; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES, RPN_NEGATIVE_OVERLAP,
                RPN_POSITIVE_OVERLAP, RPN_BATCH_SIZE (-1 disables subsampling),
                RPN_FG_FRACTION and RPN_BBOX_WEIGHTS
    :return: dict of label
    'label': of shape (1, total number of anchors over all levels); 1 fg, 0 bg, -1 ignore
    'bbox_target': of shape (1, num_anchors * 4, sum of feat_height * feat_width)
    'bbox_weight': same shape as 'bbox_target', RPN_BBOX_WEIGHTS on foreground anchors
    """

    def _unmap(data, count, inds, fill=0):
        """" unmap a subset inds of data into original data of size count """
        if len(data.shape) == 1:
            ret = np.empty((count, ), dtype=np.float32)
            ret.fill(fill)
            ret[inds] = data
        else:
            ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
            ret.fill(fill)
            ret[inds, :] = data
        return ret

    # when True the random subsampling below is replaced by a deterministic prefix
    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    ratios = np.array(ratios, dtype=np.float32)
    assert (len(feat_shapes) == len(feat_strides))

    fpn_args = []
    fpn_anchors_fid = np.zeros(0).astype(int)
    fpn_anchors = np.zeros([0, 4])
    fpn_labels = np.zeros(0)
    fpn_inds_inside = []
    for feat_id in range(len(feat_strides)):
        # len(scales.shape) == 1 just for backward compatibility, will remove in the future
        if len(scales.shape) == 1:
            base_anchors = generate_anchors(base_size=feat_strides[feat_id],
                                            ratios=ratios, scales=scales)
        else:
            assert len(scales.shape) == len(ratios.shape) == 2
            base_anchors = generate_anchors(base_size=feat_strides[feat_id],
                                            ratios=ratios[feat_id], scales=scales[feat_id])
        num_anchors = base_anchors.shape[0]
        feat_height, feat_width = feat_shapes[feat_id][0][-2:]

        # 1. generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, feat_width) * feat_strides[feat_id]
        shift_y = np.arange(0, feat_height) * feat_strides[feat_id]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = num_anchors
        K = shifts.shape[0]
        all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -allowed_border) &
            (all_anchors[:, 1] >= -allowed_border) &
            (all_anchors[:, 2] < im_info[1] + allowed_border) &
            (all_anchors[:, 3] < im_info[0] + allowed_border))[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # label: 1 is positive, 0 is negative, -1 is dont care
        # for sigmoid classifier, ignore the 'background' class
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        fpn_anchors_fid = np.hstack((fpn_anchors_fid, len(inds_inside)))
        fpn_anchors = np.vstack((fpn_anchors, anchors))
        fpn_labels = np.hstack((fpn_labels, labels))
        fpn_inds_inside.append(inds_inside)
        fpn_args.append([feat_height, feat_width, A, total_anchors])

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        overlaps = bbox_overlaps(fpn_anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(fpn_anchors)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # all anchors that tie for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        # fg label: for each gt, anchor with highest overlap
        fpn_labels[gt_argmax_overlaps] = 1
        # fg label: above threshold IoU
        fpn_labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            fpn_labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        fpn_labels[:] = 0

    # subsample positive labels if we have too many
    if cfg.TRAIN.RPN_BATCH_SIZE == -1:
        num_fg = fpn_labels.shape[0]
    else:
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(fpn_labels >= 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        if DEBUG:
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        fpn_labels[disable_inds] = -1

    # subsample negative labels if we have too many
    if cfg.TRAIN.RPN_BATCH_SIZE == -1:
        num_bg = fpn_labels.shape[0]
    else:
        num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(fpn_labels >= 1)
    bg_inds = np.where(fpn_labels == 0)[0]
    # turn per-level anchor counts into [start, end) offsets into the stacked arrays
    fpn_anchors_fid = np.hstack((0, fpn_anchors_fid.cumsum()))

    if balance_scale_bg:
        # integer share per level; floor division keeps `size` an int on Python 3 as well
        num_bg_scale = num_bg // len(feat_strides)
        for feat_id in range(0, len(feat_strides)):
            bg_ind_scale = bg_inds[(bg_inds >= fpn_anchors_fid[feat_id]) &
                                   (bg_inds < fpn_anchors_fid[feat_id + 1])]
            if len(bg_ind_scale) > num_bg_scale:
                disable_inds = npr.choice(bg_ind_scale,
                                          size=(len(bg_ind_scale) - num_bg_scale),
                                          replace=False)
                fpn_labels[disable_inds] = -1
    else:
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            if DEBUG:
                disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
            fpn_labels[disable_inds] = -1

    # regression targets only for foreground anchors, everything else stays 0
    fpn_bbox_targets = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    if gt_boxes.size > 0:
        fpn_bbox_targets[fpn_labels >= 1, :] = bbox_transform(
            fpn_anchors[fpn_labels >= 1, :], gt_boxes[argmax_overlaps[fpn_labels >= 1], :4])
    fpn_bbox_weights = np.zeros((len(fpn_anchors), 4), dtype=np.float32)
    fpn_bbox_weights[fpn_labels >= 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    label_list = []
    bbox_target_list = []
    bbox_weight_list = []
    for feat_id in range(0, len(feat_strides)):
        feat_height, feat_width, A, total_anchors = fpn_args[feat_id]
        start, end = fpn_anchors_fid[feat_id], fpn_anchors_fid[feat_id + 1]
        # map up to original set of anchors
        labels = _unmap(fpn_labels[start:end], total_anchors, fpn_inds_inside[feat_id], fill=-1)
        bbox_targets = _unmap(fpn_bbox_targets[start:end], total_anchors,
                              fpn_inds_inside[feat_id], fill=0)
        bbox_weights = _unmap(fpn_bbox_weights[start:end], total_anchors,
                              fpn_inds_inside[feat_id], fill=0)

        labels = labels.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, A * feat_height * feat_width))
        bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
        bbox_targets = bbox_targets.reshape((1, A * 4, -1))
        bbox_weights = bbox_weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        bbox_weights = bbox_weights.reshape((1, A * 4, -1))

        label_list.append(labels)
        bbox_target_list.append(bbox_targets)
        bbox_weight_list.append(bbox_weights)

    label = {
        'label': np.concatenate(label_list, axis=1),
        'bbox_target': np.concatenate(bbox_target_list, axis=2),
        'bbox_weight': np.concatenate(bbox_weight_list, axis=2)
    }
    return label
def assign_anchor(feat_shape_p2, feat_shape_p3, feat_shape_p4, feat_shape_p5, feat_shape_p6,
                  gt_boxes, im_info, cfg,
                  feat_stride_p2=4, scales_p2=(16, ), ratios_p2=(0.75, 1, 1.5),
                  feat_stride_p3=8, scales_p3=(16, ), ratios_p3=(0.75, 1, 1.5),
                  feat_stride_p4=16, scales_p4=(16, ), ratios_p4=(0.75, 1, 1.5),
                  feat_stride_p5=32, scales_p5=(16, ), ratios_p5=(0.75, 1, 1.5),
                  feat_stride_p6=64, scales_p6=(16, ), ratios_p6=(0.75, 1, 1.5),
                  allowed_border=1000):
    """
    assign ground truth boxes to anchor positions on five pyramid levels (P2..P6)

    All anchors are kept, including those reaching outside the image, so the per-level
    outputs cover every anchor position.

    :param feat_shape_p2: infer output shape of P2, last two entries are (height, width);
                          likewise for feat_shape_p3 .. feat_shape_p6
    :param gt_boxes: assign ground truth:[n, 5], columns :4 are the box
    :param im_info: not used by this variant, kept for interface compatibility
    :param cfg: config; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES, RPN_NEGATIVE_OVERLAP,
                RPN_POSITIVE_OVERLAP, RPN_FG_FRACTION, RPN_BATCH_SIZE and RPN_BBOX_WEIGHTS
    :param feat_stride_p2: ignored, as are the other feat_stride_p* arguments; the strides
                           are fixed to (4, 8, 16, 32, 64)
    :param scales_p3: used to generate anchors on every level (the other scales_p* are ignored)
    :param ratios_p2: ignored, as are the other ratios_p* arguments; the aspect ratios are
                      fixed to (0.5, 1, 2)
    :param allowed_border: ignored, no anchor is filtered out
    :return: dict of label, with one entry per level pN in p2..p6
    'label/pN': of shape (1, num_anchors * feat_height * feat_width); 1 fg, 0 bg, -1 ignore
    'bbox_target/pN': of shape (1, num_anchors * 4, feat_height, feat_width)
    'bbox_weight/pN': same shape as 'bbox_target/pN', RPN_BBOX_WEIGHTS on foreground anchors
    """
    feat_shape = [feat_shape_p2, feat_shape_p3, feat_shape_p4, feat_shape_p5, feat_shape_p6]
    feat_stride = [4, 8, 16, 32, 64]
    level_names = ['p2', 'p3', 'p4', 'p5', 'p6']
    ratios = (0.5, 1, 2)
    scales = np.array(scales_p3, dtype=np.float32)

    all_anchors_list = []
    level_dims = []     # (feat_height, feat_width, A) per level
    level_bounds = [0]  # cumulative anchor counts: level i owns [bounds[i], bounds[i + 1])
    for shape, stride in zip(feat_shape, feat_stride):
        base_anchors = generate_anchors(base_size=stride, ratios=list(ratios), scales=scales)
        feat_height, feat_width = shape[-2:]

        # 1. generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, feat_width) * stride
        shift_y = np.arange(0, feat_height) * stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = base_anchors.shape[0]
        K = shifts.shape[0]
        level_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        all_anchors_list.append(level_anchors.reshape((K * A, 4)))
        level_dims.append((feat_height, feat_width, A))
        level_bounds.append(level_bounds[-1] + int(K * A))

    total_anchors = level_bounds[-1]
    all_anchors = np.vstack(all_anchors_list)  # [total_anchors, 4]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((total_anchors, ), dtype=np.float32)
    labels.fill(-1)

    if gt_boxes.size > 0:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        overlaps = bbox_overlaps(all_anchors.astype(np.float64), gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(total_anchors), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # all anchors that tie for the best overlap with some gt box
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1
        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # regression targets are computed for every anchor; the weights select the positives
    bbox_targets = np.zeros((total_anchors, 4), dtype=np.float32)
    if gt_boxes.size > 0:
        bbox_targets[:] = bbox_transform(all_anchors, gt_boxes[argmax_overlaps, :4])
    bbox_weights = np.zeros((total_anchors, 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    # split the stacked arrays back into levels; no unmapping is needed because no anchor
    # was filtered out, so each level's slice already covers all of its positions
    level_labels = []
    level_targets = []
    level_weights = []
    for i, (feat_height, feat_width, A) in enumerate(level_dims):
        start, end = level_bounds[i], level_bounds[i + 1]
        lvl_label = labels[start:end].reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        level_labels.append(lvl_label.reshape((1, A * feat_height * feat_width)))
        level_targets.append(bbox_targets[start:end, :].reshape(
            (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2))
        level_weights.append(bbox_weights[start:end, :].reshape(
            (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2)))

    label = {}
    for name, value in zip(level_names, level_labels):
        label['label/' + name] = value
    for name, value in zip(level_names, level_targets):
        label['bbox_target/' + name] = value
    for name, value in zip(level_names, level_weights):
        label['bbox_weight/' + name] = value
    return label
def assign_anchor(feat_shape, gt_boxes, im_info, cfg, feat_stride=16,
                  scales=(8, 16, 32), ratios=(0.5, 1, 2), allowed_border=0):
    """
    Assign ground truth boxes to anchor positions on a single feature map.

    NOTE(review): another ``assign_anchor`` is defined further down in this
    file; if both live at module level the later one replaces this one at
    import time -- confirm which definition callers are meant to get.

    :param feat_shape: infer output shape; only the last two entries
        (feat_height, feat_width) are read
    :param gt_boxes: 2-D array with one ground truth per row (may be empty).
        Whole rows are handed to ``bbox_overlaps``; columns 0-3 are handed to
        ``bbox_transform`` as the regression target box
    :param im_info: indexable of per-image rows; only row 0 is read. Its
        entry 0 bounds the anchors' y2 and its entry 1 bounds their x2
    :param cfg: config object; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES,
        RPN_NEGATIVE_OVERLAP, RPN_POSITIVE_OVERLAP, RPN_FG_FRACTION,
        RPN_BATCH_SIZE and RPN_BBOX_WEIGHTS
    :param feat_stride: anchor position step
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :return: dict with
        'label': shape (1, num_anchors * feat_height * feat_width);
            1 is positive, 0 is negative, -1 is dont care
        'bbox_target': shape (1, num_anchors * 4, feat_height, feat_width)
        'bbox_weight': shape (1, num_anchors * 4, feat_height, feat_width);
            cfg.TRAIN.RPN_BBOX_WEIGHTS on positive anchors, 0 elsewhere
    """

    def _unmap(data, count, inds, fill=0):
        """Scatter ``data`` (rows aligned with ``inds``) into a float32 array
        of ``count`` rows that is pre-filled with ``fill``."""
        ret = np.full((count, ) + data.shape[1:], fill, dtype=np.float32)
        ret[inds] = data
        return ret

    DEBUG = False
    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    base_anchors = generate_anchors(base_size=feat_stride,
                                    ratios=list(ratios),
                                    scales=scales)
    num_anchors = base_anchors.shape[0]
    feat_height, feat_width = feat_shape[-2:]
    has_gt = gt_boxes.size > 0

    if DEBUG:
        print('anchors:')
        print(base_anchors)
        print('anchor shapes:')
        print(
            np.hstack((base_anchors[:, 2::4] - base_anchors[:, 0::4],
                       base_anchors[:, 3::4] - base_anchors[:, 1::4])))
        print('im_info', im_info)
        print('height', feat_height, 'width', feat_width)
        print('gt_boxes shape', gt_boxes.shape)
        print('gt_boxes', gt_boxes)

    # 1. generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, feat_width) * feat_stride
    shift_y = np.arange(0, feat_height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = num_anchors
    K = shifts.shape[0]
    all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -allowed_border)
        & (all_anchors[:, 1] >= -allowed_border)
        & (all_anchors[:, 2] < im_info[1] + allowed_border)
        & (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
    num_inside = len(inds_inside)
    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', num_inside)

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors shape', anchors.shape)

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.full((num_inside, ), -1, dtype=np.float32)

    if has_gt:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 replaces the removed ``np.float`` alias (same dtype).
        overlaps = bbox_overlaps(anchors.astype(np.float64),
                                 gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap of some gt
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        if DEBUG:
            # deterministic pick so debug runs are reproducible
            disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
        else:
            disable_inds = npr.choice(fg_inds,
                                      size=(len(fg_inds) - num_fg),
                                      replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        if DEBUG:
            disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
        else:
            disable_inds = npr.choice(bg_inds,
                                      size=(len(bg_inds) - num_bg),
                                      replace=False)
        labels[disable_inds] = -1

    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    if has_gt:
        bbox_targets[:] = bbox_transform(anchors,
                                         gt_boxes[argmax_overlaps, :4])

    bbox_weights = np.zeros((num_inside, 4), dtype=np.float32)
    bbox_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

    if DEBUG:
        _sums = bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums = (bbox_targets[labels == 1, :]**2).sum(axis=0)
        # epsilon keeps both divisions finite when there are no positives
        _counts = np.sum(labels == 1) + 1e-14
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means', means)
        print('stdevs', stds)

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_weights = _unmap(bbox_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        if has_gt:
            # max_overlaps only exists when there is at least one gt box
            print('rpn: max max_overlaps', np.max(max_overlaps))
        print('rpn: num_positives', np.sum(labels == 1))
        print('rpn: num_negatives', np.sum(labels == 0))
        _fg_sum = np.sum(labels == 1)
        _bg_sum = np.sum(labels == 0)
        _count = 1
        print('rpn: num_positive avg', _fg_sum / _count)
        print('rpn: num_negative avg', _bg_sum / _count)

    # (K*A, ...) -> (1, H, W, A[*4]) -> channel-first layout
    labels = labels.reshape(
        (1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, A * feat_height * feat_width))
    bbox_targets = bbox_targets.reshape(
        (1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)
    bbox_weights = bbox_weights.reshape(
        (1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))

    label = {
        'label': labels,
        'bbox_target': bbox_targets,
        'bbox_weight': bbox_weights
    }
    return label
def assign_anchor(feat_shape, gt_boxes, im_info, cfg, feat_stride=16,
                  scales=(8, 16, 32), ratios=(0.5, 1, 2), allowed_border=0,
                  valid_ranges=None, invalid_anchor_threshold=0.3):
    """
    Assign ground truth boxes to anchor positions, optionally once per
    scale range.

    :param feat_shape: infer output shape; only the last two entries
        (feat_height, feat_width) are read
    :param gt_boxes: 2-D array with one ground truth per row (may be empty).
        Whole rows are handed to ``bbox_overlaps``; columns 0-3 are used as
        the (x1, y1, x2, y2) box for ``bbox_transform`` and for the area test
    :param im_info: indexable of per-image rows; only row 0 is read. Its
        entry 0 bounds the anchors' y2, entry 1 bounds their x2, and entry 2
        multiplies the returned 'valid_ranges'
    :param cfg: config object; reads cfg.TRAIN.RPN_CLOBBER_POSITIVES,
        RPN_NEGATIVE_OVERLAP, RPN_POSITIVE_OVERLAP, RPN_FG_FRACTION,
        RPN_BATCH_SIZE and RPN_BBOX_WEIGHTS
    :param feat_stride: anchor position step
    :param scales: used to generate anchors, affects num_anchors (per location)
    :param ratios: aspect ratios of generated anchors
    :param allowed_border: filter out anchors with edge overlap > allowed_border
    :param valid_ranges: None, or an iterable of (low, high) pairs. For each
        pair, gt boxes whose area is below low**2 or above high**2 are
        treated as invalid, and anchors overlapping an invalid gt by more
        than invalid_anchor_threshold are set to dont care (-1)
    :param invalid_anchor_threshold: overlap above which an anchor near an
        invalid gt box is ignored
    :return: dict of label. With valid_ranges=None:
        'label': shape (1, num_anchors * feat_height * feat_width);
            1 is positive, 0 is negative, -1 is dont care
        'bbox_target': shape (1, num_anchors * 4, feat_height, feat_width)
        'bbox_weight': shape (1, num_anchors * 4, feat_height, feat_width)
        Otherwise the same three entries stacked along axis 0, one row per
        range, plus 'valid_ranges' (see the note at the end of the body).
    """

    def _unmap(data, count, inds, fill=0):
        """Scatter ``data`` (rows aligned with ``inds``) into a float32 array
        of ``count`` rows that is pre-filled with ``fill``."""
        ret = np.full((count,) + data.shape[1:], fill, dtype=np.float32)
        ret[inds] = data
        return ret

    im_info = im_info[0]
    scales = np.array(scales, dtype=np.float32)
    base_anchors = generate_anchors(base_size=feat_stride, ratios=list(ratios), scales=scales)
    num_anchors = base_anchors.shape[0]
    feat_height, feat_width = feat_shape[-2:]
    has_gt = gt_boxes.size > 0

    # 1. generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, feat_width) * feat_stride
    shift_y = np.arange(0, feat_height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()

    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = num_anchors
    K = shifts.shape[0]
    all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where((all_anchors[:, 0] >= -allowed_border) &
                           (all_anchors[:, 1] >= -allowed_border) &
                           (all_anchors[:, 2] < im_info[1] + allowed_border) &
                           (all_anchors[:, 3] < im_info[0] + allowed_border))[0]
    num_inside = len(inds_inside)

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.full((num_inside,), -1, dtype=np.float32)

    if has_gt:
        # overlap between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 replaces the removed ``np.float`` alias (same dtype).
        anchors_f64 = anchors.astype(np.float64)
        overlaps = bbox_overlaps(anchors_f64, gt_boxes.astype(np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
        # every anchor that ties for the best overlap of some gt
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IoU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
    else:
        labels[:] = 0

    def _subsample(lbls):
        """Set surplus positives, then surplus negatives, to -1 in place."""
        # subsample positive labels if we have too many
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCH_SIZE)
        fg_inds = np.where(lbls == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            lbls[disable_inds] = -1

        # subsample negative labels if we have too many
        num_bg = cfg.TRAIN.RPN_BATCH_SIZE - np.sum(lbls == 1)
        bg_inds = np.where(lbls == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            lbls[disable_inds] = -1

    def _to_grid(lbls):
        """Derive bbox weights from ``lbls`` and lay both out on the full
        anchor grid in channel-first order."""
        weights = np.zeros((num_inside, 4), dtype=np.float32)
        weights[lbls == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_WEIGHTS)

        # map up to original set of anchors
        lbls = _unmap(lbls, total_anchors, inds_inside, fill=-1)
        weights = _unmap(weights, total_anchors, inds_inside, fill=0)

        lbls = lbls.reshape((1, feat_height, feat_width, A)).transpose(0, 3, 1, 2)
        lbls = lbls.reshape((1, A * feat_height * feat_width))
        weights = weights.reshape((1, feat_height, feat_width, A * 4)).transpose((0, 3, 1, 2))
        return lbls, weights

    # Regression targets do not depend on the sampled labels or on the scale
    # range, so they are computed once and shared by every output row.
    bbox_targets = np.zeros((num_inside, 4), dtype=np.float32)
    if has_gt:
        bbox_targets[:] = bbox_transform(anchors, gt_boxes[argmax_overlaps, :4])
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_targets = bbox_targets.reshape((1, feat_height, feat_width, A * 4)).transpose(0, 3, 1, 2)

    if valid_ranges is None:
        _subsample(labels)
        labels, bbox_weights = _to_grid(labels)
        label = {'label': labels,
                 'bbox_target': bbox_targets,
                 'bbox_weight': bbox_weights}
        return label

    if has_gt:
        # Area from the same (x1, y1, x2, y2) columns that are passed to
        # bbox_transform above. The previous expression used columns
        # (3 - 1) * (4 - 2), which mixes the column after the box into the
        # width.
        gt_boxes_sizes = ((gt_boxes[:, 2] - gt_boxes[:, 0] + 1.) *
                          (gt_boxes[:, 3] - gt_boxes[:, 1] + 1.))

    all_labels, all_bbox_targets, all_bbox_weights = [], [], []
    for valid_range in valid_ranges:
        cls_labels = labels.copy()
        if has_gt:
            invalid_mask = ((gt_boxes_sizes < valid_range[0] ** 2) |
                            (gt_boxes_sizes > valid_range[1] ** 2))
            if invalid_mask.any():
                # ignore anchors that sit on a gt box outside this range
                invalid_overlaps = bbox_overlaps(anchors_f64,
                                                 gt_boxes[invalid_mask, :].astype(np.float64))
                invalid_max_overlaps = invalid_overlaps.max(axis=1)
                cls_labels[invalid_max_overlaps > invalid_anchor_threshold] = -1

        _subsample(cls_labels)
        cls_labels, bbox_weights = _to_grid(cls_labels)

        all_labels.append(cls_labels)
        all_bbox_targets.append(bbox_targets)
        all_bbox_weights.append(bbox_weights)

    all_labels = np.vstack(all_labels)
    all_bbox_targets = np.vstack(all_bbox_targets)
    all_bbox_weights = np.vstack(all_bbox_weights)

    # NOTE(review): the ranges returned to the caller are this fixed table
    # scaled by im_info[2], not the ``valid_ranges`` argument used above;
    # kept as-is for compatibility -- confirm this is intended.
    valid_ranges = np.array([[0, 90], [30, 160], [90, -1]], dtype=np.float32).reshape(-1, 2)
    valid_ranges *= im_info[2]
    # a negative upper bound means "unbounded": clamp it to the longer side
    inds = np.where(valid_ranges[:, 1] < 0)[0]
    valid_ranges[inds, 1] = max(im_info[0], im_info[1])

    label = {'label': all_labels,
             'bbox_target': all_bbox_targets,
             'bbox_weight': all_bbox_weights,
             'valid_ranges': valid_ranges}
    return label