def bbox_vote(dets_NMS, dets_all, thresh=0.8):
    """Refine NMS survivors by score-weighted voting over overlapping boxes.

    For every row of ``dets_NMS`` the box coordinates are replaced by the
    score-weighted average of all rows of ``dets_all`` whose overlap with it
    (as returned by ``bbox_overlaps``) is at least ``thresh``.

    Args:
        dets_NMS: 2-D array of detections that survived NMS; columns 0-3 are
            read as the box and column 4 as the score.
        dets_all: 2-D array with the same column layout that supplies the
            voting detections.
        thresh: Minimum overlap for a detection to take part in a vote.

    Returns:
        Array created with ``np.zeros_like(dets_NMS)`` whose columns 0-3 hold
        the voted boxes and whose column 4 holds the score. Any further
        columns stay zero.

    Raises:
        AssertionError: If a surviving box has no voter at ``thresh``.
    """
    # Number of top scores averaged when re-scoring. With a value <= 1 the
    # re-scoring branch below is skipped and the original score is kept.
    n_agreement = 1
    # Weight given to each "missing" vote when fewer than n_agreement
    # detections overlap a survivor.
    w_empty = 0.5

    dets_voted = np.zeros_like(dets_NMS)

    # np.float64 is the dtype the removed ``np.float`` alias used to name.
    _overlaps = bbox_overlaps(
        np.ascontiguousarray(dets_NMS[:, 0:4], dtype=np.float64),
        np.ascontiguousarray(dets_all[:, 0:4], dtype=np.float64))

    # For each surviving box
    for i, det in enumerate(dets_NMS):
        dets_overlapped = dets_all[np.where(_overlaps[i, :] >= thresh)[0]]
        assert len(dets_overlapped) > 0

        boxes = dets_overlapped[:, 0:4]
        scores = dets_overlapped[:, 4]

        # Score-weighted average of the overlapping boxes.
        dets_voted[i, 0:4] = np.dot(scores, boxes) / np.sum(scores)
        # Keep the original score unless re-scoring lowers it below.
        dets_voted[i, 4] = det[4]

        if n_agreement > 1:
            n_detected = len(scores)
            if n_detected >= n_agreement:
                top_scores = -np.sort(-scores)[:n_agreement]
                new_score = np.average(top_scores)
            else:
                new_score = (np.average(scores)
                             * (n_detected * 1.0
                                + (n_agreement - n_detected) * w_empty)
                             / n_agreement)
            # Voting may only lower a score, never raise it.
            dets_voted[i, 4] = min(new_score, dets_voted[i, 4])

    return dets_voted
def image_eval(pred, gt, ignore, iou_thresh):
    """Evaluate the predictions of a single image.

    Args:
        pred: 2-D array of predictions; columns 0-3 are read as
            ``x, y, w, h`` and converted to corner form before matching.
        gt: 2-D array of ground-truth boxes in the same ``x, y, w, h`` form.
        ignore: Per-ground-truth flags; a value of 0 marks a box whose
            matches are flagged with -1 instead of being counted.
        iou_thresh: Minimum overlap for a prediction to match a box.

    Returns:
        Tuple ``(pred_recall, proposal_list)``. ``pred_recall[h]`` is the
        number of ground-truth boxes recalled by predictions ``0..h``;
        ``proposal_list[h]`` is -1 for predictions matched to a box whose
        ``ignore`` flag is 0 and 1 otherwise.
    """
    boxes_pred = pred.copy()
    boxes_gt = gt.copy()
    num_pred = boxes_pred.shape[0]

    pred_recall = np.zeros(num_pred)
    recall_list = np.zeros(boxes_gt.shape[0])
    proposal_list = np.ones(num_pred)

    # Convert (x, y, w, h) to (x1, y1, x2, y2).
    boxes_pred[:, 2] += boxes_pred[:, 0]
    boxes_pred[:, 3] += boxes_pred[:, 1]
    boxes_gt[:, 2] += boxes_gt[:, 0]
    boxes_gt[:, 3] += boxes_gt[:, 1]

    overlaps = bbox_overlaps(boxes_pred[:, :4], boxes_gt)

    for h in range(num_pred):
        row = overlaps[h]
        best_overlap = row.max()
        best_gt = row.argmax()

        if best_overlap >= iou_thresh:
            if ignore[best_gt] == 0:
                recall_list[best_gt] = -1
                proposal_list[h] = -1
            elif recall_list[best_gt] == 0:
                recall_list[best_gt] = 1

        # Running count of ground-truth boxes recalled so far.
        pred_recall[h] = np.where(recall_list == 1)[0].size

    return pred_recall, proposal_list
def create_roidb_from_box_list(self, box_list, gt_roidb):
    """Build a roidb from per-image box arrays.

    Args:
        box_list: Sequence holding one 2-D box array per image; its length
            must equal ``self.num_images``.
        gt_roidb: Optional ground-truth roidb, indexable per image, whose
            entries provide ``'boxes'`` and ``'gt_classes'``. With ``None``
            (or an image without ground-truth boxes) all overlaps stay zero.

    Returns:
        List with one dict per image holding ``'boxes'``, ``'gt_classes'``
        (all zeros), ``'gt_overlaps'`` (sparse CSR matrix of shape
        ``(num_boxes, self.num_classes)``), ``'flipped'`` (``False``) and
        ``'seg_areas'`` (all zeros).

    Raises:
        AssertionError: If ``len(box_list)`` differs from ``self.num_images``.
    """
    assert len(box_list) == self.num_images, \
        'Number of boxes must match number of ground-truth images'
    roidb = []
    for i in range(self.num_images):
        boxes = box_list[i]
        num_boxes = boxes.shape[0]
        overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

        if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
            gt_boxes = gt_roidb[i]['boxes']
            gt_classes = gt_roidb[i]['gt_classes']
            # np.float64 is the dtype the removed ``np.float`` alias named.
            gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                        gt_boxes.astype(np.float64))
            argmaxes = gt_overlaps.argmax(axis=1)
            maxes = gt_overlaps.max(axis=1)
            # Record the best overlap of each box under the class of the
            # ground-truth box it overlaps most; boxes with no overlap at
            # all keep an all-zero row.
            matched = np.where(maxes > 0)[0]
            overlaps[matched, gt_classes[argmaxes[matched]]] = maxes[matched]

        roidb.append({
            'boxes': boxes,
            'gt_classes': np.zeros((num_boxes, ), dtype=np.int32),
            'gt_overlaps': scipy.sparse.csr_matrix(overlaps),
            'flipped': False,
            'seg_areas': np.zeros((num_boxes, ), dtype=np.float32),
        })
    return roidb
def __call__(self, image, boxes=None, labels=None):
    """Crop ``image`` and adapt the boxes and labels to the crop.

    The crop window has size ``self.crop_height`` x ``self.crop_width``. It
    is centred when ``self.center_crop`` is truthy and placed uniformly at
    random otherwise.

    Args:
        image: Array indexed as ``[row, column, channel]``.
        boxes: Optional 2-D array whose columns 0-3 are read as
            ``x1, y1, x2, y2`` in image coordinates. It is not modified.
        labels: Per-box labels, filtered in step with ``boxes``.

    Returns:
        Tuple ``(cropped_im, gt_boxes, labels)``. Boxes are shifted into
        crop coordinates and clamped to the crop. A box is dropped, together
        with its label, if it collapses to zero width or height or if its
        clipped version overlaps no original box by at least 0.2. When
        ``boxes`` is ``None`` or has no rows, ``boxes`` and ``labels`` are
        returned unchanged.
    """
    crop_height = self.crop_height
    crop_width = self.crop_width

    image_height, image_width = image.shape[0], image.shape[1]

    if self.center_crop:
        # Floor division keeps the offsets integral; a float offset cannot
        # be used as a slice index under Python 3.
        offset_height = (image_height - crop_height) // 2
        offset_width = (image_width - crop_width) // 2
    else:
        max_offset_height = image_height - crop_height + 1
        max_offset_width = image_width - crop_width + 1
        offset_height = np.random.randint(low=0, high=max_offset_height, size=(1, ))[0]
        offset_width = np.random.randint(low=0, high=max_offset_width, size=(1, ))[0]

    cropped_im = image[offset_height:offset_height + crop_height,
                       offset_width:offset_width + crop_width, :]
    # ``boxes`` defaults to None, so guard before touching ``.shape``.
    if boxes is None or boxes.shape[0] == 0:
        return cropped_im, boxes, labels

    # Clip on a copy so the caller's array is left untouched.
    clipped = boxes.copy()
    clipped[:, 0] = np.maximum(boxes[:, 0], offset_width)
    clipped[:, 1] = np.maximum(boxes[:, 1], offset_height)
    clipped[:, 2] = np.minimum(boxes[:, 2], offset_width + crop_width - 1)
    clipped[:, 3] = np.minimum(boxes[:, 3], offset_height + crop_height - 1)

    # Best overlap of every clipped box with any of the original boxes.
    tovlp = bbox_overlaps(clipped.astype(np.float64), boxes.astype(np.float64))
    max_tovlp = tovlp.max(axis=1)

    # Shift the original boxes into crop coordinates and clamp to the crop.
    label_rect = boxes.copy()
    label_rect[:, 0] -= offset_width
    label_rect[:, 1] -= offset_height
    label_rect[:, 2] -= offset_width
    label_rect[:, 3] -= offset_height

    label_rect[:, 0] = np.minimum(crop_width - 1, np.maximum(0, label_rect[:, 0]))
    label_rect[:, 1] = np.minimum(crop_height - 1, np.maximum(0, label_rect[:, 1]))
    label_rect[:, 2] = np.minimum(crop_width - 1, np.maximum(0, label_rect[:, 2]))
    label_rect[:, 3] = np.minimum(crop_height - 1, np.maximum(0, label_rect[:, 3]))

    degenerate = np.logical_or(label_rect[:, 2] <= label_rect[:, 0],
                               label_rect[:, 3] <= label_rect[:, 1])
    invalid_rows = np.where(np.logical_or(degenerate, max_tovlp < 0.2))[0]

    gt_boxes = np.delete(label_rect, invalid_rows, axis=0)
    labels = np.delete(labels, invalid_rows, axis=0)

    return cropped_im, gt_boxes, labels
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: 2-D array of RoIs; columns 1-4 are read as the box.
        all_scores: Per-RoI scores, indexed in step with ``all_rois``.
        gt_boxes: 2-D array; columns 0-3 are the box, column 4 the label.
        fg_rois_per_image: Upper bound on foreground samples.
        rois_per_image: Total number of RoIs to sample.
        num_classes: Forwarded to ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels, rois, roi_scores, bbox_targets,
        bbox_inside_weights)`` for the sampled RoIs. Foreground samples come
        first and every label after them is set to 0.
    """
    fg_thresh = 0.5      # minimum overlap for a foreground RoI
    bg_thresh_hi = 0.5   # background RoIs overlap in [bg_thresh_lo, bg_thresh_hi)
    bg_thresh_lo = 0.1

    # overlaps: (rois x gt_boxes). np.float64 is the dtype the removed
    # ``np.float`` alias named.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    fg_inds = np.where(max_overlaps >= fg_thresh)[0]
    bg_inds = np.where((max_overlaps < bg_thresh_hi) &
                       (max_overlaps >= bg_thresh_lo))[0]

    # Small modification to the original version where we ensure a fixed
    # number of regions are sampled.
    if fg_inds.size > 0 and bg_inds.size > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
    elif fg_inds.size > 0:
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0
    else:
        # Neither pool has candidates: fall back to every RoI below the
        # background upper bound, ignoring the lower bound.
        bg_inds = np.where(max_overlaps < bg_thresh_hi)[0]
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
def relationship_checker(gt_objects, gt_relationships, bicls, boxes_s, boxes_o, thres=0.99):
    '''
    Count how many ground-truth relationships are recovered by the
    'has relationship' predictions.

    :param gt_objects: (gt_num, 5) [x1,y1,x2,y2,cls]
    :param gt_relationships: (gt_num, gt_num); entries > 0 mark a relationship
    :param bicls: prediction of 'have relationship or not' (gt*(gt-1), 1)
    :param boxes_s: (gt*(gt-1), 5) [0,x1,y1,x2,y2] subject boxes
    :param boxes_o: (gt*(gt-1), 5) object boxes
    :param thres: minimum overlap for a candidate box to count as the
        ground-truth subject / object box
    :return: (precision_correct, precision_total, recall_correct,
        recall_total); precision_correct always equals recall_correct
    '''
    gt_rel_sub_idx, gt_rel_obj_idx = np.where(gt_relationships > 0)
    gt_sub = gt_objects[gt_rel_sub_idx, :5]
    gt_obj = gt_objects[gt_rel_obj_idx, :5]

    recall_total = len(gt_rel_sub_idx)  # number of ground-truth relationships
    precision_total = np.sum(bicls >= 0.5)
    recall_correct = 0

    # np.float64 is the dtype the removed ``np.float`` alias named.
    sub_overlaps = bbox_overlaps(
        np.ascontiguousarray(boxes_s[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_sub[:, :4], dtype=np.float64))
    obj_overlaps = bbox_overlaps(
        np.ascontiguousarray(boxes_o[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_obj[:, :4], dtype=np.float64))

    for gt_id in range(recall_total):
        # Candidate pairs whose subject and object boxes both match this
        # relationship. ``thres`` is honoured here instead of an exact
        # floating-point comparison against 1.
        fg_candidate = np.where(
            np.logical_and(sub_overlaps[:, gt_id] >= thres,
                           obj_overlaps[:, gt_id] >= thres))[0]
        # A relationship is recalled as soon as one candidate is predicted
        # positive.
        if any(bicls[candidate_id] >= 0.5 for candidate_id in fg_candidate):
            recall_correct += 1

    precision_correct = recall_correct
    return precision_correct, precision_total, recall_correct, recall_total
def check_recall(rois, gt_objects, top_N, thres=0.5):
    """Count the ground-truth objects covered by the first ``top_N`` RoIs.

    Args:
        rois: Object exposing ``.cpu().data.numpy()`` (presumably a torch
            tensor -- TODO confirm); columns 1-4 of the resulting array are
            read as the box.
        gt_objects: 2-D array whose columns 0-3 are the box.
        top_N: Number of leading RoIs to consider.
        thres: Minimum overlap for an object to count as covered.

    Returns:
        Tuple ``(correct_cnt, total_cnt)``: the number of ground-truth
        objects whose best overlap reaches ``thres`` and the total number of
        ground-truth objects.
    """
    # Slice the box *columns* of gt_objects. The previous ``gt_objects[:4]``
    # kept the first four rows instead, so at most four objects were
    # evaluated. np.float64 is the dtype the removed ``np.float`` alias named.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(rois.cpu().data.numpy()[:top_N, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_objects[:, :4], dtype=np.float64))

    # Best overlap achieved for each ground-truth object.
    overlap_gt = np.amax(overlaps, axis=0)
    correct_cnt = np.sum(overlap_gt >= thres)
    total_cnt = overlap_gt.size
    return correct_cnt, total_cnt
def _merge_dets(self, detections, tile_ids):
    """Merge detections coming from different tiles.

    Args:
        detections: Detections convertible to a ``DET_DTYPE`` record array.
        tile_ids: Tile id of each detection.

    Returns:
        Record array of merged detections; the converted input is returned
        as-is when it is empty.
    """
    dets = np.asarray(detections, dtype=DET_DTYPE).view(np.recarray)
    tiles = np.asarray(tile_ids)
    if not len(dets):
        return dets

    # Pairwise overlaps of every detection with every other detection
    # drive the merge across tiles.
    tlbr = dets.tlbr
    pairwise_ious = bbox_overlaps(tlbr, tlbr)

    merged = self._merge(dets, tiles, pairwise_ious, self.merge_thresh)
    return merged.view(np.recarray)
def _iou_cost(self, trk_ids, detections):
    """Build the gated overlap matrix between tracks and detections.

    Args:
        trk_ids: Ids of the tracks to compare (keys into ``self.tracks``).
        detections: Detections exposing ``tlbr`` and ``label``.

    Returns:
        Matrix of shape ``(len(trk_ids), len(detections))`` produced by
        ``self._gate_cost``; an uninitialised matrix of that shape, with no
        elements, when either side is empty.
    """
    num_trks, num_dets = len(trk_ids), len(detections)
    if num_trks == 0 or num_dets == 0:
        return np.empty((num_trks, num_dets))

    tracks = [self.tracks[trk_id] for trk_id in trk_ids]
    # Labels are handed to the gate so that an associated pair has the
    # same class label.
    trk_labels = np.array([trk.label for trk in tracks])
    trk_bboxes = np.array([trk.tlbr for trk in tracks])

    raw_ious = bbox_overlaps(trk_bboxes, detections.tlbr)
    return self._gate_cost(raw_ious, trk_labels, detections.label,
                           self.iou_thresh, True)
def checker(rois, gt_objects, thres=0.7):
    """Count precision and recall hits of RoIs against ground-truth boxes.

    Args:
        rois: Object exposing ``.cpu().data.numpy()`` (presumably a torch
            tensor -- TODO confirm); columns 1-4 of the resulting array are
            read as the box.
        gt_objects: 2-D array whose columns 0-3 are the box.
        thres: Minimum overlap for a match.

    Returns:
        Tuple ``(precision_correct, precision_total, recall_correct,
        recall_total)``. The precision pair counts RoIs whose best overlap
        with any ground-truth box reaches ``thres``; the recall pair counts
        ground-truth boxes whose best overlap with any RoI reaches it.
    """
    # np.float64 is the dtype the removed ``np.float`` alias named.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(rois.cpu().data.numpy()[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_objects[:, :4], dtype=np.float64))

    # Per-RoI best overlap -> precision.
    max_overlaps = np.amax(overlaps, axis=1)
    precision_correct = np.sum(max_overlaps >= thres)
    precision_total = max_overlaps.size

    # Per-ground-truth best overlap -> recall.
    overlaps_gt = np.amax(overlaps, axis=0)
    recall_correct = np.sum(overlaps_gt >= thres)
    recall_total = overlaps_gt.size

    return precision_correct, precision_total, recall_correct, recall_total
def cython_bbox_ious(atlbrs, btlbrs):
    """Compute pairwise overlaps of two box lists with ``cython_bbox``.

    Args:
        atlbrs: Sequence of N boxes.
        btlbrs: Sequence of M boxes.

    Returns:
        ``(N, M)`` float64 array of overlaps. When either sequence is empty
        an all-zero array of that shape is returned and ``cython_bbox`` is
        never imported.

    Raises:
        Exception: Whatever importing ``cython_bbox`` raised, re-raised
            after logging an installation hint.
    """
    # np.float64 is the dtype the removed ``np.float`` alias named.
    ious = np.zeros((len(atlbrs), len(btlbrs)), dtype=np.float64)
    if ious.size == 0:
        return ious

    try:
        import cython_bbox
    except Exception:
        logger.error('cython_bbox not found, please install cython_bbox.'
                     'for example: `pip install cython_bbox`.')
        # Bare raise keeps the original traceback intact.
        raise

    ious = cython_bbox.bbox_overlaps(
        np.ascontiguousarray(atlbrs, dtype=np.float64),
        np.ascontiguousarray(btlbrs, dtype=np.float64))

    return ious
def compute_pairwise_iou(a, b):
    """Computes the pairwise IoU-based cost for the arrays of boxes a and b.

    Args:
        a: np.ndarray; Array of N boxes in format x1y1x2y2.
        b: np.ndarray; Array of M boxes in format x1y1x2y2.

    Returns:
        np.ndarray; A NxM array where the entry at (i, j) is one minus the
        overlap reported by ``bbox_overlaps`` for box i from a and box j
        from b (despite the function name, the complement is returned, not
        the overlap itself).
    """
    boxes_a = np.ascontiguousarray(a, dtype=np.float64)
    boxes_b = np.ascontiguousarray(b, dtype=np.float64)
    pairwise_overlap = bbox_overlaps(boxes_a, boxes_b)
    return 1 - pairwise_overlap
def iou_distance(A, B):
    '''Compute the IoU distance between two groups of trajectories.

    Args:
        A (list of Trajectory): trajectory group A; each item exposes ``ltrb``
        B (list of Trajectory): trajectory group B; each item exposes ``ltrb``

    Returns:
        costs (numpy.ndarray): ``(len(A), len(B))`` cost matrix holding one
            minus the overlap of every pair. When either group is empty an
            all-zero matrix of that shape is returned.
    '''
    # np.float64 is the dtype the removed ``np.float`` alias named.
    ious = np.zeros((len(A), len(B)), dtype=np.float64)
    if ious.size == 0:
        return ious

    boxes_a = [a.ltrb for a in A]
    boxes_b = [b.ltrb for b in B]
    ious = bbox_overlaps(np.ascontiguousarray(boxes_a, dtype=np.float64),
                         np.ascontiguousarray(boxes_b, dtype=np.float64))

    costs = 1 - ious
    return costs
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Sample a fixed number of foreground and background RoIs with targets.

    Args:
        all_rois: 2-D array of RoIs; columns 1-4 are read as the box.
        all_scores: Per-RoI scores, indexed in step with ``all_rois``.
        gt_boxes: 2-D array; columns 0-3 are the box, column 4 the label.
        fg_rois_per_image: Upper bound on foreground samples.
        rois_per_image: Total number of RoIs to sample.
        num_classes: Forwarded to ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels, rois, roi_scores, bbox_targets,
        bbox_inside_weights)`` for the sampled RoIs. Foreground samples come
        first and every label after them is set to 0.

    Raises:
        Exception: If no RoI qualifies as foreground or background.
    """
    # overlaps: (rois x gt_boxes). np.float64 is the dtype the removed
    # ``np.float`` alias named.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Foreground: overlap >= 0.5; background: overlap in [0.1, 0.5).
    fg_inds = np.where(max_overlaps >= 0.5)[0]
    bg_inds = np.where((max_overlaps < 0.5) & (max_overlaps >= 0.1))[0]

    if fg_inds.size > 0 and bg_inds.size > 0:
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        # Sample with replacement only when the pool is too small.
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
    elif fg_inds.size > 0:
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0
    else:
        raise Exception('no foreground or background RoIs available to sample')

    # Foreground indices first, then background.
    keep_inds = np.append(fg_inds, bg_inds)
    labels = labels[keep_inds]
    # Everything after the foreground samples is background (label 0).
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    bbox_target_data = _compute_targets_label(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
    bbox_targets, bbox_inside_weights = _get_bbox_regression_labels(
        bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
def _remove_duplicate(self, updated, aged):
    """Delete tracks that duplicate each other across the two id lists.

    For every (updated, aged) pair whose overlap reaches
    ``self.duplicate_iou``, the track with the later ``start_frame`` is
    removed from ``self.tracks``; on a tie the aged track is removed.

    Args:
        updated: Ids of tracks updated in the current step.
        aged: Ids of tracks that were not updated.
    """
    if len(updated) == 0 or len(aged) == 0:
        return

    boxes_updated = np.array([self.tracks[trk_id].tlbr for trk_id in updated])
    boxes_aged = np.array([self.tracks[trk_id].tlbr for trk_id in aged])

    pair_ious = bbox_overlaps(boxes_updated, boxes_aged)
    rows, cols = np.where(pair_ious >= self.duplicate_iou)

    # Collect first, delete afterwards, so every comparison still sees
    # all tracks.
    dup_ids = set()
    for row, col in zip(rows, cols):
        updated_id = updated[row]
        aged_id = aged[col]
        updated_is_older = (self.tracks[updated_id].start_frame
                            <= self.tracks[aged_id].start_frame)
        dup_ids.add(aged_id if updated_is_older else updated_id)

    for trk_id in dup_ids:
        LOGGER.debug('Duplicate: %s', self.tracks[trk_id])
        del self.tracks[trk_id]
def eval(boxes, label, scores, gt_bboxes, gt_label):
    """Score detections against ground-truth boxes per frame and per video.

    Args:
        boxes: Sequence of 2-D arrays, one per detection; column 0 of each
            row is read as a frame index and columns 1-4 as the box.
        label: Per-detection labels, compared against ``gt_label``.
        scores: Per-detection scores.
        gt_bboxes: 3-D array whose last axis holds a frame index in
            position 0 and a box in positions 1-4.
        gt_label: Ground-truth label.

    Returns:
        Tuple ``(frame_det, video_det, gt_nums, gt_vid)``. ``frame_det`` and
        ``video_det`` are ``(K, 2)`` arrays of two-value rows (mostly
        ``[score, overlap]``), ``gt_nums`` is ``gt_bboxes.size // 5`` and
        ``gt_vid`` is ``gt_bboxes.shape[0]``.
    """
    frame_det = np.empty((0, 2))
    video_det = np.empty((0, 2))

    for i in range(len(boxes)):
        if not (label[i] == gt_label):
            # A detection with the wrong label scores zero overlap at video
            # level.
            # NOTE(review): the collapsed source has no else/continue here,
            # so the per-frame scoring below also runs for such detections
            # -- confirm this is intended.
            video_det = np.vstack((video_det, np.array([scores[i], 0])))

        iou = 0
        for j in range(boxes[i].shape[0]):
            frame_idx = boxes[i][j, 0]
            curr_box = np.expand_dims(boxes[i][j, 1:5], axis=0)
            # Ground-truth boxes annotated on the same frame.
            curr_gt_idx = np.where(gt_bboxes[:, :, 0] == frame_idx)
            curr_gt = gt_bboxes[curr_gt_idx][:, 1:5]
            # np.float64 is the dtype the removed ``np.float`` alias named.
            overlaps = bbox_overlaps(
                np.ascontiguousarray(curr_box, dtype=np.float64),
                np.ascontiguousarray(curr_gt, dtype=np.float64)).max()
            frame_det = np.vstack((frame_det, np.array([scores[i], overlaps])))
            iou += overlaps

        # Frames after the last frame of the detection get fixed placeholder
        # values.
        # NOTE(review): 0.93 / 0.83 are unexplained constants in the source,
        # and whether ``iou += 0.83`` belongs inside this loop cannot be
        # told from the collapsed source -- verify.
        for j in range(int(gt_bboxes.shape[1] - boxes[i][-1, 0] - 1)):
            frame_det = np.vstack((frame_det, np.array([scores[i], 0.93])))
            iou += 0.83

        # Frames before the first frame of the detection.
        for j in range(int(boxes[i][0, 0])):
            frame_det = np.vstack((frame_det, np.array([0, 1])))

        video_det = np.vstack(
            (video_det,
             np.array([scores[i], iou / (gt_bboxes.shape[1] - boxes[i][0, 0])])))

    # Five values per ground-truth box; floor division keeps the count an
    # integer, as it was under Python 2.
    gt_nums = gt_bboxes.size // 5
    gt_vid = gt_bboxes.shape[0]
    return frame_det, video_det, gt_nums, gt_vid
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors):
    """Same as the anchor target layer in original Fast/er RCNN.

    Labels every anchor as positive (1), negative (0) or don't-care (-1)
    from its overlap with the ground-truth boxes, subsamples them to the
    configured batch size and computes regression targets and weights.

    Args:
        rpn_cls_score: Array whose dimensions 1 and 2 are the feature-map
            height and width.
        gt_boxes: 2-D array of ground-truth boxes.
        im_info: Sequence with the image height at index 0 and the width at
            index 1.
        _feat_stride: Unused.
        all_anchors: ``(total_anchors, 4)`` array of anchors.
        num_anchors: Number of anchors per feature-map location.

    Returns:
        Tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``. Labels have shape
        ``(1, 1, A * height, width)``; the other three have shape
        ``(1, height, width, A * 4)``.
    """
    A = num_anchors
    total_anchors = all_anchors.shape[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes: (ex, gt).
    # np.float64 is the dtype the removed ``np.float`` alias named.
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                             np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # Every anchor that ties for the best overlap with some gt box.
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels: (1, H, W, A) -> (1, A, H, W) -> (1, 1, A * H, W)
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    rpn_labels = labels.reshape((1, 1, A * height, width))

    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors):
    """Assign RPN labels and regression targets to anchors.

    Anchors are labelled positive (1), negative (0) or don't-care (-1) from
    their overlap with the ground-truth boxes (negative below 0.3, positive
    above 0.7 or when best for some ground-truth box), then subsampled to at
    most 256 labelled anchors, of which at most 128 are positive.

    Args:
        rpn_cls_score: Array whose dimensions 1 and 2 are the feature-map
            height and width.
        gt_boxes: 2-D array of ground-truth boxes.
        im_info: Sequence whose first element holds the image height at
            index 0 and the width at index 1.
        _feat_stride: Unused.
        all_anchors: ``(total_anchors, 4)`` array of anchors.
        num_anchors: Number of anchors per feature-map location.

    Returns:
        Tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``. Labels have shape
        ``(1, 1, height * A, width)``; the other three have shape
        ``(1, height, width, A * 4)``.
    """
    A = num_anchors
    total_anchors = all_anchors.shape[0]
    im_info = im_info[0]
    _allowed_border = 0
    height, width = rpn_cls_score.shape[1:3]

    # Keep only the anchors that lie completely inside the image.
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &
        (all_anchors[:, 3] < im_info[0] + _allowed_border))[0]
    anchors = all_anchors[inds_inside, :]

    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # np.float64 is the dtype the removed ``np.float`` alias named.
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                             np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps, np.arange(overlaps.shape[1])]
    # Every anchor that ties for the best overlap with some gt box.
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # Negatives first so that the positive rules below can overwrite them.
    labels[max_overlaps < 0.3] = 0
    labels[gt_argmax_overlaps] = 1
    labels[max_overlaps > 0.7] = 1

    # Subsample positives down to half of the 256-anchor batch.
    num_fg = int(256 * 0.5)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=len(fg_inds) - num_fg, replace=False)
        labels[disable_inds] = -1

    # Fill the rest of the batch with negatives.
    num_bg = 256 - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=len(bg_inds) - num_bg, replace=False)
        labels[disable_inds] = -1

    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # Only positive anchors contribute to the regression loss.
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0))

    # Uniform weighting over all labelled (non don't-care) anchors.
    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    num_examples = np.sum(labels >= 0)
    positive_weights = np.ones((1, 4)) * 1.0 / num_examples
    negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # Map back up to the full set of anchors.
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    # labels: (1, H, W, A) -> (1, A, H, W) -> (1, 1, H * A, W)
    labels = labels.reshape((1, height, width, A)).transpose((0, 3, 1, 2))
    rpn_labels = labels.reshape((1, 1, height * A, width))

    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
    """Sample a fixed number of foreground and background RoIs with targets.

    Args:
        all_rois: 2-D array of candidate RoIs; columns 1-4 are read as the
            box.
        all_scores: Per-RoI scores, indexed in step with ``all_rois``.
        gt_boxes: 2-D array; columns 0-3 are the box, column 4 the class
            label (a class id, not one-hot).
        fg_rois_per_image: Upper bound on foreground samples.
        rois_per_image: Total number of RoIs to sample.
        num_classes: Forwarded to ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels, rois, roi_scores, bbox_targets,
        bbox_inside_weights)`` for the sampled RoIs. Foreground samples come
        first and every label after them is set to 0. ``bbox_targets`` and
        ``bbox_inside_weights`` are whatever
        ``_get_bbox_regression_labels`` returns (presumably per-class
        targets and weights that are non-zero only for foreground RoIs --
        TODO confirm against that helper).

    Raises:
        ValueError: If no RoI qualifies as foreground or background.
    """
    # overlaps: (rois x gt_boxes). np.float64 is the dtype the removed
    # ``np.float`` alias named.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)  # best-matching gt index per RoI
    max_overlaps = overlaps.max(axis=1)      # overlap with that gt box
    labels = gt_boxes[gt_assignment, 4]

    # Foreground: best overlap >= FG_THRESH.
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Background: best overlap within [BG_THRESH_LO, BG_THRESH_HI).
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]

    # Small modification to the original version where we ensure a fixed
    # number of regions are sampled.
    if fg_inds.size > 0 and bg_inds.size > 0:
        # Cap the foreground count by what is available, sample without
        # repetition, then fill the remainder with background.
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        # Repeated picks are allowed only when the pool is too small.
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
    elif fg_inds.size > 0:
        # No background candidates: fill the whole sample with foreground.
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        # No foreground candidates: fill the whole sample with background.
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
        fg_rois_per_image = 0
    else:
        # Previously a leftover ``pdb.set_trace()``, which stalls or aborts
        # unattended runs; fail with a clear error instead.
        raise ValueError('no foreground or background RoIs available to sample')

    # Foreground indices first, then background.
    keep_inds = np.append(fg_inds, bg_inds)
    labels = labels[keep_inds]
    # Everything after the foreground samples is background (label 0).
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    # Regression targets of each sampled RoI towards its assigned gt box.
    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
def evaluate_recall(self, candidate_boxes=None, thresholds=None, area='all', limit=None):
    """Evaluate detection proposal recall metrics.

    Args:
        candidate_boxes: Optional sequence with one 2-D box array per image.
            When ``None`` the non-ground-truth boxes of ``self.roidb`` are
            used.
        thresholds: Optional overlap thresholds; defaults to 0.5 through
            0.95 in steps of 0.05.
        area: Name of the ground-truth area range to evaluate ('all',
            'small', 'medium', 'large', '96-128', '128-256', '256-512' or
            '512-inf').
        limit: Optional maximum number of boxes considered per image.

    Returns:
        results: dictionary of results with keys
            'ar': average recall
            'recalls': vector recalls at each IoU overlap threshold
            'thresholds': vector of IoU overlap thresholds
            'gt_overlaps': vector of all ground-truth overlaps

    Raises:
        AssertionError: If ``area`` is not a known range name.
    """
    # Inclusive [min, max] ground-truth area for every range name.
    area_ranges = {
        'all': [0**2, 1e5**2],
        'small': [0**2, 32**2],
        'medium': [32**2, 96**2],
        'large': [96**2, 1e5**2],
        '96-128': [96**2, 128**2],
        '128-256': [128**2, 256**2],
        '256-512': [256**2, 512**2],
        '512-inf': [512**2, 1e5**2],
    }
    assert area in area_ranges, 'unknown area range: {}'.format(area)
    area_range = area_ranges[area]

    gt_overlaps = np.zeros(0)
    num_pos = 0
    for i in range(self.num_images):
        entry = self.roidb[i]
        # Checking for max_overlaps == 1 avoids including crowd annotations
        # (...pretty hacking :/)
        max_gt_overlaps = entry['gt_overlaps'].toarray().max(axis=1)
        gt_inds = np.where((entry['gt_classes'] > 0) &
                           (max_gt_overlaps == 1))[0]
        gt_boxes = entry['boxes'][gt_inds, :]
        gt_areas = entry['seg_areas'][gt_inds]
        valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                                 (gt_areas <= area_range[1]))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)

        if candidate_boxes is None:
            # If candidate_boxes is not supplied, the default is to use the
            # non-ground-truth boxes from this roidb
            non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
            boxes = entry['boxes'][non_gt_inds, :]
        else:
            boxes = candidate_boxes[i]
        if boxes.shape[0] == 0:
            # The ground-truth boxes of this image still count in num_pos,
            # so they lower the recall.
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]

        # np.float64 is the dtype the removed ``np.float`` alias named.
        overlaps = bbox_overlaps(boxes.astype(np.float64),
                                 gt_boxes.astype(np.float64))

        # Greedy one-to-one matching: repeatedly take the best-covered
        # ground-truth box and retire it together with its proposal.
        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        for j in range(gt_boxes.shape[0]):
            # find which proposal box maximally covers each gt box
            argmax_overlaps = overlaps.argmax(axis=0)
            # and get the iou amount of coverage for each gt box
            max_overlaps = overlaps.max(axis=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert (gt_ovr >= 0)
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert (_gt_overlaps[j] == gt_ovr)
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        # append recorded iou coverage level
        gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

    gt_overlaps = np.sort(gt_overlaps)
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    ar = recalls.mean()
    return {
        'ar': ar,
        'recalls': recalls,
        'thresholds': thresholds,
        'gt_overlaps': gt_overlaps
    }
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride,
                        all_anchors, num_anchors):
    """Same as the anchor target layer in original Fast/er RCNN.

    Labels every anchor as foreground (1), background (0) or don't-care
    (-1) and computes the box-regression targets and loss weights.  This
    variant uses fixed settings (see the constants at the top of the body).

    Args:
        rpn_cls_score: array whose axes 1 and 2 are the feature-map height
            and width; only its shape is read.
        gt_boxes: ground-truth boxes, one row per box.
        im_info: image information; index 0 is the height and index 1 the
            width used to discard anchors crossing the image boundary.
        _feat_stride: unused, kept for interface compatibility.
        all_anchors: (total_anchors, 4) anchors; ``total_anchors`` must
            equal ``height * width * num_anchors`` for the final reshapes.
        num_anchors: number of anchors per feature-map location.

    Returns:
        Tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)`` with shapes ``(1, 1, A * height,
        width)`` for the labels and ``(1, height, width, A * 4)`` for the
        other three.
    """
    # Fixed RPN training settings of this variant.
    negative_overlap = 0.3  # max IoU below this -> background
    positive_overlap = 0.7  # max IoU at or above this -> foreground
    batch_size = 256        # maximum number of labelled anchors
    fg_fraction = 0.5       # maximum share of foreground in the batch

    A = num_anchors
    total_anchors = all_anchors.shape[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes: overlaps (ex, gt)
    # (np.float was removed from NumPy; np.float64 is the same dtype.)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # every anchor that ties for the best overlap with some gt box
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # Background is assigned first so the foreground rules below can
    # override it (positives are never clobbered in this variant).
    labels[max_overlaps < negative_overlap] = 0
    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1
    # fg label: above threshold IoU
    labels[max_overlaps >= positive_overlap] = 1

    # subsample positive labels if we have too many
    num_fg = int(fg_fraction * batch_size)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = batch_size - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # only the positive anchors have regression targets
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0))

    # uniform weighting of examples (given non-uniform sampling)
    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    num_examples = np.sum(labels >= 0)
    uniform_weights = np.ones((1, 4)) * 1.0 / num_examples
    bbox_outside_weights[labels == 1, :] = uniform_weights
    bbox_outside_weights[labels == 0, :] = uniform_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors,
                                 inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors,
                                  inds_inside, fill=0)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    rpn_labels = labels.reshape((1, 1, A * height, width))

    # bbox_targets and both weight maps share one layout
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))

    return (rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
            rpn_bbox_outside_weights)
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride,
                        all_anchors, num_anchors):
    """Build RPN training targets for all anchors of one image.

    Anchors that cross the image boundary are labelled -1 (don't care) and
    get zero targets and weights.  Among the remaining anchors at most
    ``cfg.TRAIN.RPN_BATCHSIZE`` are kept as labelled samples (1 positive,
    0 negative); everything else is set to -1.

    Args:
        rpn_cls_score: foreground/background scores of the RPN branch; only
            its shape is read, with axes 1 and 2 taken as feature-map
            height and width.  (Original note: ``[:, :, :, 18]``.)
        gt_boxes: ground-truth boxes.  (Original note: placeholder of shape
            ``[None, 5]``.)
        im_info: image information; index 0 is used as height and index 1
            as width.
        _feat_stride: unused here.  (Original note: 16.)
        all_anchors: ``[-1, 4]`` anchor boxes in original-image coordinates,
            one set of ``num_anchors`` boxes per feature-map location.
        num_anchors: anchors per feature-map location.  (Original note: 9.)

    Returns:
        rpn_labels: ``(1, 1, A * height, width)``; 1 positive, 0 negative,
            -1 ignored.
        rpn_bbox_targets: ``(1, height, width, A * 4)`` regression offsets
            produced by ``_compute_targets``.
        rpn_bbox_inside_weights: ``(1, height, width, A * 4)``; non-zero
            only for positive anchors.
        rpn_bbox_outside_weights: ``(1, height, width, A * 4)``
            normalisation weights for labelled anchors.
    """
    A = num_anchors
    total_anchors = all_anchors.shape[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image; np.where returns a tuple, [0]
    # selects the index array
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes: an (n anchors, m gt)
    # matrix.  np.float was removed from NumPy; np.float64 is the same dtype.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))

    # per anchor: index of the best-overlapping gt box and that overlap
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]

    # per gt box: best overlap achieved by any anchor, then every anchor
    # that reaches that best overlap for some gt box
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many; negatives fill
    # whatever part of the batch the positives left free
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # regression offsets from each inside anchor to its best-matching gt box
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # only the positive ones have regression targets
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        # an explicit positive weight must lie strictly between 0 and 1
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors: anchors outside the image get
    # label -1 and zero targets / weights
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors,
                                 inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors,
                                  inds_inside, fill=0)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))

    # bbox_inside_weights
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))

    # bbox_outside_weights
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))

    return (rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
            rpn_bbox_outside_weights)
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride,
                        all_anchors, num_anchors):
    """Same as the anchor target layer in original Fast/er RCNN.

    Labels each anchor as foreground (1), background (0) or don't-care
    (-1) according to the ``cfg.FLAGS`` RPN settings and computes the
    box-regression targets and loss weights.

    Args:
        rpn_cls_score: RPN classification scores; only its shape is read,
            with axes 1 and 2 taken as feature-map height and width.
        gt_boxes: ground-truth boxes, one row per box.
        im_info: batched image information; row 0 is used, whose index 0 is
            the height and index 1 the width.
        _feat_stride: unused, kept for interface compatibility.
        all_anchors: (total_anchors, 4) anchor boxes.
        num_anchors: number of anchors per feature-map location.

    Returns:
        Tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)`` with shapes ``(1, 1, A * height,
        width)`` for the labels and ``(1, height, width, A * 4)`` for the
        other three.
    """
    A = num_anchors
    total_anchors = all_anchors.shape[0]
    im_info = im_info[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    # (everything starts out as don't care)
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes: overlaps (ex, gt)
    # (np.float was removed from NumPy; np.float64 is the same dtype.)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))

    # best gt box (and its overlap) for every anchor
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]

    # anchors that reach the best overlap of some gt box
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    # With rpn_clobber_positives False, background is assigned before the
    # foreground rules so positives win; with True it is assigned after
    # them so negatives win.
    if not cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.FLAGS.rpn_negative_overlap] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.FLAGS.rpn_positive_overlap] = 1

    if cfg.FLAGS.rpn_clobber_positives:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.FLAGS.rpn_negative_overlap] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.FLAGS.rpn_fg_fraction * cfg.FLAGS.rpn_batchsize)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.FLAGS.rpn_batchsize - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # regression offsets between each anchor and its best-matching gt box
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.FLAGS2["bbox_inside_weights"])

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.FLAGS.rpn_positive_weight < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.FLAGS.rpn_positive_weight > 0) &
                (cfg.FLAGS.rpn_positive_weight < 1))
        positive_weights = (cfg.FLAGS.rpn_positive_weight /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.FLAGS.rpn_positive_weight) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors,
                                 inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors,
                                  inds_inside, fill=0)

    # labels: regroup from (1, H, W, A) to (1, 1, A * H, W)
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))

    # bbox_inside_weights
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))

    # bbox_outside_weights
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))

    return (rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
            rpn_bbox_outside_weights)
def track(dets):
    """Link per-frame detections into traces per label and run NMS on them.

    For every predicted label the detections with a first-column score
    above 0.1 are linked across consecutive entries of ``dets`` with a
    Viterbi-style recursion (score = accumulated score + ``beta`` * box
    overlap + detection score).  The resulting traces are converted to box
    lists and repeatedly passed to ``nms`` until no scores remain.

    Args:
        dets: sequence of dicts, one per frame/clip, each with keys
            ``'boxes'``, ``'pred_label'`` and ``'pred_scores'``.
            ``dets[i]['boxes'].shape[1]`` is used as ``depth`` (boxes per
            detection) -- presumably boxes are (num_dets, depth, 4);
            TODO confirm.

    Returns:
        Tuple ``(rrrr, llll, ssss)``: the ``r`` values returned by ``nms``,
        the label of each of them, and the stacked ``s`` values.
    """
    beta = 0.5  # weight of the overlap term in the linking score
    n = len(dets)
    depth = dets[0]['boxes'].shape[1]

    tmp = np.empty((0))
    for i in range(n):
        tmp = np.hstack((tmp, dets[i]['pred_label']))
    u_label = np.unique(tmp)

    rrrr = []
    llll = []
    ssss = []
    for l in u_label:
        valid_dets = []
        valid_score = []
        # Filter out negative samples.
        for i in range(n):
            inds = np.where(
                np.logical_and(dets[i]['pred_label'] == l,
                               dets[i]['pred_scores'][:, 0] > 0.1))[0]
            valid_dets.append(dets[i]['boxes'][inds])
            valid_score.append(dets[i]['pred_scores'][inds, 0])

        det_traces = []
        det_scores = np.zeros((0, 1))
        # Reset the running trace state for every label; otherwise a label
        # without any valid detection would either hit an undefined name or
        # re-append the traces left over from the previous label.
        old_trace = []
        old_scores = np.zeros((0, 1))

        # Viterbi.  A trace element encodes (frame i, detection j) as
        # j + i * 100, which assumes fewer than 100 valid detections per
        # frame.
        if valid_score[0].size > 0:
            old_scores = np.expand_dims(valid_score[0], axis=1)
            old_trace = []
            for i in range(old_scores.size):
                old_trace.append((i,))

        for i in range(1, n):
            if valid_dets[i - 1].size == 0 and valid_dets[i].size > 0:
                # Nothing to link to: start fresh traces at frame i.
                old_scores = np.expand_dims(valid_score[i], axis=1)
                old_trace = []
                for j in range(old_scores.size):
                    old_trace.append((j + i * 100,))
            elif valid_dets[i - 1].size > 0 and valid_dets[i].size == 0:
                # Traces end here: store them and clear the running state.
                det_traces = det_traces + old_trace
                det_scores = np.vstack((det_scores, old_scores))
                old_trace = []
                old_scores = np.zeros((0))
            elif valid_dets[i - 1].size > 0 and valid_dets[i].size > 0:
                # np.float was removed from NumPy; np.float64 is the same
                # dtype.
                overlaps = bbox_overlaps(
                    np.ascontiguousarray(valid_dets[i - 1][:, depth - 1],
                                         dtype=np.float64),
                    np.ascontiguousarray(valid_dets[i][:, depth - 1],
                                         dtype=np.float64))
                scores = beta * overlaps + old_scores
                argmax_scores = scores.argmax(axis=0)
                old_scores = np.expand_dims(
                    scores.max(axis=0) + valid_score[i], axis=1)
                trace = []
                for j in range(old_scores.size):
                    trace.append(
                        old_trace[argmax_scores[j]] + (j + i * 100,))
                old_trace = trace

        if len(old_trace) > 0:
            det_traces = det_traces + old_trace
            det_scores = np.vstack((det_scores, old_scores))

        boxes = []
        for i in range(len(det_traces)):
            # NOTE(review): the row count uses a literal 8 while rows are
            # filled in chunks of `depth`; this only lines up when
            # depth == 8 -- confirm.
            curr_boxes = np.empty((len(det_traces[i]) * 8, 5))
            for j in range(len(det_traces[i])):
                idx = det_traces[i][j] % 100
                # Floor division keeps the frame number an integer on
                # Python 3 as well.
                ff = det_traces[i][j] // 100
                # NOTE(review): idx indexes the *filtered* detections of
                # frame ff, but the lookup uses the unfiltered boxes of
                # dets[j]; valid_dets[ff][idx] looks like the intended
                # source -- confirm before changing.
                curr_boxes[j * depth:(j + 1) * depth, 1:5] = \
                    dets[j]['boxes'][idx]
                curr_boxes[j * depth:(j + 1) * depth, 0] = \
                    np.arange(depth) + ff * depth
            boxes.append(curr_boxes)

        # NOTE(review): ssss is re-created for every label while rrrr and
        # llll accumulate across labels; placement follows the original
        # statement order -- confirm this is intended.
        ssss = np.empty((0, 1))
        while det_scores.size > 0:
            [r, s, boxes, det_scores, det_traces] = nms(
                boxes, det_scores, det_traces)
            rrrr.append(r)
            llll.append(l)
            ssss = np.vstack((ssss, s))

    return rrrr, llll, ssss
def anchor_target_layer(gt_boxes, img_shape, all_anchors,
                        is_restrict_bg=False):
    """Same as the anchor target layer in original Fast/er RCNN.

    Labels the anchors as positive, negative or don't-care samples and
    computes their regression offsets to the matched ground-truth boxes.
    At most ``cfgs.RPN_MINIBATCH_SIZE`` anchors stay labelled; the share
    not used by positives is filled with sampled negatives.

    :param gt_boxes: [-1, 5]: [xmin, ymin, xmax, ymax, label]
        ground-truth boxes; the last column is dropped before matching.
    :param img_shape: [1, h, w, 3]; indices 1 and 2 are read as image
        height and width.
    :param all_anchors: [-1, 4]: [xmin, ymin, xmax, ymax]
    :param is_restrict_bg: when True the number of kept negatives is
        ``max(num_bg, int(num_fg * 1.5))`` instead of ``num_bg``.
    :return: rpn_labels [-1, 1] (1 positive, 0 negative, -1 don't care),
        rpn_bbox_targets [-1, 4]
    """
    total_anchors = all_anchors.shape[0]
    img_h, img_w = img_shape[1], img_shape[2]
    gt_boxes = gt_boxes[:, :-1]  # remove class label

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < img_w + _allowed_border) &  # width
        (all_anchors[:, 3] < img_h + _allowed_border)  # height
    )[0]
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)  # every anchor starts as don't care

    # overlaps between the anchors and the gt boxes: one row per anchor,
    # one column per gt box, each entry the IoU of that pair.
    # (np.float was removed from NumPy; np.float64 is the same dtype.)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))

    # per anchor: index of the gt box with the largest IoU, and that IoU
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]

    # per gt box: the largest IoU reached by any anchor, then all anchors
    # that reach such a maximum
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfgs.TRAIN_RPN_CLOOBER_POSITIVES:
        # negatives first, so the positive rules below can override them
        labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0

    labels[gt_argmax_overlaps] = 1
    labels[max_overlaps >= cfgs.RPN_IOU_POSITIVE_THRESHOLD] = 1

    if cfgs.TRAIN_RPN_CLOOBER_POSITIVES:
        # negatives last, so they override positives
        labels[max_overlaps < cfgs.RPN_IOU_NEGATIVE_THRESHOLD] = 0

    # subsample positives if there are too many
    num_fg = int(cfgs.RPN_MINIBATCH_SIZE * cfgs.RPN_POSITIVE_RATE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negatives if there are too many
    num_bg = cfgs.RPN_MINIBATCH_SIZE - np.sum(labels == 1)
    if is_restrict_bg:
        # Keep this an integer: a float here would be passed on to
        # npr.choice as a non-integer sample size.
        num_bg = max(num_bg, int(num_fg * 1.5))
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # convert the matched gt boxes into regression offsets
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)

    rpn_labels = labels.reshape((-1, 1))
    rpn_bbox_targets = bbox_targets.reshape((-1, 4))

    return rpn_labels, rpn_bbox_targets
def check_relationship_recall(gt_objects, gt_relationships, cls_r, inds_s,
                              inds_o, boxes_s, boxes_o, top_Ns, thres=0.5,
                              use_gt_boxes=True, union_overlap=True):
    """Count correctly predicted relationships for several top-N settings.

    Ground-truth relationships are the positive entries of
    ``gt_relationships``; entry ``[s, o]`` relates ``gt_objects[s]`` to
    ``gt_objects[o]``.  A predicted pair is a candidate for a ground-truth
    relationship depending on the mode:

    * ``use_gt_boxes``: subject and object overlaps are both exactly 1;
      only the predicate has to match.
    * ``union_overlap`` (and not ``use_gt_boxes``): the union box overlap
      is at least ``thres``; predicate, subject class and object class
      must match, and each ground truth is counted at most once.
    * otherwise: subject and object overlaps are both at least ``thres``;
      predicate, subject class and object class must match.

    Args:
        gt_objects: ground-truth objects; columns 0-3 are used as the box
            and column 4 as the class.
        gt_relationships: 2-D array of predicate ids, > 0 where a
            relationship exists.
        cls_r: per top-N setting, a 2-D array of predicted predicate ids
            with one row per predicted pair.
        inds_s: predicted subject class per pair.
        inds_o: predicted object class per pair.
        boxes_s: predicted subject boxes, one row per pair.
        boxes_o: predicted object boxes, one row per pair.
        top_Ns: the top-N settings; only its length and order are used.
        thres: overlap threshold for the non-gt-box modes.
        use_gt_boxes: select the exact-overlap mode.
        union_overlap: select the union-box mode when ``use_gt_boxes`` is
            False.

    Returns:
        Tuple ``(rel_cnt, rel_correct_cnt)``: the number of ground-truth
        relationships and an array with the correct count per top-N setting.
    """

    def box_union(box1, box2):
        # Smallest box enclosing both inputs; columns after the first four
        # are carried through np.maximum and sliced off by the callers.
        return np.concatenate(
            (np.minimum(box1[:, :2], box2[:, :2]),
             np.maximum(box1[:, 2:], box2[:, 2:])), 1)

    boxes_union = box_union(boxes_s, boxes_o)

    # rearrange the ground truth: one entry per annotated relationship
    gt_rel_sub_idx, gt_rel_obj_idx = np.where(gt_relationships > 0)
    gt_sub = gt_objects[gt_rel_sub_idx, :5]
    gt_obj = gt_objects[gt_rel_obj_idx, :5]
    gt_rel = gt_relationships[gt_rel_sub_idx, gt_rel_obj_idx]
    gt_union = box_union(gt_sub, gt_obj)

    rel_cnt = len(gt_rel)
    rel_correct_cnt = np.zeros(len(top_Ns))

    # np.float was removed from NumPy; np.float64 is the same dtype.
    sub_overlaps = bbox_overlaps(
        np.ascontiguousarray(boxes_s[:, :4], dtype=np.float64),
        np.ascontiguousarray(gt_sub[:, :4], dtype=np.float64))
    obj_overlaps = bbox_overlaps(
        np.ascontiguousarray(boxes_o[:, :4], dtype=np.float64),
        np.ascontiguousarray(gt_obj[:, :4], dtype=np.float64))
    union_overlaps = bbox_overlaps(
        np.ascontiguousarray(boxes_union[:, :4], dtype=np.float64),
        np.ascontiguousarray(gt_union[:, :4], dtype=np.float64))

    for idx, top_N in enumerate(top_Ns):
        if use_gt_boxes:
            for gt_id in range(rel_cnt):
                fg_candidate = np.where(
                    np.logical_and(sub_overlaps[:, gt_id] == 1,
                                   obj_overlaps[:, gt_id] == 1))[0]
                for candidate_id in fg_candidate:
                    for cls_id in range(cls_r[idx].shape[1]):
                        if cls_r[idx][candidate_id, cls_id] == gt_rel[gt_id]:
                            rel_correct_cnt[idx] += 1
                            break
        elif union_overlap:
            for gt_id in range(rel_cnt):
                # flag stops the search once this ground truth is matched
                flag = 0
                fg_candidate = np.where(
                    union_overlaps[:, gt_id] >= thres)[0]
                for candidate_id in fg_candidate:
                    if flag == 1:
                        break
                    for cls_id in range(cls_r[idx].shape[1]):
                        if cls_r[idx][candidate_id, cls_id] == gt_rel[gt_id] and \
                                inds_s[candidate_id] == gt_sub[gt_id, 4] and \
                                inds_o[candidate_id] == gt_obj[gt_id, 4]:
                            rel_correct_cnt[idx] += 1
                            flag = 1
                            break
        else:
            for gt_id in range(rel_cnt):
                fg_candidate = np.where(
                    np.logical_and(sub_overlaps[:, gt_id] >= thres,
                                   obj_overlaps[:, gt_id] >= thres))[0]
                for candidate_id in fg_candidate:
                    for cls_id in range(cls_r[idx].shape[1]):
                        if cls_r[idx][candidate_id, cls_id] == gt_rel[gt_id] and \
                                inds_s[candidate_id] == gt_sub[gt_id, 4] and \
                                inds_o[candidate_id] == gt_obj[gt_id, 4]:
                            rel_correct_cnt[idx] += 1
                            break

    return rel_cnt, rel_correct_cnt
def imdb_rpn_compute_stats(net, imdb, anchor_scales=(8, 16, 32),
                           feature_stride=16):
    """Compute mean and std of the RPN regression targets over a dataset.

    For every image of the filtered roidb the anchors inside the image are
    matched to the ground-truth boxes; the regression targets of the
    foreground anchors are accumulated and their per-coordinate mean and
    standard deviation are printed and returned.

    Args:
        net: network used only to look up the ``'rpn/output'`` feature-map
            size for each square input size.
        imdb: dataset providing ``roidb`` and ``num_images``.
        anchor_scales: scales passed to ``generate_anchors``.
        feature_stride: pixel distance between neighbouring feature-map
            cells.

    Returns:
        Tuple ``(means, stds)`` of the regression targets.
    """
    raw_anchors = generate_anchors(scales=np.array(anchor_scales))
    print(raw_anchors.shape)
    sums = 0
    squred_sums = 0
    counts = 0

    roidb = filter_roidb(imdb.roidb)

    # Compute a map of input image size and output feature map blob
    map_w = {}
    map_h = {}
    for i in range(50, cfg.TRAIN.MAX_SIZE + 10):
        blobs = {
            'data': np.zeros((1, 3, i, i)),
            'im_info': np.asarray([[i, i, 1.0]])
        }
        net.blobs['data'].reshape(*(blobs['data'].shape))
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
        # The forward pass is run only so the output blob takes its shape.
        net.forward(
            data=blobs['data'].astype(np.float32, copy=False),
            im_info=blobs['im_info'].astype(np.float32, copy=False))
        height, width = net.blobs['rpn/output'].data.shape[-2:]
        map_w[i] = width
        map_h[i] = height

    for i in range(len(roidb)):
        if not i % 5000:
            print('computing %d/%d' % (i, imdb.num_images))
        im = cv2.imread(roidb[i]['image'])
        im_data, im_info = _get_image_blob(im)
        # bring the gt boxes to the scale of the resized image
        gt_boxes = roidb[i]['boxes']
        gt_boxes = gt_boxes * im_info[0, 2]
        height = map_h[im_data.shape[2]]
        width = map_w[im_data.shape[3]]

        # 1. Generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, width) * feature_stride
        shift_y = np.arange(0, height) * feature_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = raw_anchors.shape[0]
        K = shifts.shape[0]
        all_anchors = (raw_anchors.reshape((1, A, 4)) +
                       shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= 0) &
            (all_anchors[:, 1] >= 0) &
            (all_anchors[:, 2] < im_info[0, 1]) &  # width
            (all_anchors[:, 3] < im_info[0, 0])  # height
        )[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # np.float was removed from NumPy; np.float64 is the same dtype.
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes, dtype=np.float64))

        # There are 2 types of bbox targets
        # 1. anchor whose overlaps with gt is greater than
        #    RPN_POSITIVE_OVERLAP
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        fg_inds = np.where(max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP)[0]
        # 2. anchors which best match certain gt
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        fg_inds = np.unique(np.hstack((fg_inds, gt_argmax_overlaps)))
        gt_rois = gt_boxes[argmax_overlaps, :]

        anchors = anchors[fg_inds, :]
        gt_rois = gt_rois[fg_inds, :]
        targets = bbox_transform(anchors, gt_rois[:, :4]).astype(
            np.float32, copy=False)

        sums += targets.sum(axis=0)
        squred_sums += (targets ** 2).sum(axis=0)
        counts += targets.shape[0]

    means = sums / counts
    # std from the accumulated first and second moments
    stds = np.sqrt(squred_sums / counts - means ** 2)
    print(means)
    print(stds)
    return means, stds
def forward(self, bottom, top):
    """Assign RPN labels and bbox regression targets to every anchor.

    Algorithm:
        for each (H, W) location i
            generate A anchor boxes centered on cell i
        filter out-of-image anchors
        measure GT overlap and derive labels / regression targets

    Args:
        bottom: input blobs.
            bottom[0] -- map whose last two dims are (H, W); batch size 1.
            bottom[1] -- GT boxes, rows of (x1, y1, x2, y2, label).
            bottom[2] -- im_info; row 0 is read as (height, width, scale).
        top: output blobs, reshaped and filled in place.
            top[0] -- labels, shape (1, 1, A * H, W);
                      1 positive, 0 negative, -1 don't care.
            top[1] -- bbox_targets, shape (1, A * 4, H, W).
            top[2] -- bbox_inside_weights, shape (1, A * 4, H, W).
            top[3] -- bbox_outside_weights, shape (1, A * 4, H, W).
    """
    assert bottom[0].data.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W)
    height, width = bottom[0].data.shape[-2:]
    # GT boxes (x1, y1, x2, y2, label)
    gt_boxes = bottom[1].data
    # im_info
    im_info = bottom[2].data[0, :]

    if DEBUG:
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape {}'.format(gt_boxes.shape))
        print('rpn: gt_boxes {}'.format(gt_boxes))

    # 1. Generate shifted anchors
    shift_x = np.arange(0, width) * self._feat_stride
    shift_y = np.arange(0, height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = self._num_anchors
    K = shifts.shape[0]
    all_anchors = (self._anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -self._allowed_border) &
        (all_anchors[:, 1] >= -self._allowed_border) &
        (all_anchors[:, 2] < im_info[1] + self._allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + self._allowed_border)    # height
    )[0]

    if DEBUG:
        print('total_anchors {}'.format(total_anchors))
        print('inds_inside {}'.format(len(inds_inside)))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors.shape {}'.format(anchors.shape))

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    gt_boxes = gt_boxes.reshape(gt_boxes.shape[0], gt_boxes.shape[1])
    # np.float64 is the dtype the removed np.float alias resolved to.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # Every anchor tying for the best overlap with some gt box is kept.
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # Regression targets are computed for every inside anchor against its
    # best-overlapping gt box; the weights below select which ones count.
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        # Running statistics of the positive targets across calls.
        self._sums += bbox_targets[labels == 1, :].sum(axis=0)
        self._squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
        self._counts += np.sum(labels == 1)
        means = self._sums / self._counts
        stds = np.sqrt(self._squared_sums / self._counts - means ** 2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(
        bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(
        bbox_outside_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        print('rpn: max max_overlap {}'.format(np.max(max_overlaps)))
        print('rpn: num_positive {}'.format(np.sum(labels == 1)))
        print('rpn: num_negative {}'.format(np.sum(labels == 0)))
        self._fg_sum += np.sum(labels == 1)
        self._bg_sum += np.sum(labels == 0)
        self._count += 1
        print('rpn: num_positive avg {}'.format(self._fg_sum / self._count))
        print('rpn: num_negative avg {}'.format(self._bg_sum / self._count))

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    top[0].reshape(*labels.shape)
    top[0].data[...] = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    top[1].reshape(*bbox_targets.shape)
    top[1].data[...] = bbox_targets

    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    assert bbox_inside_weights.shape[2] == height
    assert bbox_inside_weights.shape[3] == width
    top[2].reshape(*bbox_inside_weights.shape)
    top[2].data[...] = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    assert bbox_outside_weights.shape[2] == height
    assert bbox_outside_weights.shape[3] == width
    top[3].reshape(*bbox_outside_weights.shape)
    top[3].data[...] = bbox_outside_weights
def forward(self,bottom,top):
    """Load one image, build sliding-window anchor targets and fill the tops.

    Steps:
      1. Fetch ``(img, bbs)`` from ``self.get_next_image()``, apply
         ``random_zoomout`` and resize to ``(resize_height, resize_width)``.
      2. Min-max normalise the image to [-0.5, 0.5] and write it to top[0].
      3. Map the ground-truth boxes (xmin, ymin, w, h) into the resized frame.
      4. Lay out one anchor per (window size, feature-map row, column) and
         compute the IoU of every anchor against every ground-truth box.
      5. Mark anchors positive / negative by IoU thresholds, and give every
         still-unmatched ground-truth box its best-IoU anchor.
      6. Shuffle, fill ``tags`` for up to ``batch_size`` positives, and pad
         ``sampling_param`` with negatives.

    Writes:
        top[0] -- normalised image (slot 0 of the first axis only).
        top[1] -- tags, shape (1, 5 * num_sizes, fm_height, fm_width);
                  per size: [flag, dx, dy, log(dw), log(dh)].
        top[2] -- sampling_param, shape (batch_size, 7); rows are
                  [x, y, w, h, x_index, y_index, size_index].
        top[3] -- the transformed ground-truth boxes.

    NOTE(review): every ``/`` used for sizes and indices below has to yield
    an int (the results feed ``range`` and array shapes), i.e. the code
    assumes Python 2 integer division -- confirm before porting.
    """
    # Load the next sample: image plus its ground-truth boxes.
    (img, bbs) = self.get_next_image()
    (img,pos,zoom_ratio) = random_zoomout(img)
    # Dimensions of the zoomed-out image, taken before resizing.
    img_height = np.shape(img)[0]
    img_width = np.shape(img)[1]
    # NOTE(review): `misc` is presumably scipy.misc, whose imresize no longer
    # exists in recent SciPy releases -- confirm the pinned version.
    img = misc.imresize(img,(self.resize_height, self.resize_width))
    minv = np.min(img)
    maxv = np.max(img)
    if minv == maxv:
        # Constant image: avoid dividing by zero, emit all zeros instead.
        norm_img = np.zeros((self.resize_height, self.resize_width, 3), dtype=np.float32)
    else:
        norm_img = (np.float32(img) - minv) / (maxv - minv) - 0.5
    if len(norm_img.shape)==2:
        # Single-channel image: fill channel 0 only.
        top[0].data[0,0,:,:]=norm_img
    else:
        # HWC -> CHW
        top[0].data[0,:,:,:]=np.transpose(norm_img, (2,0,1))
    # bbs columns: 0 xmin 1 ymin 2 w 3 h
    # NOTE(review): bbs is modified in place; if get_next_image() returns a
    # reference to cached annotations they are altered too -- verify.
    # Apply the zoom-out transform (scale, then offset of the pasted image).
    bbs[:,0] = bbs[:,0]*zoom_ratio + pos[0]
    bbs[:,1] = bbs[:,1]*zoom_ratio + pos[1]
    bbs[:,2] = bbs[:,2]*zoom_ratio
    bbs[:,3] = bbs[:,3]*zoom_ratio
    # Apply the resize to (resize_height, resize_width).
    bbs[:,0] = bbs[:,0]*self.resize_width/img_width
    bbs[:,2] = bbs[:,2]*self.resize_width/img_width
    bbs[:,1] = bbs[:,1]*self.resize_height/img_height
    bbs[:,3] = bbs[:,3]*self.resize_height/img_height
    # Compute the IoU of every anchor with every ground-truth box.
    feature_map_height = self.resize_height / self.sliding_window_stride
    feature_map_width = self.resize_width / self.sliding_window_stride
    size_num = len(self.sliding_window_height)
    # Anchors as (x1, y1, x2, y2), ordered by size, then row, then column.
    anchor_bbs = np.zeros((size_num*feature_map_height*feature_map_width,4),dtype = np.float64)
    for size_index in range(size_num):
        h=self.sliding_window_height[size_index]
        w=self.sliding_window_width[size_index]
        # Left edges of all windows in one row.
        xs = np.arange(feature_map_width) * self.sliding_window_stride + self.sliding_window_stride/2-1 - w/2
        for y_index in range(feature_map_height):
            y=y_index*self.sliding_window_stride + self.sliding_window_stride/2-1 - h/2
            # Flat index of the first anchor of this (size, row).
            ind = size_index*feature_map_height*feature_map_width + y_index*feature_map_width
            anchor_bbs[ind : ind + feature_map_width,0] = xs
            anchor_bbs[ind : ind + feature_map_width,2] = xs + w
            anchor_bbs[ind : ind + feature_map_width,1] = y
            anchor_bbs[ind : ind + feature_map_width,3] = y + h
    # Ground truth converted from (x, y, w, h) to (x1, y1, x2, y2).
    bbs2 = np.zeros((len(bbs),4), dtype = np.float64)
    bbs2[:,0:2] = bbs[:,0:2]
    bbs2[:,2:4] = bbs[:,0:2] + bbs[:,2:4]
    iou = cython_bbox.bbox_overlaps(anchor_bbs,bbs2)
    # Anchor box and ground-truth box assignment.
    pos_anchor=list()
    anchor_fired_bbs = list()  # gt index matched to each entry of pos_anchor
    neg_anchor=list()
    bbs_fire_list = np.zeros(len(bbs),dtype=np.int8)  # 1 once a gt box has an anchor
    for size_index in range(size_num):
        h=self.sliding_window_height[size_index]
        w=self.sliding_window_width[size_index]
        for y_index in range(feature_map_height):
            y=y_index*self.sliding_window_stride + self.sliding_window_stride/2-1 - h/2
            for x_index in range(feature_map_width):
                x=x_index*self.sliding_window_stride + self.sliding_window_stride/2-1 - w/2
                anchor_box = [x,y,w,h, x_index, y_index, size_index]
                anchor_index = size_index*feature_map_height*feature_map_width + y_index*feature_map_width + x_index
                fired_bb = np.where(iou[anchor_index,:] > self.iou_positive_thres)[0]
                # NOTE(review): np.max raises on an empty row, i.e. when the
                # image has no ground-truth boxes.
                max_iou = np.max(iou[anchor_index,:])
                if max_iou < self.iou_negative_thres:
                    neg_anchor.append(anchor_box)
                elif max_iou > self.iou_positive_thres:
                    pos_anchor.append(anchor_box)
                    # Pick one of the boxes above the threshold at random.
                    bb_ind = int(fired_bb[np.random.randint(len(fired_bb))])
                    anchor_fired_bbs.append(bb_ind)
                    bbs_fire_list[bb_ind] = 1
    # Give every ground-truth box without a positive anchor its best anchor.
    for j in range(len(bbs)):
        if bbs_fire_list[j] > 0:
            continue  # this gt box has already been assigned an anchor box
        max_iou_anchor_ind = np.argmax(iou[:,j])
        # Invert the flat anchor index back into (size, row, column).
        size_index = max_iou_anchor_ind / (feature_map_height*feature_map_width)
        y_index = (max_iou_anchor_ind % (feature_map_height*feature_map_width) ) / feature_map_width
        x_index = max_iou_anchor_ind % feature_map_width
        h=self.sliding_window_height[size_index]
        w=self.sliding_window_width[size_index]
        x=x_index*self.sliding_window_stride + self.sliding_window_stride/2-1 - w/2
        y=y_index*self.sliding_window_stride + self.sliding_window_stride/2-1 - h/2
        anchor_box = [x,y,w,h, x_index, y_index, size_index]
        pos_anchor.append(anchor_box)
        anchor_fired_bbs.append(j)
    pos_anchor = np.array(pos_anchor)
    anchor_fired_bbs = np.array(anchor_fired_bbs)
    neg_anchor = np.array(neg_anchor)
    # Sampling from pos_anchor and neg_anchor.
    sampling_param = np.zeros([self.batch_size, 7], dtype=np.float32)
    tags = np.zeros([1, 5*len(self.sliding_window_width),feature_map_height,feature_map_width],dtype=np.float32)
    # Shuffle positives and their matched gt indices with the same permutation.
    rnd_perm = np.random.permutation(len(pos_anchor))
    pos_anchor = pos_anchor[rnd_perm]
    anchor_fired_bbs = anchor_fired_bbs[rnd_perm]
    neg_anchor = np.random.permutation(neg_anchor)
    pos_num_in_batch = min([self.batch_size,len(pos_anchor)])
    for i in range(pos_num_in_batch):
        x = pos_anchor[i][0]
        y = pos_anchor[i][1]
        w = pos_anchor[i][2]
        h = pos_anchor[i][3]
        # NOTE(review): these are used as array indices below, so pos_anchor
        # must have an integer dtype; a single float coordinate would make
        # the whole array float -- confirm the window sizes are ints.
        x_index = pos_anchor[i][4]
        y_index = pos_anchor[i][5]
        size_index = pos_anchor[i][6]
        # Channel 0 of the size's 5-channel group flags a positive anchor.
        tags[0,0+5*size_index,y_index,x_index]=1.0
        gt = bbs[anchor_fired_bbs[i]]
        # Centre offsets normalised by the anchor size, then log size ratios.
        tags[0,1+5*size_index,y_index,x_index]=(gt[0] + 0.5*gt[2] - x - 0.5*w) / w
        tags[0,2+5*size_index,y_index,x_index]=(gt[1] + 0.5*gt[3] - y - 0.5*h) / h
        tags[0,3+5*size_index,y_index,x_index]=np.log(np.float32(gt[2])/w)
        tags[0,4+5*size_index,y_index,x_index]=np.log(np.float32(gt[3])/h)
        sampling_param[i,:] = pos_anchor[i]
    if pos_num_in_batch < self.batch_size:
        # Fill the rest of the batch with negatives, cycling if there are few.
        # NOTE(review): with zero negatives the modulo below raises
        # ZeroDivisionError.
        neg_anchor_num = len(neg_anchor)
        for i in range(pos_num_in_batch,self.batch_size):
            sampling_param[i,:] = neg_anchor[(i - pos_num_in_batch) % neg_anchor_num]
    # Log the anchor counts on roughly one call in fifty.
    if np.random.randint(50)==0:
        print '[%s] pos_anchor: %d, neg_anchor:%d' % (self.py_fn, len(pos_anchor), len(neg_anchor))
    top[1].data[...]=tags
    top[2].data[...]=sampling_param
    top[3].data[...]=bbs
    self.iter += 1