def create_roidb_from_box_list(self, box_list, gt_roidb):
    """Build a roidb from one array of proposal boxes per image.

    Args:
        box_list: sequence holding one box array per image; its length must
            equal ``self.num_images``.  Only ``shape[0]`` (the number of
            boxes) and ``astype`` are used here.
        gt_roidb: optional ground-truth roidb, indexable per image, whose
            entries provide ``'boxes'`` and ``'gt_classes'``; may be ``None``.

    Returns:
        A list with one dict per image containing ``'boxes'`` (the input
        array), ``'gt_classes'`` (all zeros, int32), ``'gt_overlaps'`` (CSR
        matrix of shape ``(num_boxes, self.num_classes)``), ``'flipped'``
        (always ``False``) and ``'seg_areas'`` (all zeros, float32).
    """
    assert len(box_list) == self.num_images, \
        'Number of boxes must match number of ground-truth images'
    roidb = []
    for i in range(self.num_images):
        boxes = box_list[i]
        num_boxes = boxes.shape[0]
        overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

        if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
            gt_boxes = gt_roidb[i]['boxes']
            gt_classes = gt_roidb[i]['gt_classes']
            # np.float64 is used because the ``np.float`` alias no longer
            # exists in current NumPy releases.
            gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                        gt_boxes.astype(np.float64))
            best_gt = gt_overlaps.argmax(axis=1)
            best_overlap = gt_overlaps.max(axis=1)
            # Only boxes that touch some gt box get a non-zero entry, stored
            # in the column of the class of their best-matching gt box.
            hit = np.where(best_overlap > 0)[0]
            overlaps[hit, gt_classes[best_gt[hit]]] = best_overlap[hit]

        roidb.append({
            'boxes': boxes,
            'gt_classes': np.zeros((num_boxes,), dtype=np.int32),
            'gt_overlaps': scipy.sparse.csr_matrix(overlaps),
            'flipped': False,
            'seg_areas': np.zeros((num_boxes,), dtype=np.float32),
        })
    return roidb
def score_of_edge(v1, v2, iouth, costtype):
    """Score the edges between one live path and every detection of a frame.

    :param v1: live path; ``v1['boxes'][-1]`` (its last box) and
        ``v1['allScores'][-1, :]`` are read.
    :param v2: frame detections; ``v2['boxes']``, ``v2['scores']`` and
        ``v2['allScores']`` are read.
    :param iouth: minimum overlap (as returned by ``bbox_overlaps``) a
        detection needs with the path's last box to receive a score.
    :param costtype: ``'score'``, ``'scrSim'`` or ``'scrMinusSim'``; any other
        value leaves every entry at zero.
    :return: array of shape ``(1, N2)`` where ``N2 = v2['boxes'].shape[0]``;
        entries of detections below ``iouth`` stay zero.
    """
    # Number of detections at frame t
    num_dets = v2['boxes'].shape[0]
    score = np.zeros((1, num_dets))
    # np.float64 is used because the ``np.float`` alias no longer exists in
    # current NumPy releases.
    iou = bbox_overlaps(
        np.ascontiguousarray(v2['boxes'], dtype=np.float64),
        np.ascontiguousarray(v1['boxes'][-1].reshape(1, -1), dtype=np.float64))

    for i in range(num_dets):
        if iou.item(i) >= iouth:
            det_score = v2['scores'][i]
            # Euclidean distance between the two score vectors
            diff = (v1['allScores'][-1, :].reshape(1, -1)
                    - v2['allScores'][i, :].reshape(1, -1))
            score_similarity = np.sqrt(np.sum(diff ** 2))
            if costtype == 'score':
                score[:, i] = det_score
            elif costtype == 'scrSim':
                score[:, i] = 1.0 - score_similarity
            elif costtype == 'scrMinusSim':
                score[:, i] = det_score + (1. - score_similarity)
    return score
def _sample_rois(all_rois, proposals, num_classes):
    """Label every RoI from its best-overlapping box in ``proposals``.

    Args:
        all_rois: container whose first element ``all_rois[0]`` is the RoI
            array handed to ``bbox_overlaps``.
        proposals: dict with ``'gt_boxes'``, ``'gt_classes'`` and
            ``'gt_scores'``; the last two are indexed as ``[:, 0]``.
        num_classes: unused; the one-hot width is fixed at 21.
            NOTE(review): presumably 20 classes + background -- confirm
            before wiring this parameter in.

    Returns:
        ``(real_labels, rois, cls_loss_weights)`` where ``real_labels`` is a
        ``(num_rois, 21)`` one-hot array (RoIs whose best overlap is below
        0.5 get class 0), ``rois`` is ``all_rois`` unchanged and
        ``cls_loss_weights`` is the score of each RoI's assigned box.

    Raises:
        ValueError: if the per-RoI maximum cannot be taken (e.g. there are no
            boxes in ``proposals['gt_boxes']``).
    """
    gt_boxes = proposals['gt_boxes']
    gt_labels = proposals['gt_classes']
    gt_scores = proposals['gt_scores']
    # overlaps: (rois x gt_boxes).  np.float64 is used because the
    # ``np.float`` alias no longer exists in current NumPy releases.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[0], dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    try:
        gt_assignment = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)
    except ValueError:
        # Previously this dropped into pdb and then failed on an undefined
        # name; fail with a clear message instead.
        raise ValueError(
            '_sample_rois: cannot assign RoIs, overlap matrix has shape '
            '{}'.format(overlaps.shape))

    labels = gt_labels[gt_assignment, 0]
    cls_loss_weights = gt_scores[gt_assignment, 0]

    # RoIs below the 0.5 overlap threshold are background (class 0)
    labels[np.where(max_overlaps < 0.5)[0]] = 0

    num_rois = labels.shape[0]
    real_labels = np.zeros((num_rois, 21))
    real_labels[np.arange(num_rois), labels] = 1

    rois = all_rois
    return real_labels, rois, cls_loss_weights
def _compute_targets(rois, overlaps, labels):
    """Compute per-RoI targets and the gt assignment of foreground RoIs.

    Args:
        rois: RoI array; rows are selected with the indices derived from
            ``overlaps``.
        overlaps: 1-D array of each RoI's max overlap; entries equal to 1
            mark ground-truth RoIs, which must occupy the leading rows.
        labels: per-RoI class labels, indexable like ``overlaps``.

    Returns:
        ``(targets, fg_gt_ind_assignment)``.  ``targets`` is a float32 array
        of shape ``(rois.shape[0], 5)`` whose column 0 holds the label of
        every RoI with overlap >= ``cfg.TRAIN.BBOX_THRESH``; columns 1-4 stay
        zero because the box-delta computation is disabled.
        ``fg_gt_ind_assignment`` maps each RoI index with overlap >=
        ``cfg.TRAIN.FG_THRESH`` to the index of its assigned gt RoI.  When
        the image has no ground-truth RoIs, all-zero targets and an empty
        dict are returned (this path used to return the bare array, which
        broke callers unpacking two values).
    """
    # We are sampling relations from fg rois, hence each
    # fg box must be assigned to an gt box
    assert (cfg.TRAIN.FG_THRESH >= cfg.TRAIN.BBOX_THRESH)

    # Indices of ground-truth ROIs
    gt_inds = np.where(overlaps == 1)[0]
    if len(gt_inds) == 0:
        # Bail if the image has no ground-truth ROIs
        return np.zeros((rois.shape[0], 5), dtype=np.float32), {}

    # sanity check: gt ROIs are exactly rows 0 .. len(gt_inds) - 1
    assert np.array_equal(gt_inds, np.arange(len(gt_inds)))

    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI.  np.float64 is used
    # because the ``np.float`` alias no longer exists in current NumPy.
    ex_gt_overlaps = bbox_overlaps(
        np.ascontiguousarray(rois[ex_inds, :], dtype=np.float64),
        np.ascontiguousarray(rois[gt_inds, :], dtype=np.float64))

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)

    # guarding against the case where a gt box doesn't get assigned to itself
    for i, g in enumerate(gt_inds):
        ex_pos = np.where(ex_inds == g)[0][0]
        gt_assignment[ex_pos] = gt_inds[i]

    # record target assignments for all foreground rois
    fg_gt_ind_assignment = {}
    for i, e in enumerate(ex_inds):
        if overlaps[e] >= cfg.TRAIN.FG_THRESH:
            fg_gt_ind_assignment[e] = gt_inds[gt_assignment[i]]

    # check if all gt has been assigned
    assigned_gt = set(fg_gt_ind_assignment.values())
    for g in gt_inds:
        assert (g in assigned_gt)

    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    targets[ex_inds, 0] = labels[ex_inds]
    # The box deltas (columns 1:) are intentionally left at zero; the
    # bbox_transform step was disabled in the original code.
    return targets, fg_gt_ind_assignment
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image,
                 num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: RoI array; columns 1:5 are used as box coordinates.
        gt_boxes: ground-truth array; columns 0:4 are used as box
            coordinates and column 4 as the class label.
        fg_rois_per_image: maximum number of foreground RoIs to keep.
        rois_per_image: total number of RoIs to aim for.
        num_classes: forwarded to ``_get_bbox_regression_labels``.

    Returns:
        ``(labels, rois, bbox_targets, bbox_inside_weights)`` for the sampled
        RoIs, foreground samples first; labels of background samples are 0.
    """
    # overlaps: (rois x gt_boxes).  np.float64 is used because the
    # ``np.float`` alias no longer exists in current NumPy releases.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image,
                             replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = min(rois_per_image - fg_rois_per_this_image,
                                 bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_this_image),
                             replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets, bbox_inside_weights
def forward(self, boxes, im_labels, cls_prob_new, proposals):
    """Compute the proposal-cluster classification loss for one image.

    Args:
        boxes: RoI array handed to ``bbox_overlaps``.
        im_labels: array whose shape is ``(num_images, num_classes)``;
            ``num_images`` must be 1.
        cls_prob_new: 2-D tensor of class probabilities, one row per RoI;
            column 0 is used for the background term.
        proposals: dict with ``'gt_boxes'``, ``'gt_classes'`` and
            ``'gt_scores'`` (NumPy arrays; the last two indexed ``[:, 0]``).

    Returns:
        Scalar tensor on the device of ``cls_prob_new``: the accumulated
        negative log-likelihood divided by the number of rows of
        ``cls_prob_new``.
    """
    eps = 1e-9
    cls_prob_new = cls_prob_new.clamp(eps, 1 - eps)

    num_images, num_classes = im_labels.shape
    assert num_images == 1, 'batch size shoud be equal to 1'

    # overlaps: (rois x gt_boxes).  np.int64 / np.float64 are used because
    # the ``np.long`` / ``np.float`` aliases were removed from NumPy.
    gt_boxes = proposals['gt_boxes']
    gt_labels = proposals['gt_classes'].astype(np.int64)
    gt_scores = proposals['gt_scores']
    overlaps = bbox_overlaps(
        np.ascontiguousarray(boxes, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_labels[gt_assignment, 0]
    cls_loss_weights = gt_scores[gt_assignment, 0]

    # Select background RoIs as those with < FG_THRESH overlap
    bg_inds = np.where(max_overlaps < cfg.TRAIN.FG_THRESH)[0]
    labels[bg_inds] = 0
    gt_assignment[bg_inds] = -1

    # RoIs below BG_THRESH are ignored: their loss weight is zeroed
    ig_inds = np.where(max_overlaps < cfg.TRAIN.BG_THRESH)[0]
    cls_loss_weights[ig_inds] = 0.0

    # Follow the device of the probabilities so CPU tensors work as well
    device = cls_prob_new.device
    cls_loss_weights = torch.from_numpy(cls_loss_weights)
    labels = torch.from_numpy(labels)
    gt_assignment = torch.from_numpy(gt_assignment)
    gt_labels = torch.from_numpy(gt_labels)
    gt_scores = torch.from_numpy(gt_scores).to(device)

    loss = torch.tensor(0.).to(device)

    # Positive term: one cluster per gt box, built from the RoIs assigned to it
    for i in range(len(gt_boxes)):
        p_mask = (gt_assignment == i).to(dtype=torch.float).to(device)
        p_count = torch.sum(p_mask)
        if p_count > 0:
            mean_prob = torch.sum(
                cls_prob_new[:, gt_labels[i, 0]] * p_mask) / p_count
            loss = loss - torch.log(mean_prob) * p_count * gt_scores[i, 0]

    # Negative term: background RoIs weighted by their loss weight
    n_mask = torch.where(
        labels == 0,
        cls_loss_weights,
        torch.zeros_like(labels, dtype=torch.float)).to(device)
    loss = loss - torch.sum(torch.log(cls_prob_new[:, 0]) * n_mask)

    return loss / cls_prob_new.shape[0]
def _get_proposal_clusters(all_rois, proposals, im_labels, cls_prob):
    """Assign RoIs to proposal clusters and summarise each cluster.

    Args:
        all_rois: RoI array handed to ``bbox_overlaps``.
        proposals: dict with ``'gt_boxes'``, ``'gt_classes'`` and
            ``'gt_scores'``; the last two are indexed as ``[:, 0]``.
        im_labels: array whose shape is ``(num_images, num_classes)``;
            ``num_images`` must be 1.
        cls_prob: 2-D array of class probabilities, indexed
            ``[roi_indices, class_label]``.

    Returns:
        ``(labels, cls_loss_weights, gt_assignment, pc_labels, pc_probs,
        pc_count, img_cls_loss_weights)``.  The first three have one entry
        per RoI (background RoIs get label 0 and assignment -1; RoIs below
        ``cfg.TRAIN.BG_THRESH`` get weight 0).  The last four have one entry
        per box in ``proposals['gt_boxes']``: its label, the mean probability
        of that label over its assigned RoIs, the number of assigned RoIs and
        the sum of their loss weights (zeros for an empty cluster).
    """
    num_images, _ = im_labels.shape
    assert num_images == 1, 'batch size shoud be equal to 1'

    # overlaps: (rois x gt_boxes).  np.float64 is used because the
    # ``np.float`` alias no longer exists in current NumPy releases.
    gt_boxes = proposals['gt_boxes']
    gt_labels = proposals['gt_classes']
    gt_scores = proposals['gt_scores']
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_labels[gt_assignment, 0]
    cls_loss_weights = gt_scores[gt_assignment, 0]

    # Select background RoIs as those with < FG_THRESH overlap
    bg_inds = np.where(max_overlaps < cfg.TRAIN.FG_THRESH)[0]

    # RoIs below BG_THRESH are ignored: their loss weight is zeroed
    ig_inds = np.where(max_overlaps < cfg.TRAIN.BG_THRESH)[0]
    cls_loss_weights[ig_inds] = 0.0

    labels[bg_inds] = 0
    gt_assignment[bg_inds] = -1

    num_gt = gt_boxes.shape[0]
    img_cls_loss_weights = np.zeros(num_gt, dtype=np.float32)
    pc_probs = np.zeros(num_gt, dtype=np.float32)
    pc_labels = np.zeros(num_gt, dtype=np.int32)
    pc_count = np.zeros(num_gt, dtype=np.int32)

    # ``range`` replaces the Python-2-only ``xrange``
    for i in range(num_gt):
        pc_labels[i] = gt_labels[i, 0]
        po_index = np.where(gt_assignment == i)[0]
        if len(po_index) == 0:
            # Empty cluster: weight, count and probability stay at zero
            continue
        img_cls_loss_weights[i] = np.sum(cls_loss_weights[po_index])
        pc_count[i] = len(po_index)
        pc_probs[i] = np.average(cls_prob[po_index, pc_labels[i]])

    return (labels, cls_loss_weights, gt_assignment, pc_labels, pc_probs,
            pc_count, img_cls_loss_weights)
def _sample_rois(self, all_rois, proposals):
    """Return the RoIs that overlap some box in ``proposals`` by >= 0.5.

    Args:
        all_rois: container whose first element ``all_rois[0]`` is the RoI
            array; its rows are returned for the selected RoIs.
        proposals: dict providing ``'gt_boxes'``.

    Returns:
        Array of the selected rows of ``all_rois[0]`` stacked under an empty
        ``(0, 4)`` float array; that empty array itself when nothing
        qualifies or when the per-RoI maximum cannot be taken (e.g. there
        are no boxes to compare against).
    """
    gt_boxes = proposals['gt_boxes']
    # np.float64 is used because the ``np.float`` alias no longer exists in
    # current NumPy releases.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[0], dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))

    pos_samples = np.empty((0, 4))
    try:
        max_overlaps = overlaps.max(axis=1)
    except ValueError:
        # Reduction over an empty axis: nothing can be positive.  This used
        # to drop into pdb and then fail on an undefined name.
        return pos_samples

    fg_inds = np.where(max_overlaps >= 0.5)[0]
    if fg_inds.shape[0] != 0:
        pos_samples = np.vstack((pos_samples, all_rois[0][fg_inds, :]))
    return pos_samples
def choose_gt(boxes, cls_prob, im_labels):
    """Pick one pseudo ground-truth box per present class and the RoIs near it.

    Args:
        boxes: array indexed as ``[image, roi, column]``; the first entry of
            the last axis is dropped before use.
        cls_prob: 3-D class-probability tensor indexed
            ``[image, roi, class]``; when the last axis has 21 entries the
            first one is dropped.
        im_labels: array whose shape is ``(num_images, num_classes)``;
            ``num_images`` must be 1.  Classes with label 1 are used.

    Returns:
        ``(gt_boxes, pos_samples)`` as tensors with a leading batch axis of
        length 1.  ``gt_boxes`` rows are the top-scoring box of each present
        class followed by ``class_index + 1``; ``pos_samples`` rows are a
        zero column followed by every box whose best overlap with
        ``gt_boxes`` is >= 0.5.  Both are moved to CUDA when it is available.
    """
    boxes = boxes[..., 1:]
    num_images, num_classes = im_labels.shape
    assert num_images == 1, 'batch size shoud be equal to 1'
    im_labels_tmp = im_labels[0, :]
    gt_boxes = np.zeros((0, 5), dtype=np.float32)

    if 21 == cls_prob.shape[2]:
        cls_prob = cls_prob[:, :, 1:]

    for i in range(num_classes):
        if im_labels_tmp[i] == 1:
            gt_boxes_tmp = np.zeros((1, 5), dtype=np.float32)
            cls_prob_tmp = cls_prob[:, :, i].data
            max_index = np.argmax(cls_prob_tmp)
            gt_boxes_tmp[:, 0:4] = boxes[:, max_index, :].reshape(1, -1)
            gt_boxes_tmp[:, 4] = i + 1
            gt_boxes = np.vstack((gt_boxes, gt_boxes_tmp))

    # choose pos samples by gt.  np.float64 is used because the ``np.float``
    # alias no longer exists in current NumPy releases.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(boxes[0], dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    max_overlaps = overlaps.max(axis=1)
    fg_inds = np.where(max_overlaps >= 0.5)[0]

    pos_samples = np.empty((0, 4), dtype=np.float32)
    if fg_inds.shape[0] != 0:
        pos_samples = np.vstack((pos_samples, boxes[0][fg_inds, :]))
    # Prepend a zero column to every positive sample
    pos_samples = np.hstack(
        (np.zeros((pos_samples.shape[0], 1), dtype=np.float32), pos_samples))

    pos_samples = Variable(torch.from_numpy(np.array([pos_samples])))
    gt_boxes = Variable(torch.from_numpy(np.array([gt_boxes])))
    # Both outputs follow the same rule; previously pos_samples was moved to
    # CUDA unconditionally, which failed on CPU-only machines.
    if torch.cuda.is_available():
        pos_samples = pos_samples.cuda()
        gt_boxes = gt_boxes.cuda()
    return gt_boxes, pos_samples
def evaluate_recall(self, candidate_boxes=None, thresholds=None,
                    area='all', limit=None):
    """Evaluate detection proposal recall metrics.

    Args:
        candidate_boxes: optional sequence with one proposal array per image;
            when ``None`` the roidb boxes whose ``'gt_classes'`` is 0 are
            used.
        thresholds: optional overlap thresholds; defaults to 0.5 .. 0.95 in
            steps of 0.05.
        area: name of the gt-area range to evaluate (``'all'``, ``'small'``,
            ``'medium'``, ``'large'``, ``'96-128'``, ``'128-256'``,
            ``'256-512'``, ``'512-inf'``).
        limit: optional cap on the number of proposals used per image (the
            first ``limit`` rows are kept).

    Returns:
        results: dictionary of results with keys
            'ar': average recall
            'recalls': vector recalls at each IoU overlap threshold
            'thresholds': vector of IoU overlap thresholds
            'gt_overlaps': vector of all ground-truth overlaps
    """
    # Record max overlap value for each gt box
    # Return vector of overlap values
    area_ranges = {
        'all': (0 ** 2, 1e5 ** 2),
        'small': (0 ** 2, 32 ** 2),
        'medium': (32 ** 2, 96 ** 2),
        'large': (96 ** 2, 1e5 ** 2),
        '96-128': (96 ** 2, 128 ** 2),
        '128-256': (128 ** 2, 256 ** 2),
        '256-512': (256 ** 2, 512 ** 2),
        '512-inf': (512 ** 2, 1e5 ** 2),
    }
    assert area in area_ranges, 'unknown area range: {}'.format(area)
    area_lo, area_hi = area_ranges[area]

    gt_overlaps = np.zeros(0)
    num_pos = 0
    for i in range(self.num_images):
        entry = self.roidb[i]
        # Checking for max_overlaps == 1 avoids including crowd annotations
        # (...pretty hacking :/)
        max_gt_overlaps = entry['gt_overlaps'].toarray().max(axis=1)
        gt_inds = np.where((entry['gt_classes'] > 0) &
                           (max_gt_overlaps == 1))[0]
        gt_boxes = entry['boxes'][gt_inds, :]
        gt_areas = entry['seg_areas'][gt_inds]
        valid_gt_inds = np.where((gt_areas >= area_lo) &
                                 (gt_areas <= area_hi))[0]
        gt_boxes = gt_boxes[valid_gt_inds, :]
        num_pos += len(valid_gt_inds)

        if candidate_boxes is None:
            # If candidate_boxes is not supplied, the default is to use the
            # non-ground-truth boxes from this roidb
            non_gt_inds = np.where(entry['gt_classes'] == 0)[0]
            boxes = entry['boxes'][non_gt_inds, :]
        else:
            boxes = candidate_boxes[i]
        if boxes.shape[0] == 0:
            continue
        if limit is not None and boxes.shape[0] > limit:
            boxes = boxes[:limit, :]

        # np.float64 is used because the ``np.float`` alias no longer exists
        # in current NumPy releases.
        overlaps = bbox_overlaps(boxes.astype(np.float64),
                                 gt_boxes.astype(np.float64))

        # Greedy one-to-one matching: repeatedly take the best-covered gt
        # box, record its overlap, then retire both it and its proposal.
        _gt_overlaps = np.zeros((gt_boxes.shape[0]))
        for j in range(gt_boxes.shape[0]):
            # find which proposal box maximally covers each gt box
            argmax_overlaps = overlaps.argmax(axis=0)
            # and get the iou amount of coverage for each gt box
            max_overlaps = overlaps.max(axis=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert (gt_ovr >= 0)
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            _gt_overlaps[j] = overlaps[box_ind, gt_ind]
            assert (_gt_overlaps[j] == gt_ovr)
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        # append recorded iou coverage level
        gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

    gt_overlaps = np.sort(gt_overlaps)
    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.5, 0.95 + 1e-5, step)
    recalls = np.zeros_like(thresholds)
    # compute recall for each iou threshold
    for i, t in enumerate(thresholds):
        recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
    ar = recalls.mean()
    return {'ar': ar, 'recalls': recalls, 'thresholds': thresholds,
            'gt_overlaps': gt_overlaps}
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, data,
                        _feat_stride=(16,), anchor_scales=(4, 8, 16, 32)):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.

    Args:
        rpn_cls_score: array whose shape is ``(1, height, width, ...)``; only
            its shape is read.
        gt_boxes: ground-truth boxes handed to ``bbox_overlaps`` and
            ``_compute_targets``.
        im_info: sequence whose first element holds
            ``(image height, image width, scale)``.
        data: unused.
        _feat_stride: stride multiplied onto the feature-map grid to place
            the anchors (any 1-element sequence or scalar that broadcasts).
        anchor_scales: scales forwarded to ``generate_anchors``.

    Returns:
        ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``.  ``rpn_labels`` has shape
        ``(1, 1, A * height, width)`` with 1 = positive, 0 = negative and
        -1 = don't care; the other three have shape
        ``(1, A * 4, height, width)``, where ``A`` is the number of anchors
        per location.
    """
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print(
            np.hstack((
                _anchors[:, 2::4] - _anchors[:, 0::4],
                _anchors[:, 3::4] - _anchors[:, 1::4],
            )))
        # Running statistics; only ever read inside DEBUG sections
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0
    im_info = im_info[0]

    # Algorithm:
    #
    # for each (H, W) location i
    #   generate 9 anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the 9 anchors
    # filter out-of-image anchors
    # measure GT overlap

    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    if DEBUG:
        print('AnchorTargetLayer: height', height, 'width', width)
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape', gt_boxes.shape)
        print('rpn: gt_boxes', gt_boxes)

    # 1. Generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    all_anchors = (_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)    # height
    )[0]

    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', len(inds_inside))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors.shape', anchors.shape)

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt).  np.float64 is used because the ``np.float`` alias
    # no longer exists in current NumPy releases.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # every anchor that ties for the best overlap with some gt box
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors,
                                 inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors,
                                  inds_inside, fill=0)

    if DEBUG:
        print('rpn: max max_overlap', np.max(max_overlaps))
        print('rpn: num_positive', np.sum(labels == 1))
        print('rpn: num_negative', np.sum(labels == 0))
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print('rpn: num_positive avg', _fg_sum / _count)
        print('rpn: num_negative avg', _bg_sum / _count)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    rpn_labels = labels.reshape((1, 1, A * height, width))

    # bbox_targets
    rpn_bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)

    # bbox_inside_weights
    rpn_bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)

    # bbox_outside_weights
    rpn_bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)

    return (rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
            rpn_bbox_outside_weights)
def evaluate_recall(self, scale, candidate_boxes=None, thresholds=None,
                    limit=None, target='left'):
    """Evaluate stereo detection proposal recall metrics.

    Args:
        scale: divisor applied to the candidate boxes before matching.
        candidate_boxes: sequence with one proposal array per image; columns
            0:4 are the left box and columns 4: the right box.  Despite the
            ``None`` default a value is required (``len`` is taken of it).
        thresholds: optional overlap thresholds; defaults to 0.1 .. 0.95 in
            steps of 0.05.
        limit: optional cap on the number of proposals used per image (the
            first ``limit`` rows are kept).
        target: unused.

    Returns:
        results: dictionary of results with keys
            'ar_left', 'ar_right', 'ar_stereo': average recalls
            'recalls_left', 'recalls_right', 'recalls_stereo': vector
                recalls at each IoU overlap threshold
            'thresholds': vector of IoU overlap thresholds
    """

    def _greedy_match(overlaps, num_gt):
        """Greedily match proposals to gt boxes; mutates ``overlaps``.

        Returns the recorded overlap and proposal index of each match, in
        the order the matches were made (best-covered gt box first).
        """
        matched_overlaps = np.zeros((num_gt))
        matched_box_inds = np.zeros((num_gt), dtype=int)
        for j in range(num_gt):
            # find which proposal box maximally covers each gt box
            argmax_overlaps = overlaps.argmax(axis=0)
            # and get the iou amount of coverage for each gt box
            max_overlaps = overlaps.max(axis=0)
            # find which gt box is 'best' covered (i.e. 'best' = most iou)
            gt_ind = max_overlaps.argmax()
            gt_ovr = max_overlaps.max()
            assert (gt_ovr >= 0)
            # find the proposal box that covers the best covered gt box
            box_ind = argmax_overlaps[gt_ind]
            # record the iou coverage of this gt box
            matched_overlaps[j] = overlaps[box_ind, gt_ind]
            matched_box_inds[j] = box_ind
            assert (matched_overlaps[j] == gt_ovr)
            # mark the proposal box and the gt box as used
            overlaps[box_ind, :] = -1
            overlaps[:, gt_ind] = -1
        return matched_overlaps, matched_box_inds

    # Record max overlap value for each gt box
    gt_overlaps_left = np.zeros(0)
    max_overlaps_inx_left = np.zeros(0)
    gt_overlaps_right = np.zeros(0)
    max_overlaps_inx_right = np.zeros(0)
    num_pos = 0

    for i in range(len(candidate_boxes)):
        entry = self.roidb[i]
        # Keep gt boxes of class 1 that are at least 25 high, with
        # occlusion <= 1 and truncation <= 0.3
        gt_inds = np.where(
            (entry['boxes_left'][:, 3] - entry['boxes_left'][:, 1] >= 25) &
            (entry['occlusion'][:] <= 1) &
            (entry['truncation'][:] <= 0.3) &
            (entry['gt_classes'][:] == 1))[0]
        gt_boxes_left = entry['boxes_left'][gt_inds, :]
        gt_boxes_right = entry['boxes_right'][gt_inds, :]
        num_pos += len(gt_inds)

        boxes_left = candidate_boxes[i][:, :4] / scale
        boxes_right = candidate_boxes[i][:, 4:] / scale
        if boxes_left.shape[0] == 0:
            continue
        if limit is not None and boxes_left.shape[0] > limit:
            boxes_left = boxes_left[:limit, :]
            boxes_right = boxes_right[:limit, :]

        # np.float64 is used because the ``np.float`` alias no longer exists
        # in current NumPy releases.
        overlaps_left = bbox_overlaps(boxes_left[:, :4].astype(np.float64),
                                      gt_boxes_left.astype(np.float64))
        overlaps_right = bbox_overlaps(boxes_right[:, :4].astype(np.float64),
                                       gt_boxes_right.astype(np.float64))

        # left
        _gt_overlaps_left, _max_overlaps_inx_left = _greedy_match(
            overlaps_left, gt_boxes_left.shape[0])
        # append recorded iou coverage level
        gt_overlaps_left = np.hstack((gt_overlaps_left, _gt_overlaps_left))
        max_overlaps_inx_left = np.hstack(
            (max_overlaps_inx_left, _max_overlaps_inx_left))

        # right
        _gt_overlaps_right, _max_overlaps_inx_right = _greedy_match(
            overlaps_right, gt_boxes_right.shape[0])
        # append recorded iou coverage level
        gt_overlaps_right = np.hstack((gt_overlaps_right, _gt_overlaps_right))
        max_overlaps_inx_right = np.hstack(
            (max_overlaps_inx_right, _max_overlaps_inx_right))

    if thresholds is None:
        step = 0.05
        thresholds = np.arange(0.1, 0.95 + 1e-5, step)

    # compute recall for each iou threshold
    recalls_left = np.zeros_like(thresholds)
    for i, t in enumerate(thresholds):
        recalls_left[i] = (gt_overlaps_left >= t).sum() / float(num_pos)
    ar_left = recalls_left.mean()

    recalls_right = np.zeros_like(thresholds)
    for i, t in enumerate(thresholds):
        recalls_right[i] = (gt_overlaps_right >= t).sum() / float(num_pos)
    ar_right = recalls_right.mean()

    # NOTE(review): the left/right entries are paired by match order, and the
    # proposal indices are compared with ``>=`` -- confirm this is the
    # intended definition of a stereo hit.
    recalls_stereo = np.zeros_like(thresholds)
    for i, t in enumerate(thresholds):
        recalls_stereo[i] = (
            (gt_overlaps_left >= t) &
            (gt_overlaps_right >= t) &
            (max_overlaps_inx_right >= max_overlaps_inx_left)
        ).sum() / float(num_pos)
    ar_stereo = recalls_stereo.mean()

    return {'ar_left': ar_left, 'recalls_left': recalls_left,
            'ar_right': ar_right, 'recalls_right': recalls_right,
            'ar_stereo': ar_stereo, 'recalls_stereo': recalls_stereo,
            'thresholds': thresholds}