Example #1
0
def _compute_targets(gt_rois, ex_rois):
    """Compute bounding-box regression targets for an image.

    Each example roi whose best overlap with any ground-truth roi reaches
    ``cfg.SEAR.ADJ_THRESH`` is split into sub-regions (``cfg.SEAR.SUBREGION``
    scaled by the roi size and shifted to the roi origin).  Ground-truth
    objects are then greedily paired with sub-regions, highest overlap first,
    each sub-region and each object being used at most once per roi.

    Args:
        gt_rois: ground truth rois; the first axis indexes the N objects.
        ex_rois: example rois; the first axis indexes the K regions.

    Returns:
        A 2-D array with seven columns: the deltas returned by
        ``_compute_bbox_deltas``, then the example-roi index, the sub-region
        index and the overlap between the example roi and the matched
        object.  With no matches the array has zero rows and dtype float32.
    """
    K = ex_rois.shape[0]
    N = gt_rois.shape[0]
    # Ensure ROIs are floats.  np.float64 is the dtype the old ``np.float``
    # alias resolved to; the alias itself is gone from current NumPy.
    gt_rois = gt_rois.astype(np.float64, copy=False)
    ex_rois = ex_rois.astype(np.float64, copy=False)

    # bbox targets: (box deltas, ex_rois_ind, subreg_ind, overlap)
    targets = np.zeros((0, 7), dtype=np.float32)

    if K == 0 or N == 0:
        return targets

    # For each region, find out objects that are adjacent
    # Match objects to sub-regions with maximum overlaps.
    # Objects with large overlaps with any sub-regions are given priority.
    overlaps = bbox_overlaps(ex_rois, gt_rois)
    max_overlaps = overlaps.max(axis=1)

    # Matched rows are collected and stacked once at the end; stacking inside
    # the loop re-copies every previous row on each match.
    matched_rows = []
    for k in xrange(K):
        if max_overlaps[k] < cfg.SEAR.ADJ_THRESH:
            continue

        re = ex_rois[k, :]
        L = np.array([[re[2]-re[0], re[3]-re[1], re[2]-re[0], re[3]-re[1]]])
        delta = np.array([[re[0], re[1], re[0], re[1]]])
        # sub-regions
        s_re = (L * cfg.SEAR.SUBREGION) + delta
        s_re = s_re.astype(np.float64, copy=False)
        # compute the overlaps between sub-regions and each objects
        sre_gt_overlaps = bbox_overlaps(s_re, gt_rois)
        # find out the objects that are actually adjacent; adjacency is
        # judged on the first sub-region row only
        adj_th = (sre_gt_overlaps[0] >= cfg.SEAR.ADJ_THRESH)
        match_inds = np.where(adj_th)[0]
        # non-adjacent objects can never win the argmax below
        sre_gt_overlaps[:, ~adj_th] = -1

        for _ in xrange(min(cfg.SEAR.NUM_SUBREG, match_inds.shape[0])):
            reg_idx, gt_idx = np.unravel_index(sre_gt_overlaps.argmax(),
                                               sre_gt_overlaps.shape)
            t_ki = _compute_bbox_deltas(ex_rois[[k], :],
                                        gt_rois[[gt_idx], :])
            matched_rows.append(
                np.hstack((t_ki,
                           np.array([[k, reg_idx, overlaps[k, gt_idx]]]))))

            # retire both the sub-region and the object that were just paired
            sre_gt_overlaps[reg_idx, :] = -1
            sre_gt_overlaps[:, gt_idx] = -1

    if matched_rows:
        targets = np.vstack([targets] + matched_rows)
    return targets
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes, dontcare):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: candidate RoIs; columns 1:5 are used as box coordinates.
        gt_boxes: ground-truth boxes; columns 0:4 are used as coordinates and
            column 4 as the label.
        fg_rois_per_image: upper bound on the number of foreground RoIs.
        rois_per_image: upper bound on the total number of sampled RoIs.
        num_classes: forwarded to ``_get_bbox_regression_labels``.
        dontcare: boxes to be ignored; background candidates whose best
            overlap with one of them reaches ``cfg.TRAIN.FG_THRESH`` are
            dropped.  May be empty.

    Returns:
        Tuple ``(labels, rois, bbox_targets, bbox_inside_weights)`` with the
        foreground samples first and background labels clamped to 0.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs.  The int() keeps the value usable as a sample size and
    # slice bound even when the caller passes a float such as 32.0.
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]

    # Remove background candidates that sit on a dontcare box
    if dontcare.size != 0 and bg_inds.size > 0:
        dc_overlaps = bbox_overlaps(
            np.ascontiguousarray(all_rois[bg_inds, 1:5], dtype=np.float64),
            np.ascontiguousarray(dontcare, dtype=np.float64))
        bg_inds = bg_inds[dc_overlaps.max(axis=1) < cfg.TRAIN.FG_THRESH]

    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = int(min(rois_per_image - fg_rois_per_this_image,
                                     bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)

    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets, bbox_inside_weights
Example #3
0
    def create_roidb_from_box_list(self, box_list, gt_roidb):
        """Build one roidb entry per image from a list of proposal boxes.

        Args:
            box_list: one box array per image; must have ``self.num_images``
                entries.
            gt_roidb: optional per-image dicts with ``"boxes"`` and
                ``"gt_classes"``.  When given, each proposal records its best
                overlap under the class of the ground-truth box it overlaps
                most.

        Returns:
            A list of dicts with keys ``"boxes"``, ``"gt_classes"`` (all
            zeros), ``"gt_overlaps"`` (sparse, proposals x classes) and
            ``"flipped"`` (False).
        """
        assert len(box_list) == self.num_images, "Number of boxes must match number of ground-truth images"
        roidb = []
        for i in xrange(self.num_images):
            boxes = box_list[i]
            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

            if gt_roidb is not None:
                gt_boxes = gt_roidb[i]["boxes"]
                gt_classes = gt_roidb[i]["gt_classes"]
                # np.float64 is what the removed ``np.float`` alias meant.
                gt_overlaps = bbox_overlaps(boxes.astype(np.float64), gt_boxes.astype(np.float64))
                # With no ground-truth columns there is nothing to take an
                # argmax over; ``overlaps`` simply stays all-zero.
                if gt_overlaps.shape[1] > 0:
                    argmaxes = gt_overlaps.argmax(axis=1)
                    maxes = gt_overlaps.max(axis=1)
                    I = np.where(maxes > 0)[0]
                    overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

            overlaps = scipy.sparse.csr_matrix(overlaps)
            roidb.append(
                {
                    "boxes": boxes,
                    "gt_classes": np.zeros((num_boxes,), dtype=np.int32),
                    "gt_overlaps": overlaps,
                    "flipped": False,
                }
            )
        return roidb
Example #4
0
def _anchor_target_layer(anchors, gt_boxes, im_info, feat_stride, num_anchors, rpn_cls_score):
    """Label the anchors that lie strictly inside the feature-map extent.

    Labels start at -1; anchors whose best overlap is below 0.3 become 0,
    anchors that achieve the best overlap for some ground-truth box become 1,
    and anchors whose best overlap exceeds 0.7 become 1.

    Args:
        anchors: anchor boxes, columns used as (x1, y1, x2, y2).
        gt_boxes: ground-truth boxes passed to ``bbox_overlaps``.
        im_info, num_anchors: not used by the code visible here.
        feat_stride: scales the feature-map size to the anchor coordinates.
        rpn_cls_score: only ``shape[1:3]`` is read, as (height, width).

    NOTE(review): the body visible here ends once the labels are assigned
    and returns nothing -- it looks like an excerpt of a longer function.
    """
    height, width = rpn_cls_score.shape[1:3]
    indexs = np.where((anchors[:, 0] > 0) &
                      (anchors[:, 1] > 0) &
                      (anchors[:, 2] < width*feat_stride) &
                      (anchors[:, 3] < height*feat_stride))[0]
    inside_anchors = anchors[indexs]

    labels = np.zeros((len(indexs),), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(inside_anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    arg_max_overlaps = np.argmax(overlaps, axis=1)
    max_overlaps = overlaps[np.arange(overlaps.shape[0]), arg_max_overlaps]
    gt_arg_max_overlaps = np.argmax(overlaps, axis=0)
    gt_max_overlaps = overlaps[gt_arg_max_overlaps, np.arange(overlaps.shape[1])]

    # Every anchor that ties for the best overlap of some gt box.  np.where
    # returns a (rows, cols) tuple; only the row (anchor) indices may be used
    # to index the 1-D ``labels`` -- the full tuple raises IndexError.
    gt_arg_max_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    labels[max_overlaps < 0.3] = 0
    labels[gt_arg_max_overlaps] = 1
    labels[max_overlaps > 0.7] = 1
Example #5
0
    def create_roidb_from_box_list(self, box_list, gt_roidb):
        """Build one roidb entry per image from a list of proposal boxes.

        Args:
            box_list: one box array per image; must have ``self.num_images``
                entries.
            gt_roidb: optional per-image dicts with ``'boxes'`` and
                ``'gt_classes'``.  Images whose ``'boxes'`` array is empty are
                treated as having no ground truth.

        Returns:
            A list of dicts with keys ``'boxes'``, ``'gt_classes'`` (zeros),
            ``'gt_overlaps'`` (sparse, proposals x classes), ``'flipped'``
            (False) and ``'seg_areas'`` (zeros).
        """
        assert len(box_list) == self.num_images, \
                'Number of boxes must match number of ground-truth images'
        roidb = []
        for i in xrange(self.num_images):
            boxes = box_list[i]
            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

            if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                # np.float64 is what the removed ``np.float`` alias meant.
                gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                            gt_boxes.astype(np.float64))
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                # Only proposals that touch some gt box get an entry, filed
                # under the class of their best-matching gt box.
                I = np.where(maxes > 0)[0]
                overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

            overlaps = scipy.sparse.csr_matrix(overlaps)
            roidb.append({
                'boxes' : boxes,
                'gt_classes' : np.zeros((num_boxes,), dtype=np.int32),
                'gt_overlaps' : overlaps,
                'flipped' : False,
                'seg_areas' : np.zeros((num_boxes,), dtype=np.float32),
            })
        return roidb
def _compute_targets(rois, overlaps, labels):
    """Compute bounding-box regression targets for an image.

    Args:
        rois: all boxes of the image, ground truth and examples alike.
        overlaps: per-roi overlap value; entries equal to 1 mark the
            ground-truth rois.
        labels: per-roi class label.

    Returns:
        A float32 array with one row per roi and five columns: the label
        followed by the output of ``bbox_transform``.  Rows of rois below
        ``cfg.TRAIN.BBOX_THRESH`` stay zero, as does the whole array when
        the image has no ground-truth roi.
    """
    # Indices of ground-truth ROIs
    gt_inds = np.where(overlaps == 1)[0]
    if len(gt_inds) == 0:
        # Bail if the image has no ground-truth ROIs
        return np.zeros((rois.shape[0], 5), dtype=np.float32)
    # Indices of examples for which we try to make predictions
    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]

    # Get IoU overlap between each ex ROI and gt ROI.  np.float64 is what
    # the removed ``np.float`` alias meant.
    ex_gt_overlaps = bbox_overlaps(
        np.ascontiguousarray(rois[ex_inds, :], dtype=np.float64),
        np.ascontiguousarray(rois[gt_inds, :], dtype=np.float64))

    # Find which gt ROI each ex ROI has max overlap with:
    # this will be the ex ROI's gt target
    gt_assignment = ex_gt_overlaps.argmax(axis=1)
    gt_rois = rois[gt_inds[gt_assignment], :]
    ex_rois = rois[ex_inds, :]

    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
    targets[ex_inds, 0] = labels[ex_inds]
    targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
    return targets
Example #7
0
    def extract_pos_and_neg_feat(self):
        """Collect per-class positive features from the cached .mat files.

        For every image listed in ``self.imdb.image_index`` the stored boxes
        and features are loaded, and the feature of every box overlapping a
        ground-truth box by more than 0.5 is appended to that box's class.
        Ground truth is read from the module-level ``gts``.

        Up to 50 features per image from boxes that overlap no ground-truth
        box by more than 0.2 are also gathered into a local negative buffer.
        NOTE(review): that buffer is filled but never returned or stored.

        Returns:
            dict mapping class index (0..19) to an array of positive
            features, one row per feature, 4096 columns.
        """
        # extract positive features
        pos = {}
        neg = np.zeros([50000, 4096])
        for i in range(20):
            pos[i] = np.zeros([0, 4096])
        neg_cnt = 0
        for i in range(len(self.imdb.image_index)):
            print(str(i) + " "),
            data = sio.loadmat(os.path.join(self.DATA_ROOT_PATH,
                self.imdb.image_index[i]))
            boxes = data['boxes']
            feat = data['feat']
            gt_boxes   = gts['boxes'][0][i]
            gt_classes = gts['class'][0][i]
            # np.float64 is what the removed ``np.float`` alias meant.
            overlaps = bbox_overlaps(gt_boxes.astype(np.float64),
                    boxes.astype(np.float64))

            # Box indices that overlap some gt box too much to be negatives.
            # (These must be box indices, because the candidates below are
            # box/feature indices.)
            black_list = set()
            for idx in range(len(gt_boxes)):
                row = overlaps[idx, :boxes.shape[0]]
                hits = np.where(row > 0.5)[0]
                if hits.size > 0:
                    cls = gt_classes[idx][0] - 1   # classes are stored 1-based
                    # one stack per gt box instead of one per matching box
                    pos[cls] = np.vstack([pos[cls], feat[hits, :]])
                black_list.update(np.where(row > 0.2)[0].tolist())

            if neg_cnt < neg.shape[0]:
                cand = np.array(
                    sorted(set(range(feat.shape[0])) - black_list), dtype=int)
                rndidx = np.random.permutation(len(cand))[0:50]
                neg_feat = feat[cand[rndidx], :]
                # An image may offer fewer than 50 candidates, and the buffer
                # may have fewer than 50 free rows left.
                take = min(neg_feat.shape[0], neg.shape[0] - neg_cnt)
                neg[neg_cnt:neg_cnt + take] = neg_feat[:take]
                neg_cnt += take

        return pos
Example #8
0
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes,sample_type='fpn', k0 = 4):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: candidate RoIs; columns 1:5 are used as box coordinates.
        gt_boxes: ground-truth boxes; columns 0:4 are used as coordinates and
            column 4 as the label.
        fg_rois_per_image: upper bound on the number of foreground RoIs.
        rois_per_image: upper bound on the total number of sampled RoIs.
        num_classes: forwarded to ``_get_bbox_regression_labels``.
        sample_type: with ``'fpn'`` a per-roi pyramid level is returned too.
        k0: level offset used in ``floor(k0 + log2(sqrt(w*h) / 224))``.

    Returns:
        ``(rois, labels, bbox_targets, bbox_inside_weights)``, followed by
        ``layer_index`` (clipped to [2, 5]) when ``sample_type == 'fpn'``.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs.  The int() keeps the value usable as a sample size and
    # slice bound even when the caller passes a float such as 32.0.
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = int(min(rois_per_image - fg_rois_per_this_image,
                                     bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    if sample_type != 'fpn':
        return rois, labels, bbox_targets, bbox_inside_weights

    w = (rois[:,3]-rois[:,1])
    h = (rois[:,4]-rois[:,2])
    s = w * h
    # degenerate boxes would otherwise feed log2 a non-positive area
    s[s<=0]=1e-6
    layer_index = np.floor(k0+np.log2(np.sqrt(s)/224))

    layer_index[layer_index<2]=2
    layer_index[layer_index>5]=5
    return rois, labels, bbox_targets, bbox_inside_weights, layer_index
Example #9
0
    def create_roidb_from_box_list(self, box_list, gt_roidb):
        """Build one roidb entry per image from a list of proposal boxes.

        At most ``cfg.MAX_PROPOSAL_NO`` proposals are kept per image.

        Args:
            box_list: one box array per image; must have ``self.num_images``
                entries.
            gt_roidb: optional per-image dicts with ``'gt_boxes'`` and
                ``'gt_classes'``.  Images without any ground-truth box keep an
                all-zero overlap matrix.

        Returns:
            A list of dicts with keys ``'boxes'``, ``'gt_classes'`` (zeros),
            ``'gt_overlaps'`` (sparse, proposals x classes) and ``'flipped'``
            (False).
        """
        assert len(box_list) == self.num_images, \
                'Number of boxes must match number of ground-truth images. %s vs. %s' % (len(box_list), self.num_images)
        roidb = []
        print('create_roidb_from_box_list() start')
        for i in xrange(self.num_images):
            max_proposal_box = cfg.MAX_PROPOSAL_NO
            boxes = box_list[i][:max_proposal_box]
            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

            # An image with zero gt boxes has nothing to take an argmax over.
            if gt_roidb is not None and gt_roidb[i]['gt_boxes'].shape[0] > 0:
                gt_boxes = gt_roidb[i]['gt_boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                # np.float64 is what the removed ``np.float`` alias meant.
                gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                            gt_boxes.astype(np.float64))
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                I = np.where(maxes > 0)[0]
                overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

            overlaps = scipy.sparse.csr_matrix(overlaps)
            roidb.append({'boxes' : boxes,
                          'gt_classes' : np.zeros((num_boxes,),
                                                  dtype=np.int32),
                          'gt_overlaps' : overlaps,
                          'flipped' : False})
        print('create_roidb_from_box_list() end')
        return roidb
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):
  """Generate a random sample of RoIs comprising foreground and background
  examples.

  Exactly ``rois_per_image`` RoIs are returned; when one of the two pools is
  too small (or empty) the other is sampled with replacement to fill up.

  Args:
    all_rois: candidate RoIs; columns 1:5 are used as box coordinates.
    all_scores: per-roi scores, sub-sampled alongside the rois.
    gt_boxes: ground-truth boxes; columns 0:4 are used as coordinates.  The
      label is read from column 12 when ``cfg.FRAME_REG`` is set, otherwise
      from column 4.
    fg_rois_per_image: upper bound on the number of foreground RoIs.
    rois_per_image: total number of RoIs to return.
    num_classes: forwarded to ``_get_bbox_regression_labels``.

  Returns:
    ``(labels, rois, roi_scores, bbox_targets, bbox_inside_weights,
    poly_targets, poly_inside_weights)``.

  Raises:
    ValueError: if no RoI qualifies as either foreground or background.
  """
  # overlaps: (rois x gt_boxes)
  overlaps = bbox_overlaps(
    np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
    np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
  gt_assignment = overlaps.argmax(axis=1)
  max_overlaps = overlaps.max(axis=1)
  if cfg.FRAME_REG:
    labels = gt_boxes[gt_assignment, 12]
  else:
    labels = gt_boxes[gt_assignment, 4]
  # Select foreground RoIs as those with >= FG_THRESH overlap
  fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
  # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
  bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                     (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]

  # Small modification to the original version where we ensure a fixed number of regions are sampled
  if fg_inds.size > 0 and bg_inds.size > 0:
    fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
    fg_inds = npr.choice(fg_inds, size=int(fg_rois_per_image), replace=False)
    bg_rois_per_image = rois_per_image - fg_rois_per_image
    to_replace = bg_inds.size < bg_rois_per_image
    bg_inds = npr.choice(bg_inds, size=int(bg_rois_per_image), replace=to_replace)
  elif fg_inds.size > 0:
    to_replace = fg_inds.size < rois_per_image
    fg_inds = npr.choice(fg_inds, size=int(rois_per_image), replace=to_replace)
    fg_rois_per_image = rois_per_image
  elif bg_inds.size > 0:
    to_replace = bg_inds.size < rois_per_image
    bg_inds = npr.choice(bg_inds, size=int(rois_per_image), replace=to_replace)
    fg_rois_per_image = 0
  else:
    # Previously this dropped into pdb, which stalls or aborts an unattended
    # run; fail with an explicit error instead.
    raise ValueError(
      'no RoI qualifies as foreground or background; cannot sample')

  # The indices that we're selecting (both fg and bg)
  keep_inds = np.append(fg_inds, bg_inds)
  # Select sampled values from various arrays:
  labels = labels[keep_inds]
  # Clamp labels for the background RoIs to 0
  labels[int(fg_rois_per_image):] = 0
  rois = all_rois[keep_inds]
  roi_scores = all_scores[keep_inds]
  # number of leading gt columns handed to the target computation
  p = 12 if cfg.FRAME_REG else 4
  bbox_target_data, poly_target_data = _compute_targets(
    rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :p], labels)

  bbox_targets, bbox_inside_weights, poly_targets, poly_inside_weights = \
    _get_bbox_regression_labels(bbox_target_data, poly_target_data, num_classes)
  return labels, rois, roi_scores, bbox_targets, bbox_inside_weights,\
         poly_targets, poly_inside_weights
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes):#, pose_a, pose_e):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: candidate RoIs; columns 1:5 are used as box coordinates.
        gt_boxes: ground-truth boxes; columns 0:4 are used as coordinates,
            column 4 as the label and columns 5, 6, 7 as the three pose
            values returned as ``poses_a``, ``poses_e``, ``poses_t``.
        fg_rois_per_image: upper bound on the number of foreground RoIs.
        rois_per_image: upper bound on the total number of sampled RoIs.
        num_classes: forwarded to ``_get_bbox_regression_labels``.

    Returns:
        ``(labels, rois, bbox_targets, bbox_inside_weights, poses_a,
        poses_e, poses_t)``.  Background rois get label 0 and pose -1.
    """
    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]
    poses_a = gt_boxes[gt_assignment, 5]
    poses_e = gt_boxes[gt_assignment, 6]
    poses_t = gt_boxes[gt_assignment, 7]
    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs.  The int() keeps the value usable as a sample size and
    # slice bound even when the caller passes a float such as 32.0.
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = int(min(rois_per_image - fg_rois_per_this_image,
                                     bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    poses_a = poses_a[keep_inds]
    poses_e = poses_e[keep_inds]
    poses_t = poses_t[keep_inds]
    # Clamp labels for the background RoIs to 0 and their poses to -1
    labels[fg_rois_per_this_image:] = 0
    poses_a[fg_rois_per_this_image:] = -1
    poses_e[fg_rois_per_this_image:] = -1
    poses_t[fg_rois_per_this_image:] = -1
    rois = all_rois[keep_inds]
    # Same output as the former print statement, valid on Python 2 and 3.
    print(list(zip(labels, poses_a)))
    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    bbox_targets, bbox_inside_weights = _get_bbox_regression_labels(bbox_target_data, num_classes)
    return labels, rois, bbox_targets, bbox_inside_weights, poses_a, poses_e, poses_t
Example #12
0
def calc_precision_recall(all_boxes, imdb):
    """Print detection precision/recall and dump the images that went wrong.

    A detection counts as a hit when it encloses the ground-truth box it
    overlaps most, allowing 10 pixels of slack on every side.  Several hits
    on the same ground-truth box count once.  An image is a "bad case" when
    its number of hits differs from its number of detections or of
    ground-truth boxes; such images are rendered with ``save_detection_res``
    into ``<cfg.ROOT_DIR>/data/bad_case_<imdb.name>``.  That directory is
    created if missing, otherwise the files in it are deleted first.

    Args:
        all_boxes: per-image detection arrays; the last column is dropped
            before boxes are compared (presumably a score -- TODO confirm).
        imdb: provides ``name``, ``roidb`` and ``image_path_at``.

    Returns:
        None; results are printed.  Precision or recall is reported as 0.0
        when there are no detections or no ground-truth boxes at all.
    """
    res_num = {'tp': 0, 'gt': 0, 'det': 0, 'bad_case': 0}

    # save bad case result
    bad_case_output_dir = os.path.join(cfg.ROOT_DIR, 'data', 'bad_case_'+imdb.name)
    if not os.path.exists(bad_case_output_dir):
        os.makedirs(bad_case_output_dir)
    else:
        for f in os.listdir(bad_case_output_dir):
            os.remove(os.path.join(bad_case_output_dir, f))

    gt_roidb = imdb.roidb
    outside_pad = 10

    def bounding(box, gt_box):
        # True when ``box`` covers ``gt_box`` up to ``outside_pad`` pixels.
        return np.all((box[:2] <= gt_box[:2] + outside_pad) &
                      (box[2:] >= gt_box[2:] - outside_pad))

    for im_i, boxes in enumerate(all_boxes):
        gt_boxes = gt_roidb[im_i]['boxes']
        if len(boxes) > 0 and len(gt_boxes) > 0:
            # np.float64 is what the removed ``np.float`` alias meant.
            gt_overlaps = bbox_overlaps(boxes[:,:-1].astype(np.float64),
                                        gt_boxes.astype(np.float64))
            argmaxes = gt_overlaps.argmax(axis=1)
            tp_inds = np.zeros((argmaxes.shape[0]), dtype=bool)
            for box_i, box in enumerate(boxes):
                if bounding(box[:-1], gt_boxes[argmaxes[box_i]]):
                    tp_inds[box_i] = True
            # distinct gt boxes that were hit
            tp_num = np.unique(argmaxes[tp_inds]).size
        else:
            # nothing can match; also avoids an argmax over an empty axis
            tp_num = 0

        res_num['tp'] = res_num['tp'] + tp_num
        res_num['gt'] = res_num['gt'] + len(gt_boxes)
        res_num['det'] = res_num['det'] + len(boxes)

        if tp_num != len(boxes) or tp_num != len(gt_boxes):
            res_num['bad_case'] = res_num['bad_case'] + 1
            img_path = imdb.image_path_at(im_i)
            im = cv2.imread(img_path)
            bad_name = os.path.splitext(os.path.basename(img_path))[0]
            res_im_file = os.path.join(bad_case_output_dir, '{:s}.jpg'.format(bad_name))
            save_detection_res(im, res_im_file, boxes, gt_boxes)
            print('images: {:d}/{:d}  !!!  BAD CASE'.format(im_i, len(all_boxes)))
        else:
            print('images: {:d}/{:d}'.format(im_i, len(all_boxes)))

    precision = float(res_num['tp'])/float(res_num['det']) if res_num['det'] else 0.0
    recall = float(res_num['tp'])/float(res_num['gt']) if res_num['gt'] else 0.0

    print('=' * 20)
    print('final bad case number: {:d}'.format(res_num['bad_case']))
    print('final precision: {:.3f}, recall: {:.3f}.'.format(precision, recall))
    print('=' * 20)
Example #13
0
def compare(name,dets,thresh):
    """Relate the NMS result for ``dets`` to the ground truth of ``name``.

    Args:
        name: identifier handed to ``nms_gt`` to obtain the ground truth.
        dets: detections; columns 0:4 are used as box coordinates.
        thresh: threshold handed to ``nms_proposal``.

    Returns:
        ``(suppressed, gt_sup)``: the output of ``nms_proposal`` and a copy
        of it in which every detection overlapping a ground-truth box by
        more than 0.5 is replaced by ``-(index of that gt box + 1)``.
    """
    suppressed=nms_proposal(dets,thresh)
    gt = nms_gt(name)

    # np.float64 is what the removed ``np.float`` alias meant.
    overlaps = bbox_overlaps(
            np.ascontiguousarray(dets[:,0:4], dtype=np.float64),
            np.ascontiguousarray(gt, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(overlaps)), argmax_overlaps]
    site = np.where(max_overlaps > 0.5)

    gt_sup=np.array(suppressed)
    # negative, 1-based gt index so a match cannot be mistaken for 0
    gt_sup[site]=(argmax_overlaps[site]+1)*-1
    return suppressed,gt_sup
Example #14
0
    def _sample_output(self, all_rois, gt_boxes, im_scale, gt_masks, mask_info):
        """Split rois into foreground/background and build box and mask targets.

        Every roi is kept: those whose best overlap reaches
        ``cfg.TRAIN.BBOX_THRESH`` come first (foreground), the rest follow
        with their label clamped to 0.

        Args:
            all_rois: candidate RoIs; columns 1:5 are used as coordinates.
            gt_boxes: ground-truth boxes; columns 0:4 are coordinates and
                column 4 is the label.
            im_scale: roi and gt coordinates are divided by this before the
                mask targets are cut out.
            gt_masks: per-gt mask arrays, indexed like ``gt_boxes``.
            mask_info: per-gt pair whose two entries bound the valid part of
                the corresponding mask (rows, then columns).

        Returns:
            ``(labels, rois, fg_inds, keep_inds, pos_masks, top_mask_info,
            bbox_targets, bbox_inside_weights)``.  ``top_mask_info`` rows
            hold, for foreground rois: gt index, the two mask-info values,
            label, rounded roi box and rounded gt box; background rows are
            filled with -1.
        """
        # np.float64 is what the removed ``np.float`` alias meant.
        overlaps = bbox_overlaps(
            np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)
        labels = gt_boxes[gt_assignment, 4]
        # Sample foreground indexes
        fg_inds = np.where(max_overlaps >= cfg.TRAIN.BBOX_THRESH)[0]
        bg_inds = np.where(max_overlaps < cfg.TRAIN.BBOX_THRESH)[0]
        keep_inds = np.append(fg_inds, bg_inds).astype(int)
        # Select sampled values from various arrays:
        labels = labels[keep_inds]
        # Clamp labels for the background RoIs to 0
        labels[len(fg_inds):] = 0
        rois = all_rois[keep_inds]

        bbox_target_data = bbox_compute_targets(
            rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], normalize=True)
        bbox_target_data = np.hstack((labels[:, np.newaxis], bbox_target_data))\
            .astype(np.float32, copy=False)
        # NOTE(review): 21 is hard-coded here, presumably the class count.
        bbox_targets, bbox_inside_weights = get_bbox_regression_label(
            bbox_target_data, 21)

        scaled_rois = rois[:, 1:5] / float(im_scale)
        scaled_gt_boxes = gt_boxes[:, :4] / float(im_scale)

        pos_masks = np.zeros((len(keep_inds), 1,  cfg.MASK_SIZE,  cfg.MASK_SIZE))
        top_mask_info = np.zeros((len(keep_inds), 12))
        top_mask_info[len(fg_inds):, :] = -1

        # Foreground rois occupy the first len(fg_inds) rows of ``rois``, so
        # position ``i`` there corresponds to original index ``val``.
        for i, val in enumerate(fg_inds):
            gt_box = scaled_gt_boxes[gt_assignment[val]]
            gt_box = np.around(gt_box).astype(int)
            ex_box = np.around(scaled_rois[i]).astype(int)
            gt_mask = gt_masks[gt_assignment[val]]
            gt_mask_info = mask_info[gt_assignment[val]]
            gt_mask = gt_mask[0:gt_mask_info[0], 0:gt_mask_info[1]]
            # regression targets is the intersection of bounding box and gt mask
            ex_mask = intersect_mask(ex_box, gt_box, gt_mask)
            pos_masks[i, ...] = ex_mask
            top_mask_info[i, 0] = gt_assignment[val]
            top_mask_info[i, 1] = gt_mask_info[0]
            top_mask_info[i, 2] = gt_mask_info[1]
            top_mask_info[i, 3] = labels[i]
            top_mask_info[i, 4:8] = ex_box
            top_mask_info[i, 8:12] = gt_box

        return labels, rois, fg_inds, keep_inds, pos_masks, top_mask_info, bbox_targets, bbox_inside_weights
Example #15
0
 def _matched_information(self, curr_retrieved, curr_gt):
     """Flag, per overlap threshold, which retrieved boxes match ground truth.

     Each retrieved box is compared only with the ground-truth box it
     overlaps most.  Boxes are visited in order, and a ground-truth box can
     be claimed by one retrieved box per threshold.

     Args:
         curr_retrieved: retrieved boxes for one image.
         curr_gt: ground-truth boxes for the same image.

     Returns:
         Boolean array of shape ``(len(self.all_matched_threshold),
         len(curr_retrieved))``; all False when either input is empty.
     """
     # Plain ``bool`` / ``np.float64`` replace the ``np.bool`` / ``np.float``
     # aliases that were removed from NumPy.
     is_matched = np.zeros((len(self.all_matched_threshold),
         len(curr_retrieved)), dtype = bool)
     if len(curr_retrieved) == 0 or len(curr_gt) == 0:
         return is_matched

     gt_overlaps = bbox_overlaps(curr_retrieved.astype(np.float64),
                                 curr_gt.astype(np.float64))
     matched_idx = gt_overlaps.argmax(axis = 1)
     for k, matched_threshold in enumerate(self.all_matched_threshold):
         gt_used = np.zeros(len(curr_gt), dtype = bool)
         for i, j in enumerate(matched_idx):
             if gt_overlaps[i, j] >= matched_threshold and not gt_used[j]:
                 gt_used[j] = True
                 is_matched[k, i] = True
     return is_matched
Example #16
0
    def evaluate_recall(self, candidate_boxes=None, ar_thresh=0.5):
        """Measure how well candidate boxes cover the ground-truth boxes.

        For every image, ground-truth boxes and candidates are paired
        greedily: the pair with the highest overlap is recorded and both its
        box and its ground-truth box are removed, once per ground-truth box.

        Args:
            candidate_boxes: optional per-image box arrays.  When None, the
                roidb boxes with ``gt_classes == 0`` are used as candidates.
            ar_thresh: not used by this implementation.

        Returns:
            ``(ar, gt_overlaps, recalls, thresholds)``: twice the area under
            the recall curve, the sorted overlap recorded for each
            ground-truth box, and the recall at each threshold from 0.5 to
            1.0 in steps of 0.001.  Images without candidate boxes are
            skipped entirely.
        """
        # Record max overlap value for each gt box
        # Return vector of overlap values
        gt_overlaps = np.zeros(0)
        for i in xrange(self.num_images):
            gt_inds = np.where(self.roidb[i]['gt_classes'] > 0)[0]
            gt_boxes = self.roidb[i]['boxes'][gt_inds, :]

            if candidate_boxes is None:
                non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
                boxes = self.roidb[i]['boxes'][non_gt_inds, :]
            else:
                boxes = candidate_boxes[i]
            if boxes.shape[0] == 0:
                continue
            # np.float64 is what the removed ``np.float`` alias meant.
            overlaps = bbox_overlaps(boxes.astype(np.float64),
                                     gt_boxes.astype(np.float64))

            _gt_overlaps = np.zeros((gt_boxes.shape[0]))
            for j in xrange(gt_boxes.shape[0]):
                # Recomputed every round because ``overlaps`` is edited below.
                argmax_overlaps = overlaps.argmax(axis=0)
                max_overlaps = overlaps.max(axis=0)
                # gt box that is currently covered best, and by how much
                gt_ind = max_overlaps.argmax()
                gt_ovr = max_overlaps.max()
                assert(gt_ovr >= 0)
                box_ind = argmax_overlaps[gt_ind]
                _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                assert(_gt_overlaps[j] == gt_ovr)
                # take the matched box and gt box out of the running
                overlaps[box_ind, :] = -1
                overlaps[:, gt_ind] = -1

            gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

        num_pos = gt_overlaps.size
        gt_overlaps = np.sort(gt_overlaps)
        step = 0.001
        thresholds = np.minimum(np.arange(0.5, 1.0 + step, step), 1.0)
        recalls = np.zeros_like(thresholds)
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
        ar = 2 * np.trapz(recalls, thresholds)

        return ar, gt_overlaps, recalls, thresholds
Example #17
0
    def create_roidb_from_box_list(self, box_list, gt_roidb):
        """Build one roidb entry per image from a list of proposal boxes.

        As a sanity check every image is read from disk and each proposal's
        third coordinate is asserted to be smaller than the image width.

        Args:
            box_list: one box array per image; must have ``self.num_images``
                entries.
            gt_roidb: optional per-image dicts with ``'boxes'`` and
                ``'gt_classes'``.  Images without any ground-truth box keep an
                all-zero overlap matrix.

        Returns:
            A list of dicts with keys ``'boxes'``, ``'gt_classes'`` (zeros),
            ``'gt_overlaps'`` (sparse, proposals x classes) and ``'flipped'``
            (False).
        """
        assert len(box_list) == self.num_images, \
                'Number of boxes must match number of ground-truth images'
        roidb = []
        for i in xrange(self.num_images):
            boxes = box_list[i]

            # Debug check: proposals must not reach past the right image edge.
            image=cv2.imread(self.image_path_at(i))
            width = image.shape[1]
            for box in boxes:
                assert box[2]<width

            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

            if gt_roidb is not None:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                if not gt_boxes.shape[0]==0:
                    # np.float64 is what the removed ``np.float`` alias meant.
                    gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                            gt_boxes.astype(np.float64))
                    argmaxes = gt_overlaps.argmax(axis=1)
                    maxes = gt_overlaps.max(axis=1)
                    I = np.where(maxes > 0)[0]
                    overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

            overlaps = scipy.sparse.csr_matrix(overlaps)
            roidb.append({'boxes' : boxes,
                          'gt_classes' : np.zeros((num_boxes,),
                                                  dtype=np.int32),
                          'gt_overlaps' : overlaps,
                          'flipped' : False})
        return roidb
Example #18
0
    def create_roidb_from_box_list(self, box_list, gt_roidb):
        """Build one roidb entry per image from a list of proposal boxes.

        Args:
            box_list: one box array per image; must have ``self.num_images``
                entries.
            gt_roidb: optional per-image dicts with ``'boxes'`` and
                ``'gt_classes'``.  Images without any ground-truth box keep an
                all-zero overlap matrix.

        Returns:
            A list of dicts with keys ``'boxes'``, ``'gt_classes'`` (zeros),
            ``'gt_overlaps'`` (sparse, proposals x classes) and ``'flipped'``
            (False).  The list length is also printed.
        """
        assert len(box_list) == self.num_images, \
                'Number of boxes must match number of ground-truth images'
        roidb = []
        for i in xrange(self.num_images):
            boxes = box_list[i]

            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

            # An image with zero gt boxes has nothing to take an argmax over.
            if gt_roidb is not None and gt_roidb[i]['boxes'].shape[0] > 0:
                gt_boxes = gt_roidb[i]['boxes']
                gt_classes = gt_roidb[i]['gt_classes']
                # np.float64 is what the removed ``np.float`` alias meant.
                gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                            gt_boxes.astype(np.float64))
                argmaxes = gt_overlaps.argmax(axis=1)
                maxes = gt_overlaps.max(axis=1)
                I = np.where(maxes > 0)[0]
                overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

            overlaps = scipy.sparse.csr_matrix(overlaps)

            roidb.append({'boxes' : boxes,
                          'gt_classes' : np.zeros((num_boxes,),
                                                  dtype=np.int32),
                          'gt_overlaps' : overlaps,
                          'flipped' : False})
        print('roidb size = %d'%(len(roidb)))
        return roidb
Example #19
0
def evalCorLoc2(imdb,nms_dets,overlap=0.5):
    """Compute a per-class localisation rate over ground-truth boxes.

    For every class and image that has both detections and ground-truth boxes
    of that class, each ground-truth box counts towards the class total, and
    counts as a hit when at least one detection overlaps it by more than
    ``overlap`` (as measured by ``bbox_overlaps``).

    Args:
        imdb: object providing ``gt_roidb()`` and ``num_classes``.
        nms_dets: ``nms_dets[cls][im]`` holds the detections of class ``cls``
            in image ``im``; an empty list or an array with zero rows means
            "no detections" and the image is skipped for that class.
        overlap: strict lower bound on the overlap for a hit.

    Returns:
        Array of hits / totals for class indices 1 and up (index 0 is
        dropped). Classes with a zero total yield NaN.
    """
    num_classes = len(nms_dets)
    num_images = len(nms_dets[0])
    gt = imdb.gt_roidb()
    pos = np.zeros(imdb.num_classes)
    tot = np.zeros(imdb.num_classes)
    for cls_ind in range(num_classes):
        for im_ind in range(num_images):
            dets = nms_dets[cls_ind][im_ind]
            # `dets == []` is not a reliable emptiness test for ndarrays
            # (it broadcasts or fails); len() covers lists and arrays alike.
            if len(dets) == 0:
                continue
            sel = gt[im_ind]['gt_classes'] == cls_ind
            if not np.any(sel):
                # No ground truth of this class in this image.
                continue
            gtdet = (gt[im_ind]['boxes'][sel]).astype(np.float64, copy=False)
            dets = dets.astype(np.float64, copy=False)
            ovr = bbox_overlaps(gtdet,dets)
            tot[cls_ind] += gtdet.shape[0]
            pos[cls_ind] += np.sum(ovr.max(1)>overlap)
    corloc = pos[1:]/tot[1:]
    return corloc
Example #20
0
    def create_roidb_from_box_list(self, box_list, gt_roidb, weight_list=None):
        """Build a roidb from per-image proposal boxes and optional weights.

        Each proposal box receives the overlap (as returned by
        ``bbox_overlaps``) with its best-matching ground-truth box, stored in
        the column of that ground-truth box's class.

        Args:
            box_list: sequence with one box array per image; its length must
                equal ``self.num_images``.
            gt_roidb: ground-truth roidb (one dict per image providing
                ``'boxes'`` and ``'gt_classes'``), or ``None``.
            weight_list: optional sequence with one weight array per image;
                each must have as many rows as the image has boxes.

        Returns:
            A list with one dict per image containing ``'boxes'``,
            ``'gt_boxes'`` (``[]`` when ``gt_roidb`` is ``None``),
            ``'gt_classes'`` (all zeros, int32), ``'gt_overlaps'`` (CSR sparse
            matrix), ``'flipped'`` (always ``False``) and ``'weight'``
            (``None`` when ``weight_list`` is ``None``).
        """
        assert len(box_list) == self.num_images, \
                'Number of boxes must match number of ground-truth images'
        roidb = []
        # The assert above guarantees one entry per image, so enumerating
        # box_list visits exactly the indices 0 .. self.num_images - 1.
        for i, boxes in enumerate(box_list):
            num_boxes = boxes.shape[0]
            overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32)

            gt_boxes = []
            if gt_roidb is not None:
                gt_boxes = gt_roidb[i]['boxes']

                # Need at least one box for argmax
                if gt_boxes.shape[0] > 0:
                    gt_classes = gt_roidb[i]['gt_classes']
                    # np.float64 is what the removed np.float alias meant.
                    gt_overlaps = bbox_overlaps(boxes.astype(np.float64),
                                                gt_boxes.astype(np.float64))
                    argmaxes = gt_overlaps.argmax(axis=1)
                    maxes = gt_overlaps.max(axis=1)
                    I = np.where(maxes > 0)[0]
                    overlaps[I, gt_classes[argmaxes[I]]] = maxes[I]

            weight = None
            if weight_list is not None:
                weight = weight_list[i]
                assert weight.shape[0] == num_boxes, 'weight num should be same as boxes num'
            else:
                # Parenthesised single-argument form prints identically on
                # Python 2 and Python 3.
                print('weight is None\n')

            overlaps = scipy.sparse.csr_matrix(overlaps)
            roidb.append({'boxes' : boxes,
                          'gt_boxes' : gt_boxes,
                          'gt_classes' : np.zeros((num_boxes,),
                                                  dtype=np.int32),
                          'gt_overlaps' : overlaps,
                          'flipped' : False,
                          'weight' : weight})
        return roidb
Example #21
0
def evalCorLoc(imdb,nms_dets,overlap=0.5):
    """Compute a per-class, per-image localisation rate from the first detection.

    For every class index from 1 up and every image containing ground truth
    of that class, the image counts once towards the class total. It counts
    as a hit when the first detection row (``dets[:1]``) overlaps some
    ground-truth box of that class by at least ``overlap`` (as measured by
    ``bbox_overlaps``).

    Args:
        imdb: object providing ``gt_roidb()`` and ``num_classes``.
        nms_dets: ``nms_dets[cls][im]`` holds the detections of class ``cls``
            in image ``im``.
        overlap: inclusive lower bound on the overlap for a hit.

    Returns:
        Array of hits / totals for class indices 1 and up. Classes with a
        zero total yield NaN.

    Raises:
        ValueError: if any (class, image) entry has no detections. The
            original code aborted here via an undefined name.
    """
    num_classes = len(nms_dets)
    num_images = len(nms_dets[0])
    gt = imdb.gt_roidb()
    pos = np.zeros(imdb.num_classes)
    tot = np.zeros(imdb.num_classes)
    for cls_ind in range(1, num_classes):
        for im_ind in range(num_images):
            dets = nms_dets[cls_ind][im_ind]
            # `dets == []` is not a reliable emptiness test for ndarrays;
            # len() covers both lists and arrays.
            if len(dets) == 0:
                raise ValueError("Error, no detections!")
            sel = gt[im_ind]['gt_classes'] == cls_ind
            if not np.any(sel):
                # No ground truth of this class in this image.
                continue
            gtdet = (gt[im_ind]['boxes'][sel]).astype(np.float64, copy=False)
            dets = dets.astype(np.float64, copy=False)
            # Only the first detection row is evaluated.
            ovr = bbox_overlaps(gtdet,dets[:1])
            tot[cls_ind] += 1
            pos[cls_ind] += ovr.max()>=overlap
    corloc = pos[1:]/tot[1:]
    return corloc
def fixMatOverlap(args):
    """Clip stored predicted boxes to the image and recompute their overlaps.

    The function takes one packed tuple, the same calling convention as the
    former tuple-unpacking parameter (a syntax removed by PEP 3113).

    Args:
        args: tuple ``(mat_overlap_file, gt_file, im_file, out_file, idx)``:
            mat_overlap_file: ``.npz`` archive with ``pred_scores``,
                ``gt_boxes_size`` and ``pred_boxes``.
            gt_file: file loadable by ``np.load`` holding the ground-truth
                boxes; each is passed through
                ``psr.convertBBoxFormatToStandard``.
            im_file: image file, read only for its shape.
            out_file: destination passed to ``np.savez``.
            idx: value printed as a progress marker.

    Side effects:
        Prints ``idx`` and writes ``out_file`` with ``pred_scores``,
        ``gt_boxes_size``, the recomputed ``mat_overlap`` and the clipped
        ``pred_boxes``.
    """
    mat_overlap_file, gt_file, im_file, out_file, idx = args
    print(idx)
    im = scipy.misc.imread(im_file)
    gt_boxes = np.load(gt_file)
    gt_boxes = np.array([psr.convertBBoxFormatToStandard(gt_box) for gt_box in gt_boxes])

    # Close the archive once the arrays are loaded, so no file handle leaks.
    with np.load(mat_overlap_file) as mat_info:
        pred_scores = mat_info['pred_scores']
        gt_boxes_size = mat_info['gt_boxes_size']
        pred_boxes = mat_info['pred_boxes']

    # Columns 0 and 1 are floored at zero.
    pred_boxes[:, 1] = np.maximum(pred_boxes[:, 1], 0)
    pred_boxes[:, 0] = np.maximum(pred_boxes[:, 0], 0)
    # Column 2 is capped at im.shape[0] and column 3 at im.shape[1].
    # NOTE(review): confirm this pairing matches the box coordinate layout.
    pred_boxes[:, 2] = np.minimum(pred_boxes[:, 2], im.shape[0])
    pred_boxes[:, 3] = np.minimum(pred_boxes[:, 3], im.shape[1])

    # np.float64 is what the removed np.float alias meant.
    mat_overlap_new = cython_bbox.bbox_overlaps(
        np.array(pred_boxes, dtype=np.float64),
        np.array(gt_boxes, dtype=np.float64))

    np.savez(out_file,
             pred_scores=pred_scores,
             gt_boxes_size=gt_boxes_size,
             mat_overlap=mat_overlap_new,
             pred_boxes=pred_boxes)
Example #23
0
def bbox_vote(dets_NMS, dets_all, thresh=0.5):
    """Refine NMS-surviving detections by score-weighted box voting.

    For each row of ``dets_NMS``, all rows of ``dets_all`` whose overlap with
    it (per ``bbox_overlaps`` on columns 0:4) is at least ``thresh`` are
    averaged, weighted by their score in column 4, to give the voted box.

    Args:
        dets_NMS: detections kept after NMS; columns 0:4 are the box and
            column 4 the score.
        dets_all: candidate detections in the same column layout.
        thresh: minimum overlap for a candidate to vote.

    Returns:
        Array shaped like ``dets_NMS`` with the voted box in columns 0:4 and
        the score in column 4. Any further columns stay zero. When
        ``cfg.TEST.BBOX_VOTE_N_WEIGHTED_SCORE > 1`` the score is the minimum
        of the original score and an agreement-weighted score.
    """
    dets_voted = np.zeros_like(dets_NMS)

    # np.float64 is what the removed np.float alias meant.
    _overlaps = bbox_overlaps(
        np.ascontiguousarray(dets_NMS[:, 0:4], dtype=np.float64),
        np.ascontiguousarray(dets_all[:, 0:4], dtype=np.float64))

    # for each survived box
    for i, det in enumerate(dets_NMS):
        dets_overlapped = dets_all[np.where(_overlaps[i, :] >= thresh)[0]]
        assert(len(dets_overlapped) > 0)

        boxes = dets_overlapped[:, 0:4]
        scores = dets_overlapped[:, 4]

        # Score-weighted sum of the voting boxes, normalised by total score.
        out_box = np.dot(scores, boxes)

        dets_voted[i][0:4] = out_box / scores.sum()
        dets_voted[i][4] = det[4]                         # Keep the original score

        # Weighted scores (if enabled)
        if cfg.TEST.BBOX_VOTE_N_WEIGHTED_SCORE > 1:
            n_agreement = cfg.TEST.BBOX_VOTE_N_WEIGHTED_SCORE
            w_empty = cfg.TEST.BBOX_VOTE_WEIGHT_EMPTY

            n_detected = len(scores)

            if n_detected >= n_agreement:
                # Mean of the n_agreement highest scores.
                top_scores = -np.sort(-scores)[:n_agreement]
                new_score = np.average(top_scores)
            else:
                # Fewer voters than required: each missing voter contributes
                # with weight w_empty instead of 1.
                new_score = np.average(scores) * (n_detected * 1.0 + (n_agreement - n_detected) * w_empty) / n_agreement

            # Voting may only lower the score, never raise it.
            dets_voted[i][4] = min(new_score, dets_voted[i][4])

    return dets_voted
Example #24
0
def prepare_roidb(imdb):
    """Attach per-image positive grid-box records to the imdb's roidb.

    If the cache file ``<imdb.cache_path>/<imdb.name>_gt_roidb_prepared.pkl``
    exists, it is unpickled into ``imdb._roidb`` and the function returns.
    Otherwise, for every image the function sets ``roidb[i]['image']`` and
    ``roidb[i]['info_boxes']``, then pickles the roidb to the cache file.

    ``info_boxes`` is a float32 array with 18 columns, one row per positive
    grid box and scale:
        0      cx of the grid box
        1      cy of the grid box
        2      scale index
        3:7    grid box
        7      mapped scale index (cfg.TRAIN.SCALE_MAPPING)
        8:12   grid box multiplied by scale_map / scale
        12     ground-truth label of the matched box
        13     never written here (stays 0)
        14:18  output of _compute_targets

    Returns None in both paths.
    """
    cache_file = os.path.join(imdb.cache_path, imdb.name + '_gt_roidb_prepared.pkl')
    if os.path.exists(cache_file):
        # NOTE(review): unpickling executes arbitrary code; the cache file
        # must come from a trusted location.
        with open(cache_file, 'rb') as fid:
            imdb._roidb = cPickle.load(fid)
        print '{} gt roidb prepared loaded from {}'.format(imdb.name, cache_file)
        return

    roidb = imdb.roidb
    for i in xrange(len(imdb.image_index)):
        roidb[i]['image'] = imdb.image_path_at(i)
        boxes = roidb[i]['boxes']
        labels = roidb[i]['gt_classes']
        # Accumulator for this image; rows are appended per scale below.
        info_boxes = np.zeros((0, 18), dtype=np.float32)

        # Images without any box get an empty info_boxes array.
        if boxes.shape[0] == 0:
            roidb[i]['info_boxes'] = info_boxes
            continue

        # compute grid boxes
        # PIL's size is (width, height).
        s = PIL.Image.open(imdb.image_path_at(i)).size
        image_height = s[1]
        image_width = s[0]
        boxes_grid, cx, cy = get_boxes_grid(image_height, image_width)

        # for each scale
        for scale_ind, scale in enumerate(cfg.TRAIN.SCALES):
            boxes_rescaled = boxes * scale

            # compute overlap
            overlaps = bbox_overlaps(boxes_grid.astype(np.float), boxes_rescaled.astype(np.float))
            max_overlaps = overlaps.max(axis = 1)
            argmax_overlaps = overlaps.argmax(axis = 1)
            max_classes = labels[argmax_overlaps]

            # select positive boxes
            # Indices are collected class by class, so fg_inds is grouped by
            # class index rather than sorted by grid position.
            fg_inds = []
            for k in xrange(1, imdb.num_classes):
                fg_inds.extend(np.where((max_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH))[0])

            if len(fg_inds) > 0:
                gt_inds = argmax_overlaps[fg_inds]
                # bounding box regression targets
                gt_targets = _compute_targets(boxes_grid[fg_inds,:], boxes_rescaled[gt_inds,:])
                # scale mapping for RoI pooling
                scale_ind_map = cfg.TRAIN.SCALE_MAPPING[scale_ind]
                scale_map = cfg.TRAIN.SCALES[scale_ind_map]
                # construct the list of positive boxes
                # (cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label, gt_sublabel, target)
                info_box = np.zeros((len(fg_inds), 18), dtype=np.float32)
                info_box[:, 0] = cx[fg_inds]
                info_box[:, 1] = cy[fg_inds]
                info_box[:, 2] = scale_ind
                info_box[:, 3:7] = boxes_grid[fg_inds,:]
                info_box[:, 7] = scale_ind_map
                info_box[:, 8:12] = boxes_grid[fg_inds,:] * scale_map / scale
                info_box[:, 12] = labels[gt_inds]
                # Column 13 (gt_sublabel in the layout above) is left at 0.
                info_box[:, 14:] = gt_targets
                info_boxes = np.vstack((info_boxes, info_box))

        roidb[i]['info_boxes'] = info_boxes

    with open(cache_file, 'wb') as fid:
        cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print 'wrote gt roidb prepared to {}'.format(cache_file)
Example #25
0
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image,
                 num_classes, bg_aux_label):
    """Sample foreground and background RoIs and build their training targets.

    Ground-truth rows whose column 3 is negative are treated as "ignored":
    RoIs overlapping any of them by 0.4 or more are dropped before sampling.
    Foreground RoIs are those with overlap >= ``cfg.TRAIN.FG_THRESH``.
    Background is split into a "near" part (overlap in
    ``[BG_THRESH_LO, BG_THRESH_HI)``, up to 20% of the background budget) and
    a "far" part (overlap < 0.01, drawn from the first 300 such RoIs).

    Args:
        all_rois: RoI array; columns 1:5 are used as box coordinates.
        gt_boxes: ground-truth array; columns 0:4 are the box, column 4 the
            label and, if present, column 5 an auxiliary label.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: total RoI budget for the image.
        num_classes: forwarded to ``_get_bbox_regression_labels``.
        bg_aux_label: auxiliary label assigned to background RoIs.

    Returns:
        Tuple ``(labels, rois, bbox_targets, bbox_inside_weights, aux_label)``;
        ``aux_label`` is ``None`` when ``gt_boxes`` has no sixth column.
    """

    # Remove boxes that overlaps with ignored gt boxes
    ignored_mask = gt_boxes[:, 3] < 0
    gt_ignored_boxes = gt_boxes[ignored_mask, :]
    gt_boxes = gt_boxes[np.logical_not(ignored_mask), :]

    # np.float64 below is what the removed np.float alias meant.
    if len(gt_ignored_boxes):
        ignored_overlaps = bbox_overlaps(
            np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
            np.ascontiguousarray(gt_ignored_boxes[:, :4], dtype=np.float64))
        max_ignored_overlaps = ignored_overlaps.max(axis=1)
        all_rois = all_rois[max_ignored_overlaps <
                            0.4, :]  # FIXME: Remove this hardcoded constant

    # overlaps: (rois x gt_boxes)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.array(np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0],
                       dtype=int)
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds,
                             size=fg_rois_per_this_image,
                             replace=False)

    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image

    # Fraction of the background budget reserved for "near" background.
    NEAR_FRACTION = 0.2
    bg_near_cnt = int(np.floor(bg_rois_per_this_image * NEAR_FRACTION))
    bg_near_inds = np.array(
        np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI)
                 & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0],
        dtype=int)
    bg_near_cnt = min(bg_near_cnt, bg_near_inds.size)
    if bg_near_inds.size > 0:
        bg_near_inds = npr.choice(bg_near_inds,
                                  size=bg_near_cnt,
                                  replace=False)

    # "Far" background: essentially no overlap; only the first 300 candidates
    # are considered.
    bg_far_cnt = bg_rois_per_this_image - bg_near_cnt
    bg_far_inds = np.array((np.where(max_overlaps < 0.01)[0])[:300], dtype=int)

    bg_far_cnt = int(min(bg_far_cnt, bg_far_inds.size))
    # Same guard as the foreground / near-background branches: some NumPy
    # versions reject choice() on an empty population even when size == 0.
    if bg_far_inds.size > 0:
        bg_far_inds = npr.choice(bg_far_inds, size=bg_far_cnt, replace=False)
    bg_inds = np.append(bg_near_inds, bg_far_inds)

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)

    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    # Auxiliary label if available
    aux_label = None
    if gt_boxes.shape[1] > 5:
        aux_label = gt_boxes[gt_assignment, 5]
        aux_label = aux_label[keep_inds]
        aux_label[fg_rois_per_this_image:] = bg_aux_label

    bbox_target_data = _compute_targets(rois[:, 1:5],
                                        gt_boxes[gt_assignment[keep_inds], :4],
                                        labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets, bbox_inside_weights, aux_label
    def forward(self, bottom, top):
        """Build subject/object relation blobs for one image and copy them to ``top``.

        Inputs read from ``bottom`` (by index):
            0: RPN boxes; columns 1:5 are used as box coordinates.
            1: box regression predictions.
            2: per-class scores.
            3: image info; entries 0 and 1 are divided by entry 2 to get the
               original image size, so entry 2 acts as the resize scale.
            4: index into ``self._image_id``.
            5: image data; the first item is transposed to put axis 0 last.

        When the annotation has a ``relationship`` attribute, detections are
        thresholded and NMS-filtered per class, matched against each annotated
        subject/object box, and one (subject, object) pair per relationship is
        turned into blobs. On any exception, or when no pair is produced, the
        blobs stored in ``self._prev_blob`` are used instead.

        Afterwards ``self._prev_blob`` is refreshed with the blobs cut down to
        their first row and with ``relation_label`` set to -1.
        """
        # prep incoming data==========
        rpn_boxes = bottom[0].data.copy()
        bbox_pred = bottom[1].data
        scores = bottom[2].data
        im_info = bottom[3].data[0]
        im_idx = int(bottom[4].data)
        im_data = bottom[5].data[0, :, :, :].transpose((1, 2, 0)).copy()
        m = self.meta
        im_id = self._image_id[im_idx]
        r_anno = self.r_anno[im_id]
        # prep done============

        # prep blobs for forward
        blobs = {}
        s_classeme = []
        s_rois = []
        s_rois_encoded = []
        o_classeme = []
        o_rois = []
        o_rois_encoded = []
        relation_label = []

        gt_boxes = []
        if hasattr(r_anno, 'relationship'):
            # Undo the resize scale, apply the predicted deltas and clip to
            # the original image extent.
            rpn_boxes_img_coor = rpn_boxes[:, 1:5] / im_info[2]
            boxes = rpn_boxes_img_coor
            boxes = bbox_transform_inv(boxes, bbox_pred)
            boxes = clip_boxes(boxes, (im_info[0] / im_info[2], im_info[1] / im_info[2]))

            # Rescale the image data to 0..255 and resize it to the original
            # size; im_data is only passed to the visualize helpers below.
            cv2.normalize(im_data, im_data, 255, 0, cv2.NORM_MINMAX)
            im_data = im_data.astype(np.uint8)

            origsz = (im_info[1] / im_info[2], im_info[0] / im_info[2])
            im_data = cv2.resize(im_data, origsz)
            # Minimum class confidence for a detection to be kept at the end.
            thresh_final = .5

            res_locations = []
            res_classemes = []
            res_cls_confs = []
            boxes_tosort = []
            # Class indices 1..100 are scanned (index 0 is skipped).
            for j in xrange(1, 101):
                inds = np.where(scores[:, j] > .3)[0]
                cls_scores = scores[inds, j]
                cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
                # Columns: box (4), score, index of the source row in `scores`.
                cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis], inds[:, np.newaxis])) \
                    .astype(np.float32, copy=False)
                # pred_boxes = clip_boxes(pred_boxes, im.shape)
                if len(cls_scores) <= 0:
                    boxes_tosort.append(cls_dets)
                    continue

                # res_loc keeps the source row index as its last column; it is
                # used further down to look up the matching row of rpn_boxes.
                res_loc = np.hstack((cls_boxes, inds[:, np.newaxis]))
                res_classeme = scores[inds]
                res_cls_conf = np.column_stack((np.zeros(cls_scores.shape[0]) + j, cls_scores))
                keep = nms(cls_dets[:,:5], .3)  # nms threshold
                cls_dets = cls_dets[keep, :]
                res_loc = res_loc[keep]
                res_classeme = res_classeme[keep]
                res_cls_conf = res_cls_conf[keep]
                res_classemes.extend(res_classeme)
                res_locations.extend(res_loc)
                res_cls_confs.extend(res_cls_conf)
                boxes_tosort.append(cls_dets)
            # NOTE(review): the bare `except` below swallows every error in
            # this section (including programming errors) and silently falls
            # back to the previous blobs.
            try:
                # final class confidence
                inds = np.where(np.array(res_cls_confs)[:, 1] > thresh_final)[0]

                classemes = np.array(res_classemes)[inds]
                locations = np.array(res_locations)[inds]
                cls_confs = np.array(res_cls_confs)[inds]
                # decide what to pass to top

                # limit max
                w, h = self.meta['train/' + im_id + '/w'][...], self.meta['train/' + im_id + '/h'][...]
                # A single relationship is wrapped in a list so the loop below
                # can treat both cases alike.
                if not isinstance(r_anno.relationship, np.ndarray):
                    r_anno.relationship = [r_anno.relationship]
                for r in xrange(len(r_anno.relationship)):
                    if not hasattr(r_anno.relationship[r], 'phrase'):
                        continue
                    predicate = r_anno.relationship[r].phrase[1]
                    # Annotation boxes are unpacked as (ymin, ymax, xmin, xmax)
                    # and reordered to [xmin, ymin, xmax, ymax].
                    ymin, ymax, xmin, xmax = r_anno.relationship[r].subBox
                    sub_bbox = [xmin, ymin, xmax, ymax]
                    gt_boxes.append(sub_bbox)

                    ymin, ymax, xmin, xmax = r_anno.relationship[r].objBox

                    obj_bbox = [xmin, ymin, xmax, ymax]
                    gt_boxes.append(obj_bbox)
                    # Row 0: subject box vs. detections; row 1: object box.
                    overlaps = bbox_overlaps(
                        np.ascontiguousarray([sub_bbox, obj_bbox], dtype=np.float),
                        np.ascontiguousarray(locations, dtype=np.float))
                    if overlaps.shape[0] == 0:
                        continue

                    # Up to 40 best-overlapping detections, best first, then
                    # trimmed from the tail until all have overlap >= .6.
                    sub_sorted = overlaps[0].argsort()[-40:][::-1]
                    obj_sorted = overlaps[1].argsort()[-40:][::-1]
                    while len(sub_sorted) > 0 and overlaps[0][sub_sorted[-1]] < .6: sub_sorted = sub_sorted[:-1]
                    while len(obj_sorted) > 0 and overlaps[1][obj_sorted[-1]] < .6: obj_sorted = obj_sorted[:-1]

                    if len(sub_sorted) <= 0 or len(obj_sorted) <= 0:
                        continue

                    cnt = 0
                    # Only the single best subject and object are paired.
                    for s in sub_sorted[:1]:  # sub_idx:
                        for o in obj_sorted[:1]:  # obj_idx:
                            if s != o and cnt < 20:
                                sub_clsmemes = classemes[s]
                                obj_clsmemes = classemes[o]
                                # Encode each box relative to the [0, 0, w, h] box.
                                sub_box_encoded = bbox_transform(np.array([[0, 0, w, h]]), np.array([locations[s]]))[0]
                                obj_box_encoded = bbox_transform(np.array([[0, 0, w, h]]), np.array([locations[o]]))[0]
                                relation = self.meta['meta/pre/name2idx/' + predicate][...]
                                # all done, now we put forward
                                s_classeme.append(sub_clsmemes)
                                o_classeme.append(obj_clsmemes)
                                # Last column of `locations` is the source row
                                # index into rpn_boxes.
                                s_rois.append(rpn_boxes[locations[s][-1]])
                                o_rois.append(rpn_boxes[locations[o][-1]])
                                s_rois_encoded.append(sub_box_encoded)
                                o_rois_encoded.append(obj_box_encoded)
                                relation_label.append(np.float32(relation))
                                cnt += 1
                # final step copy all the stuff for forward
                blobs['s_classeme'] = np.array(s_classeme)
                blobs['o_classeme'] = np.array(o_classeme)
                blobs['s_rois'] = np.array(s_rois)
                blobs['o_rois'] = np.array(o_rois)
                blobs['s_rois_encoded'] = np.array(s_rois_encoded)
                blobs['o_rois_encoded'] = np.array(o_rois_encoded)
                blobs['relation_label'] = np.array(relation_label)
            except:
                blobs = self._prev_blob
            # No usable pair for this image: reuse the previous blobs.
            if blobs['s_classeme'].shape[0] == 0:
                blobs = self._prev_blob
        else:
            blobs = self._prev_blob
        visualize_gt(im_data,gt_boxes)
        # NOTE(review): boxes_tosort, rpn_boxes_img_coor and thresh_final are
        # only assigned inside the `hasattr(r_anno, 'relationship')` branch,
        # so this call looks like it raises NameError when the annotation has
        # no relationship - confirm and guard if so.
        visualize(im_data, boxes_tosort, rpn_boxes_img_coor, m,thresh_final)
        for blob_name, blob in blobs.iteritems():
            top_ind = self._name_to_top_map[blob_name]
            # Reshape net's input blobs
            top[top_ind].reshape(*(blob.shape))
            # Copy data into net's input blobs
            top[top_ind].data[...] = blob.astype(np.float32, copy=False)

        # this becomes a dummy for forward in case things fail
        # Only fresh blobs (label != -1) are trimmed; blobs that already are
        # the dummy are stored back unchanged.
        if blobs['relation_label'][0] != -1:
            for blob_name, blob in blobs.iteritems():
                # Keep just the first row, preserving the leading axis.
                blobs[blob_name] = blob[0, np.newaxis]
                if blob_name == 'relation_label':
                    blobs[blob_name][...] = -1
        self._prev_blob = blobs
def imdb_rpn_compute_stats(net,
                           imdb,
                           anchor_scales=(8, 16, 32),
                           feature_stride=16):
    """Compute mean and std of RPN box-regression targets over a dataset.

    For every image in the filtered roidb, anchors lying fully inside the
    image are matched to ground-truth boxes. Anchors whose best overlap
    reaches ``cfg.TRAIN.RPN_POSITIVE_OVERLAP``, plus the best-matching
    anchor(s) of each ground-truth box, contribute their ``bbox_transform``
    targets to the running statistics.

    Args:
        net: network exposing ``blobs`` and ``forward``; it is run once per
            square input size to record the ``'rpn/output'`` map size.
        imdb: dataset object providing ``roidb``.
        anchor_scales: scales forwarded to ``generate_anchors``.
        feature_stride: pixel distance between neighbouring anchor positions.

    Returns:
        Tuple ``(means, stds)`` of the targets, reduced over all
        contributing anchors.
    """
    raw_anchors = generate_anchors(scales=np.array(anchor_scales))
    print(raw_anchors.shape)
    sums = 0
    squared_sums = 0
    counts = 0
    roidb = filter_roidb(imdb.roidb)
    # Compute a map of input image size and output feature map blob
    map_w = {}
    map_h = {}
    for i in range(50, cfg.TRAIN.MAX_SIZE + 10):
        blobs = {
            'data': np.zeros((1, 3, i, i)),
            'im_info': np.asarray([[i, i, 1.0]])
        }
        net.blobs['data'].reshape(*(blobs['data'].shape))
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
        # The forward pass is run only for its effect on net.blobs; its
        # return value is not needed.
        net.forward(data=blobs['data'].astype(np.float32, copy=False),
                    im_info=blobs['im_info'].astype(np.float32, copy=False))
        height, width = net.blobs['rpn/output'].data.shape[-2:]
        map_w[i] = width
        map_h[i] = height

    num_entries = len(roidb)
    for i in range(num_entries):
        if not i % 5000:
            # Report progress against the filtered roidb actually iterated,
            # not the unfiltered image count.
            print('computing %d/%d' % (i, num_entries))
        im = cv2.imread(roidb[i]['image'])
        im_data, im_info = _get_image_blob(im)
        gt_boxes = roidb[i]['boxes']
        # Bring the ground truth to the scale of the network input.
        gt_boxes = gt_boxes * im_info[0, 2]
        height = map_h[im_data.shape[2]]
        width = map_w[im_data.shape[3]]
        # 1. Generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, width) * feature_stride
        shift_y = np.arange(0, height) * feature_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = raw_anchors.shape[0]
        K = shifts.shape[0]
        all_anchors = (raw_anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))

        # only keep anchors inside the image
        inds_inside = np.where((all_anchors[:, 0] >= 0)
                               & (all_anchors[:, 1] >= 0)
                               & (all_anchors[:, 2] < im_info[0, 1]) &  # width
                               (all_anchors[:, 3] < im_info[0, 0])  # height
                               )[0]

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        # np.float64 is what the removed np.float alias meant.
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes, dtype=np.float64))

        # There are 2 types of bbox targets
        # 1. anchor whose overlaps with gt is greater than RPN_POSITIVE_OVERLAP
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        fg_inds = np.where(max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP)[0]
        # 2. anchors which best match certain gt
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # Include every anchor tied for the best overlap with a gt box.
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
        fg_inds = np.unique(np.hstack((fg_inds, gt_argmax_overlaps)))
        gt_rois = gt_boxes[argmax_overlaps, :]

        anchors = anchors[fg_inds, :]
        gt_rois = gt_rois[fg_inds, :]
        targets = bbox_transform(anchors, gt_rois[:, :4]).astype(np.float32,
                                                                 copy=False)
        sums += targets.sum(axis=0)
        squared_sums += (targets**2).sum(axis=0)
        counts += targets.shape[0]

    # Var[x] = E[x^2] - E[x]^2
    means = sums / counts
    stds = np.sqrt(squared_sums / counts - means**2)
    print(means)
    print(stds)
    return means, stds
Example #28
0
def get_label(anchor_list, gt_box, image_raw_size, batch_size):
    """Assign RPN class labels, box-regression targets and loss weights.

    Labels are 1 (foreground), 0 (background) or -1 (ignored):
      * best overlap >= ``RPN_POSITIVE_OVERLAP`` -> 1
      * best overlap <  ``RPN_NEGATIVE_OVERLAP`` -> 0
      * anchors not fully inside the image are reset to -1
      * every anchor tied for the best overlap with a ground-truth box -> 1
      * foreground / background are then randomly subsampled so that at most
        ``RPN_FG_FACTOR * batch_size`` foreground and
        ``batch_size - #foreground`` background anchors remain.

    Args:
        anchor_list: anchor array with columns (x1, y1, x2, y2).
        gt_box: ground-truth box array, indexed by row; columns 0:4 are used.
        image_raw_size: sequence whose entry 0 bounds anchor column 3 and
            entry 1 bounds anchor column 2.
        batch_size: total number of labelled anchors to keep.

    Returns:
        Tuple ``(label, bbox_target, in_weight, out_weight)``.
        ``bbox_target`` holds (dx, dy, dw, dh) per anchor and is zero for
        anchors outside the image. ``in_weight`` is 1 for foreground anchors.
        ``out_weight`` is ``1 / #labelled`` for foreground and background
        anchors.
    """
    num_anchors = len(anchor_list)

    # class label: start with every anchor ignored
    label = np.full(num_anchors, -1.0)

    inside_idx = np.where((anchor_list[:, 0] >= 0) & (anchor_list[:, 1] >= 0)
                          & (anchor_list[:, 2] < image_raw_size[1])
                          & (anchor_list[:, 3] < image_raw_size[0]))[0]

    # np.float64 is what the removed np.float alias meant.
    over_lap_matrix = bbox_overlaps(
        np.ascontiguousarray(anchor_list, dtype=np.float64),
        np.ascontiguousarray(gt_box, dtype=np.float64))

    anchor_max_idx = over_lap_matrix.argmax(axis=1)
    over_lap_max = over_lap_matrix[np.arange(num_anchors), anchor_max_idx]

    label[over_lap_max >= RPN_POSITIVE_OVERLAP] = 1
    label[(over_lap_max < RPN_NEGATIVE_OVERLAP)] = 0

    # Reset anchors outside the image with a boolean mask; this replaces a
    # per-anchor `i not in inside_idx` scan that was quadratic.
    outside = np.ones(num_anchors, dtype=bool)
    outside[inside_idx] = False
    label[outside] = -1

    # Every anchor tied for the best overlap with some gt box is foreground.
    # NOTE(review): this runs after the outside-image reset, so an outside
    # anchor can still become foreground while its regression target stays
    # zero - confirm this is intended.
    gt_max_index = over_lap_matrix.argmax(axis=0)
    gt_max = over_lap_matrix[gt_max_index, np.arange(over_lap_matrix.shape[1])]
    gt_max_index = np.where(over_lap_matrix == gt_max)[0]
    label[gt_max_index] = 1

    # Subsample foreground anchors down to the allowed quota.
    fg_num = int(RPN_FG_FACTOR * batch_size)
    fg_index = np.where(label == 1)[0]
    if len(fg_index) > fg_num:
        remove_index = np.random.choice(fg_index,
                                        size=(len(fg_index) - fg_num),
                                        replace=False)
        label[remove_index] = -1

    # Fill the rest of the batch with background anchors.
    bg_num = batch_size - np.sum(label == 1)
    bg_index = np.where(label == 0)[0]
    if len(bg_index) > bg_num:
        remove_index = np.random.choice(bg_index,
                                        size=(len(bg_index) - bg_num),
                                        replace=False)
        label[remove_index] = -1

    in_weight = np.zeros([num_anchors, 4], dtype=np.float32)
    out_weight = np.zeros([num_anchors, 4], dtype=np.float32)

    # bbox label
    dx = np.zeros(num_anchors)
    dy = np.zeros(num_anchors)
    dw = np.zeros(num_anchors)
    dh = np.zeros(num_anchors)

    # Anchor geometry (inside-image anchors only).
    ws = anchor_list[inside_idx, 2] - anchor_list[inside_idx, 0] + 1.0
    hs = anchor_list[inside_idx, 3] - anchor_list[inside_idx, 1] + 1.0
    center_xs = anchor_list[inside_idx, 0] + ws / 2
    center_ys = anchor_list[inside_idx, 1] + hs / 2

    # Best-matching ground-truth box for every anchor.
    gt_target = gt_box[anchor_max_idx]

    target_w = gt_target[inside_idx, 2] - gt_target[inside_idx, 0] + 1.0
    target_h = gt_target[inside_idx, 3] - gt_target[inside_idx, 1] + 1.0
    target_center_x = gt_target[inside_idx, 0] + target_w / 2.0
    target_center_y = gt_target[inside_idx, 1] + target_h / 2.0

    # Centre offsets normalised by anchor size; log-ratio of sizes.
    dx[inside_idx] = (target_center_x - center_xs) / ws
    dy[inside_idx] = (target_center_y - center_ys) / hs
    dw[inside_idx] = np.log(target_w / ws)
    dh[inside_idx] = np.log(target_h / hs)

    # Number of anchors that take part in the loss (foreground + background).
    num_examples = np.sum(label >= 0)

    in_weight[label == 1] = [1.0] * 4
    out_weight[label == 1] = [1.0 / num_examples] * 4
    out_weight[label == 0] = [1.0 / num_examples] * 4

    bbox_target = np.vstack((dx, dy, dw, dh)).transpose()
    return label, bbox_target, in_weight, out_weight
Example #29
0
	for i in xrange(len(imdb.image_index)):

		# Load gt boxes
		gt_inds = np.where(roidb[i]['gt_classes'] >= 0)[0]
		gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
		gt_boxes[:, 0:4] = roidb[i]['boxes'][gt_inds, :] * im_scale
		gt_boxes[:, 4] = roidb[i]['gt_classes'][gt_inds]
	
		# label: 1 is positive, 0 is negative, -1 is dont care
		#labels = np.empty((len(inds_inside), ), dtype=np.float32)
		labels = np.empty((total_anchors, ), dtype=np.float32)
		labels.fill(-1)

		# Computer overlap
		overlaps = bbox_overlaps(
	            np.ascontiguousarray(anchors, dtype=np.float),
	            np.ascontiguousarray(gt_boxes, dtype=np.float))

		argmax_overlaps = overlaps.argmax(axis=1)               # gt index
		#max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]   
		max_overlaps = overlaps[np.arange(total_anchors), argmax_overlaps]   
		gt_argmax_overlaps = overlaps.argmax(axis=0)            # anchor index
		gt_max_overlaps = overlaps[gt_argmax_overlaps,
	                               np.arange(overlaps.shape[1])]    
		gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]


	    # bg label: assign bg labels first so that positive labels can clobber them
		labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

		# fg label: for each gt, anchor with highest overlap
    def _load_pascal3d_voxel_exemplar_annotation(self, index):
        """
        Load image and bounding boxes info from txt file in the pascal subcategory exemplar format.

        For the 'val' image set this simply returns whatever
        ``self._load_pascal_annotation(index)`` returns. Otherwise it parses
        ``<self._pascal3d_path>/<cfg.SUBCLS_NAME>/<index>.txt``, where each
        whitespace-separated line is read as: class name, subclass id, flip
        flag, then four 1-based box coordinates. Lines whose subclass is -1
        are ignored, and the file must contain exactly one flipped line per
        non-flipped line.

        When ``cfg.IS_RPN`` is set, the method additionally accumulates, per
        class, how many ground-truth boxes exist (``self._num_boxes_all``) and
        how many of them are overlapped by some grid box / anchor with at
        least ``cfg.TRAIN.FG_THRESH[class - 1]`` overlap
        (``self._num_boxes_covered``).

        Args:
            index: image identifier; used to build the annotation file name
                and passed to ``self.image_path_from_index``.

        Returns:
            dict with keys 'boxes', 'gt_classes', 'gt_subclasses',
            'gt_subclasses_flipped', 'gt_overlaps', 'gt_subindexes',
            'gt_subindexes_flipped' and 'flipped' (always False here). The
            overlap and subindex tables are ``scipy.sparse.csr_matrix``.

        Raises:
            AssertionError: if the annotation file is missing, or if the
                numbers of flipped and non-flipped objects differ.
        """

        if self._image_set == 'val':
            return self._load_pascal_annotation(index)

        filename = os.path.join(self._pascal3d_path, cfg.SUBCLS_NAME, index + '.txt')
        assert os.path.exists(filename), \
                'Path does not exist: {}'.format(filename)

        # the annotation file contains flipped objects
        # Split the usable lines (subclass != -1) into originals and flips.
        lines = []
        lines_flipped = []
        with open(filename) as f:
            for line in f:
                words = line.split()
                subcls = int(words[1])
                is_flip = int(words[2])
                if subcls != -1:
                    if is_flip == 0:
                        lines.append(line)
                    else:
                        lines_flipped.append(line)

        num_objs = len(lines)

        # store information of flipped objects
        # The k-th flipped line is paired with the k-th non-flipped line below.
        assert (num_objs == len(lines_flipped)), 'The number of flipped objects is not the same!'
        gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)

        for ix, line in enumerate(lines_flipped):
            words = line.split()
            subcls = int(words[1])
            gt_subclasses_flipped[ix] = subcls

        # Per-object outputs; the (num_objs, num_classes) tables get a single
        # non-zero entry per row, in the column of the object's class.
        boxes = np.zeros((num_objs, 4), dtype=np.float32)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        gt_subclasses = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
        subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
        subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32)

        for ix, line in enumerate(lines):
            words = line.split()
            cls = self._class_to_ind[words[0]]
            subcls = int(words[1])
            # Make pixel indexes 0-based
            boxes[ix, :] = [float(n)-1 for n in words[3:7]]
            gt_classes[ix] = cls
            gt_subclasses[ix] = subcls
            overlaps[ix, cls] = 1.0
            subindexes[ix, cls] = subcls
            subindexes_flipped[ix, cls] = gt_subclasses_flipped[ix]

        overlaps = scipy.sparse.csr_matrix(overlaps)
        subindexes = scipy.sparse.csr_matrix(subindexes)
        subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

        # The remainder only updates coverage statistics on self; it does not
        # change any of the values returned below.
        if cfg.IS_RPN:
            if cfg.IS_MULTISCALE:
                # compute overlaps between grid boxes and gt boxes in multi-scales
                # rescale the gt boxes
                boxes_all = np.zeros((0, 4), dtype=np.float32)
                for scale in cfg.TRAIN.SCALES:
                    boxes_all = np.vstack((boxes_all, boxes * scale))
                gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

                # compute grid boxes
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]
                boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

                # compute overlap
                overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float), boxes_all.astype(np.float))

                # check how many gt boxes are covered by grids
                if num_objs != 0:
                    # NOTE: from here on `index` no longer names the image; it
                    # maps each row of boxes_all back to its object number.
                    index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                    # best overlap achieved for each (scale, object) column
                    max_overlaps = overlaps_grid.max(axis = 0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(np.where((gt_classes_all == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1]))[0])
                    # an object counts once even if it is covered at several scales
                    index_covered = np.unique(index[fg_inds])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(np.where(gt_classes[index_covered] == i)[0])
            else:
                assert len(cfg.TRAIN.SCALES_BASE) == 1
                scale = cfg.TRAIN.SCALES_BASE[0]
                feat_stride = 16
                # faster rcnn region proposal
                base_size = 16
                ratios = [3.0, 2.0, 1.5, 1.0, 0.75, 0.5, 0.25]
                scales = 2**np.arange(1, 6, 0.5)
                anchors = generate_anchors(base_size, ratios, scales)
                num_anchors = anchors.shape[0]

                # image size
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]

                # height and width of the heatmap
                # NOTE(review): the round/floor chain presumably mirrors the
                # network's downsampling layers - confirm against the model.
                height = np.round((image_height * scale - 1) / 4.0 + 1)
                height = np.floor((height - 1) / 2 + 1 + 0.5)
                height = np.floor((height - 1) / 2 + 1 + 0.5)

                width = np.round((image_width * scale - 1) / 4.0 + 1)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)

                # gt boxes
                gt_boxes = boxes * scale

                # 1. Generate proposals from bbox deltas and shifted anchors
                shift_x = np.arange(0, width) * feat_stride
                shift_y = np.arange(0, height) * feat_stride
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = num_anchors
                K = shifts.shape[0]
                all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
                all_anchors = all_anchors.reshape((K * A, 4))

                # compute overlap
                overlaps_grid = bbox_overlaps(all_anchors.astype(np.float), gt_boxes.astype(np.float))

                # check how many gt boxes are covered by anchors
                if num_objs != 0:
                    # best overlap achieved for each gt box over all anchors
                    max_overlaps = overlaps_grid.max(axis = 0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(np.where((gt_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1]))[0])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(np.where(gt_classes[fg_inds] == i)[0])

        return {'boxes' : boxes,
                'gt_classes': gt_classes,
                'gt_subclasses': gt_subclasses,
                'gt_subclasses_flipped': gt_subclasses_flipped,
                'gt_overlaps': overlaps,
                'gt_subindexes': subindexes, 
                'gt_subindexes_flipped': subindexes_flipped, 
                'flipped' : False}
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride,
                        all_anchors, anchor_scales, anchor_ratios):
    """Label anchors as fg/bg/ignore and build their box-regression targets.

    Same as the anchor target layer in original Fast/er RCNN.

    Args:
        rpn_cls_score: array-like whose ``shape[1:3]`` is read as the
            (height, width) of the RPN score map.
        gt_boxes: ground-truth boxes; passed to ``bbox_overlaps`` and
            row-indexed to pick each anchor's best-matching box.
        im_info: sequence whose first element is indexed as
            ``[0]`` = image height and ``[1]`` = image width.
        _feat_stride: unused here; kept for interface compatibility.
        all_anchors: 2-D array with one anchor per row; columns 0-3 are
            tested against the image bounds as x1, y1, x2, y2.
        anchor_scales: anchor scales; only their count is used.
        anchor_ratios: anchor aspect ratios; only their count is used.

    Returns:
        Tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``. ``rpn_labels`` is reshaped to
        ``(1, 1, A * height, width)`` and holds 1 (positive), 0 (negative)
        or -1 (don't care); the other three are reshaped to
        ``(1, height, width, A * 4)``, where ``A`` is
        ``len(anchor_scales) * len(anchor_ratios)``.
    """
    scales = np.array(anchor_scales)
    ratios = np.array(anchor_ratios)
    A = scales.shape[0] * ratios.shape[0]
    total_anchors = all_anchors.shape[0]
    im_info = im_info[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]
    num_inside = len(inds_inside)

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((num_inside, ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    # np.float64 rather than the np.float alias: the alias was removed in
    # NumPy 1.24 and was always the same 64-bit float type.
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                             np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # best gt box for every anchor, and the overlap it achieves
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
    # best overlap achieved for every gt box ...
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # ... and every anchor row that reaches it (ties included)
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    # regression target of every inside anchor w.r.t. its best gt box
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        # NOTE: these divide by the number of positives / negatives, which
        # can be zero; the result is then only assigned to an empty selection.
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    rpn_labels = labels.reshape((1, 1, A * height, width))

    # bbox_targets
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))

    # bbox_inside_weights
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))

    # bbox_outside_weights
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
Example #32
0
def pose_target_layer(rois, bbox_prob, bbox_pred, gt_boxes, poses,
                      is_training):
    """Refine RoIs with their predicted boxes and build pose targets.

    Args:
        rois: tensor with one RoI per row; column 0 is used as the batch
            id, column 1 as the class index, columns 2-5 as the box and
            column 6 receives the class score.
        bbox_prob: tensor of per-class scores, one row per RoI; its second
            dimension defines the number of classes.
        bbox_pred: tensor of per-class box deltas (4 values per class).
        gt_boxes: 3-D tensor indexed as [batch, object, field]; fields 0-3
            are the box, field 4 the class, and an object is used only
            when its last field is > 0.
        poses: 3-D tensor indexed as [batch, object, 9]. NOTE: element 0 of
            every used pose row is overwritten in place with the batch
            index; columns 2-5 are taken as the quaternion.
        is_training: when true (and at least one RoI is positive) only the
            positive RoIs are kept.

    Returns:
        Tuple of CUDA tensors ``(rois, poses_target, poses_weight)``. When
        no ground-truth object is present, the targets and weights are all
        zeros with shape ``(num_rois, 4 * num_classes)``.
    """
    rois = rois.detach().cpu().numpy()
    bbox_prob = bbox_prob.detach().cpu().numpy()
    bbox_pred = bbox_pred.detach().cpu().numpy()
    gt_boxes = gt_boxes.detach().cpu().numpy()
    num_classes = bbox_prob.shape[1]

    # process boxes
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # undo the target normalization: one (std, mean) group per class
        stds = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes))
        means = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS),
                        (num_classes))
        bbox_pred *= stds
        bbox_pred += means

    boxes = rois[:, 2:6].copy()
    pred_boxes = bbox_transform_inv(boxes, bbox_pred)

    # assign boxes: each RoI takes the predicted box and score of its class
    for i in range(rois.shape[0]):
        cls = int(rois[i, 1])
        rois[i, 2:6] = pred_boxes[i, cls * 4:cls * 4 + 4]
        rois[i, 6] = bbox_prob[i, cls]

    # convert boxes to (batch_ids, x1, y1, x2, y2, cls)
    roi_blob = rois[:, (0, 2, 3, 4, 5, 1)]

    # Collect the valid gt boxes / poses as rows and concatenate once at the
    # end instead of re-allocating the blobs for every object.
    gt_rows = [np.zeros((0, 6), dtype=np.float32)]
    pose_rows = [np.zeros((0, 9), dtype=np.float32)]
    for i in range(gt_boxes.shape[0]):
        for j in range(gt_boxes.shape[1]):
            if gt_boxes[i, j, -1] > 0:
                gt_box = np.zeros((1, 6), dtype=np.float32)
                gt_box[0, 0] = i
                gt_box[0, 1:5] = gt_boxes[i, j, :4]
                gt_box[0, 5] = gt_boxes[i, j, 4]
                gt_rows.append(gt_box)
                # tag the pose with its batch index (mutates `poses`)
                poses[i, j, 0] = i
                pose_rows.append(poses[i, j, :].cpu().reshape(1, 9))
    gt_box_blob = np.concatenate(gt_rows, axis=0)
    pose_blob = np.concatenate(pose_rows, axis=0)

    if gt_box_blob.shape[0] == 0:
        num = rois.shape[0]
        poses_target = np.zeros((num, 4 * num_classes), dtype=np.float32)
        poses_weight = np.zeros((num, 4 * num_classes), dtype=np.float32)
    else:
        # overlaps: (rois x gt_boxes)
        # np.float64 rather than the np.float alias: the alias was removed
        # in NumPy 1.24 and was always the same 64-bit float type.
        overlaps = bbox_overlaps(
            np.ascontiguousarray(roi_blob[:, :5], dtype=np.float64),
            np.ascontiguousarray(gt_box_blob[:, :5], dtype=np.float64))

        gt_assignment = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)
        labels = gt_box_blob[gt_assignment, 5]
        quaternions = pose_blob[gt_assignment, 2:6]

        # Select foreground RoIs as those with >= FG_THRESH overlap
        bg_inds = np.where(max_overlaps < cfg.TRAIN.FG_THRESH_POSE)[0]
        labels[bg_inds] = 0

        # a RoI whose own class differs from its matched gt class is
        # background as well
        bg_inds = np.where(roi_blob[:, -1] != labels)[0]
        labels[bg_inds] = 0

        # in training, only use the positive boxes for pose regression
        if is_training:
            fg_inds = np.where(labels > 0)[0]
            if len(fg_inds) > 0:
                rois = rois[fg_inds, :]
                quaternions = quaternions[fg_inds, :]
                labels = labels[fg_inds]

        # pose regression targets and weights
        poses_target, poses_weight = _compute_pose_targets(
            quaternions, labels, num_classes)

    return torch.from_numpy(rois).cuda(), torch.from_numpy(
        poses_target).cuda(), torch.from_numpy(poses_weight).cuda()
Example #33
0
    def create_roidb_from_box_list(self, box_list, gt_roidb):
        """Build one roidb entry per image from a list of proposal boxes.

        For every image the proposals in ``box_list[i]`` are matched against
        the ground-truth boxes of ``gt_roidb[i]`` (when ``gt_roidb`` is given
        and both sets are non-empty). Each proposal with a positive best
        overlap records that overlap, and the subclass (and, when a
        VIEWPOINT flag is on, the viewpoint angles) of its best-matching
        ground-truth box, in the column of that box's class. All tables are
        returned as ``scipy.sparse.csr_matrix``.

        Returns:
            list of dicts, one per image. 'gt_classes', 'gt_subclasses' and
            'gt_subclasses_flipped' (and the 'gt_viewpoints*' arrays) are
            all-zero placeholders; 'flipped' is always False.
        """
        assert len(box_list) == self.num_images, \
                'Number of boxes must match number of ground-truth images'
        entries = []
        for img in xrange(self.num_images):
            proposals = box_list[img]
            n_props = proposals.shape[0]
            table_shape = (n_props, self.num_classes)
            # whether viewpoint tables are produced for this image
            with_view = (cfg.TRAIN.VIEWPOINT == True
                         or cfg.TEST.VIEWPOINT == True)

            iou_table = np.zeros(table_shape, dtype=np.float32)
            sub_table = np.zeros(table_shape, dtype=np.int32)
            sub_flip_table = np.zeros(table_shape, dtype=np.int32)
            if with_view:
                # order: azimuth, azimuth_flipped, elevation,
                # elevation_flipped, rotation, rotation_flipped
                view_tables = [np.zeros(table_shape, dtype=np.float32)
                               for _ in range(6)]

            if gt_roidb is not None:
                gt_boxes = gt_roidb[img]['boxes']
                if gt_boxes.shape[0] != 0 and n_props != 0:
                    gt_classes = gt_roidb[img]['gt_classes']
                    gt_subclasses = gt_roidb[img]['gt_subclasses']
                    gt_subclasses_flipped = gt_roidb[img][
                        'gt_subclasses_flipped']
                    if with_view:
                        gt_views = gt_roidb[img]['gt_viewpoints']
                        gt_views_flipped = gt_roidb[img][
                            'gt_viewpoints_flipped']

                    pair_iou = bbox_overlaps(proposals.astype(np.float),
                                             gt_boxes.astype(np.float))
                    best_gt = pair_iou.argmax(axis=1)
                    best_iou = pair_iou.max(axis=1)
                    # proposals that touch at least one gt box
                    hit = np.where(best_iou > 0)[0]
                    matched_gt = best_gt[hit]
                    matched_cls = gt_classes[matched_gt]

                    iou_table[hit, matched_cls] = best_iou[hit]
                    sub_table[hit, matched_cls] = gt_subclasses[matched_gt]
                    sub_flip_table[hit, matched_cls] = \
                        gt_subclasses_flipped[matched_gt]
                    if with_view:
                        # angle 0/1/2 = azimuth/elevation/rotation
                        for angle in range(3):
                            view_tables[2 * angle][hit, matched_cls] = \
                                gt_views[matched_gt, angle]
                            view_tables[2 * angle + 1][hit, matched_cls] = \
                                gt_views_flipped[matched_gt, angle]

            iou_table = scipy.sparse.csr_matrix(iou_table)
            sub_table = scipy.sparse.csr_matrix(sub_table)
            sub_flip_table = scipy.sparse.csr_matrix(sub_flip_table)

            if with_view:
                (azimuth, azimuth_flip, elevation, elevation_flip,
                 rotation, rotation_flip) = [
                     scipy.sparse.csr_matrix(tab) for tab in view_tables]
                entries.append({
                    'boxes': proposals,
                    'gt_classes': np.zeros((n_props, ), dtype=np.int32),
                    'gt_viewpoints':
                    np.zeros((n_props, 3), dtype=np.float32),
                    'gt_viewpoints_flipped':
                    np.zeros((n_props, 3), dtype=np.float32),
                    'gt_viewindexes_azimuth': azimuth,
                    'gt_viewindexes_azimuth_flipped': azimuth_flip,
                    'gt_viewindexes_elevation': elevation,
                    'gt_viewindexes_elevation_flipped': elevation_flip,
                    'gt_viewindexes_rotation': rotation,
                    'gt_viewindexes_rotation_flipped': rotation_flip,
                    'gt_subclasses': np.zeros((n_props, ), dtype=np.int32),
                    'gt_subclasses_flipped':
                    np.zeros((n_props, ), dtype=np.int32),
                    'gt_overlaps': iou_table,
                    'gt_subindexes': sub_table,
                    'gt_subindexes_flipped': sub_flip_table,
                    'flipped': False,
                })
            else:
                entries.append({
                    'boxes': proposals,
                    'gt_classes': np.zeros((n_props, ), dtype=np.int32),
                    'gt_subclasses': np.zeros((n_props, ), dtype=np.int32),
                    'gt_subclasses_flipped':
                    np.zeros((n_props, ), dtype=np.int32),
                    'gt_overlaps': iou_table,
                    'gt_subindexes': sub_table,
                    'gt_subindexes_flipped': sub_flip_table,
                    'flipped': False,
                })
        return entries
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride,
                        all_anchors, num_anchors):
    """Label anchors as fg/bg/ignore and build their box-regression targets.

    Same as the anchor target layer in original Fast/er RCNN.

    Args:
        rpn_cls_score: array-like whose ``shape[1:3]`` is read as the
            (height, width) of the RPN score map.
        gt_boxes: ground-truth boxes; passed to ``bbox_overlaps`` and
            row-indexed to pick each anchor's best-matching box.
        im_info: sequence indexed as ``[0]`` = image height and
            ``[1]`` = image width.
        _feat_stride: unused here; kept for interface compatibility.
        all_anchors: 2-D array with one anchor per row; columns 0-3 are
            tested against the image bounds as x1, y1, x2, y2.
        num_anchors: number of anchors per score-map position (``A``).

    Returns:
        Tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)``. ``rpn_labels`` is reshaped to
        ``(1, 1, A * height, width)`` and holds 1 (foreground),
        0 (background) or -1 (ignored). ``rpn_bbox_targets`` holds what
        ``_compute_targets`` returns for each anchor and its best gt box.
        The inside weights are non-zero only for foreground anchors; the
        outside weights carry the per-example weighting of foreground and
        background anchors. The last three are reshaped to
        ``(1, height, width, A * 4)``.
    """
    A = num_anchors
    total_anchors = all_anchors.shape[0]

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    # only keep anchors inside the image:
    # indices of the rows of all_anchors that lie fully within the bounds
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]
    num_inside = len(inds_inside)

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    # label: 1 is positive, 0 is negative, -1 is dont care
    # np.empty leaves the buffer uninitialized, so fill it with -1 right away
    labels = np.empty((num_inside, ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt): overlaps[i, j] relates anchor i to gt box j
    # np.float64 rather than the np.float alias: the alias was removed in
    # NumPy 1.24 and was always the same 64-bit float type.
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                             np.ascontiguousarray(gt_boxes, dtype=np.float64))
    # per row (axis=1): index of the best-matching gt box for every anchor
    argmax_overlaps = overlaps.argmax(axis=1)
    # ... and the overlap value that match achieves
    max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
    # per column (axis=0): index of the best-matching anchor for every gt box
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    # ... and the overlap value that match achieves
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # Row (anchor) indices of every entry that equals its column maximum.
    # Unlike argmax this keeps ties, so an anchor may appear more than once;
    # np.where reports the indices in row-major order.
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives: best overlap below RPN_NEGATIVE_OVERLAP
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many; the cap is
    # RPN_FG_FRACTION * RPN_BATCHSIZE
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION *
                 cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        # randomly pick the surplus positives and mark them as ignored
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many; negatives fill
    # whatever is left of the batch after the positives
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1

    # regression target of every inside anchor w.r.t. the gt box it overlaps
    # most (gt_boxes[argmax_overlaps, :] is that box, one row per anchor)
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        # NOTE: these divide by the number of positives / negatives, which
        # can be zero; the result is then only assigned to an empty selection.
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))

    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    # labels
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    rpn_labels = labels.reshape((1, 1, A * height, width))

    # bbox_targets
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))

    # bbox_inside_weights
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))

    # bbox_outside_weights
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
Example #35
0
def prepare_roidb(imdb):
    """Attach an image path and an 'info_boxes' table to every roidb entry.

    For each image, the grid boxes returned by ``get_boxes_grid`` are
    compared, at every scale in ``cfg.TRAIN.SCALES``, with the rescaled
    ground-truth boxes. A grid box is kept when its best overlap reaches
    ``cfg.TRAIN.FG_THRESH`` and the class of its best-matching box lies in
    ``1 .. imdb.num_classes - 1``; each kept box becomes one 18-column row
    of ``roidb[i]['info_boxes']``.

    The enriched roidb is pickled to
    ``<imdb.cache_path>/<imdb.name>_gt_roidb_prepared.pkl``. If that file
    already exists it is loaded into ``imdb._roidb`` and nothing is
    recomputed. Returns None in both cases.
    """
    cache_file = os.path.join(imdb.cache_path,
                              imdb.name + '_gt_roidb_prepared.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            imdb._roidb = cPickle.load(fid)
        print('{} gt roidb prepared loaded from {}'.format(
            imdb.name, cache_file))
        return

    roidb = imdb.roidb
    for img in xrange(len(imdb.image_index)):
        roidb[img]['image'] = imdb.image_path_at(img)
        gt = roidb[img]['boxes']
        gt_labels = roidb[img]['gt_classes']

        # images without ground truth get an empty table
        if gt.shape[0] == 0:
            roidb[img]['info_boxes'] = np.zeros((0, 18), dtype=np.float32)
            continue

        # compute grid boxes; PIL reports size as (width, height)
        img_w, img_h = PIL.Image.open(imdb.image_path_at(img)).size
        grid, cx, cy = get_boxes_grid(img_h, img_w)

        # one block of rows per scale, stacked once at the end
        blocks = [np.zeros((0, 18), dtype=np.float32)]
        for scale_ind, scale in enumerate(cfg.TRAIN.SCALES):
            gt_scaled = gt * scale

            # best ground-truth match of every grid box at this scale
            iou = bbox_overlaps(grid.astype(np.float),
                                gt_scaled.astype(np.float))
            best_iou = iou.max(axis=1)
            best_gt = iou.argmax(axis=1)
            best_cls = gt_labels[best_gt]

            # positive grid boxes, grouped class by class
            fg_inds = [
                idx
                for k in xrange(1, imdb.num_classes)
                for idx in np.where((best_cls == k)
                                    & (best_iou >= cfg.TRAIN.FG_THRESH))[0]
            ]
            if not fg_inds:
                continue

            matched = best_gt[fg_inds]
            # bounding box regression targets
            targets = _compute_targets(grid[fg_inds, :],
                                       gt_scaled[matched, :])
            # scale mapping for RoI pooling
            mapped_ind = cfg.TRAIN.SCALE_MAPPING[scale_ind]
            mapped_scale = cfg.TRAIN.SCALES[mapped_ind]

            # row layout:
            # (cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label,
            #  gt_sublabel, target); column 13 (gt_sublabel) stays zero
            block = np.zeros((len(fg_inds), 18), dtype=np.float32)
            block[:, 0] = cx[fg_inds]
            block[:, 1] = cy[fg_inds]
            block[:, 2] = scale_ind
            block[:, 3:7] = grid[fg_inds, :]
            block[:, 7] = mapped_ind
            block[:, 8:12] = grid[fg_inds, :] * mapped_scale / scale
            block[:, 12] = gt_labels[matched]
            block[:, 14:] = targets
            blocks.append(block)

        roidb[img]['info_boxes'] = np.vstack(blocks)

    with open(cache_file, 'wb') as fid:
        cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print('wrote gt roidb prepared to {}'.format(cache_file))
Example #36
0
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image,
                 rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: candidate RoIs; columns 1:5 are read as the box
            coordinates (the original notes give the row layout as
            ``(0, x1, y1, x2, y2)``).
        all_scores: per-RoI scores, indexed in parallel with ``all_rois``.
        gt_boxes: ground-truth boxes; columns 0:4 are the box coordinates
            and column 4 is the class label.
        fg_rois_per_image: upper bound on the number of foreground RoIs.
        rois_per_image: total number of RoIs to sample.
        num_classes: forwarded to ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels, rois, roi_scores, bbox_targets,
        bbox_inside_weights)``. The sampled RoIs are ordered foreground
        first, background last, and background labels are clamped to 0.

    Raises:
        ValueError: if no RoI qualifies as foreground or as background.
    """
    # overlaps: (rois x gt_boxes), computed on the coordinate columns only.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    # For every RoI: index of the best-overlapping gt box and that overlap.
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # Label of the best-matching gt box.
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]

    # Small modification to the original version where we ensure a fixed
    # number of regions are sampled
    if fg_inds.size > 0 and bg_inds.size > 0:
        # Cap the foreground count, then fill the remainder with background;
        # sample with replacement only when there are too few candidates.
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds,
                             size=int(fg_rois_per_image),
                             replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds,
                             size=int(bg_rois_per_image),
                             replace=to_replace)
    elif fg_inds.size > 0:
        # Foreground only: the whole batch is foreground.
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds,
                             size=int(rois_per_image),
                             replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        # Background only: the whole batch is background.
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds,
                             size=int(rois_per_image),
                             replace=to_replace)
        fg_rois_per_image = 0
    else:
        # Fail loudly instead of dropping into an interactive debugger,
        # which would hang an unattended training run.
        raise ValueError(
            'no RoI qualifies as foreground or background: every max '
            'overlap is below cfg.TRAIN.BG_THRESH_LO')

    # The indices that we're selecting (both fg and bg), fg first.
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    # Convert box coordinates into regression deltas against the matched gt
    # box. Per the original notes each row is [label, tx, ty, tw, th].
    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    # Expand the compact targets into the regression label layout.
    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
Example #37
0
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate 9 anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the 9 anchors
        # filter out-of-image anchors
        # measure GT overlap

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        # map of shape (..., H, W)
        height, width = bottom[0].data.shape[-2:]
        # GT boxes (x1, y1, x2, y2, label)
        gt_boxes = bottom[1].data
        # im_info
        im_info = bottom[2].data[0, :]

        if DEBUG:
            print ''
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'scale: {}'.format(im_info[2])
            print 'height, width: ({}, {})'.format(height, width)
            print 'rpn: gt_boxes.shape', gt_boxes.shape
            print 'rpn: gt_boxes', gt_boxes

        # 1. Generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        all_anchors = (self._anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -self._allowed_border)
            & (all_anchors[:, 1] >= -self._allowed_border)
            & (all_anchors[:, 2] < im_info[1] + self._allowed_border)
            &  # width
            (all_anchors[:, 3] < im_info[0] + self._allowed_border)  # height
        )[0]

        if DEBUG:
            print 'total_anchors', total_anchors
            print 'inds_inside', len(inds_inside)

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
        if DEBUG:
            print 'anchors.shape', anchors.shape

        # label: 1 is positive, 0 is negative, -1 is dont care
        labels = np.empty((len(inds_inside), ), dtype=np.float32)  #一维
        labels.fill(-1)  #全部置为初始值-1,这两步难道不能一步到位么?

        # overlaps between the anchors and the gt boxes
        # overlaps (ex, gt)
        # 这里的gt_boxes是真的gt?是的!这就是就是来确定anchor的正负
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float),
            np.ascontiguousarray(gt_boxes, dtype=np.float))
        """
        这样?
            gt0  gt1 gt2 gt3 ...
        a0  0.2  0.3 0.1 0.7 ... 
        a1
        a2 
        """
        argmax_overlaps = overlaps.argmax(axis=1)  # 取最大值索引
        #取每个anchor对应的最大iou的值
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        #上面是1-anchor对多-gt,这里是1-gt对多anchor
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        # 相当于通[i,j]来对二维矩阵进行索引取值
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        #得到(array([x0,x1,..],array([y0,y1,..]))
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
        """
        注意,在默认设置中这里确定ANCHOR的正负所用的阈值并不是连续的,
        例如RPN_POSITIVE_OVERLAP等于0.5,而RPN_NEGATIVE_OVERLAP等于0.3,
        那么中间必然出现一段真空区域,在这个区域内的ANCHOR自然也就是-1,
        会被丢弃!
        """
        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # 两种情况下的anchor为positive
        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # subsample positive labels if we have too many
        # default, 0.5 * 256, 因为论文中所说的anchor的正负比例为1:1
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
        fg_inds = np.where(labels == 1)[0]
        # 如果fg多了就要丢弃一些
        if len(fg_inds) > num_fg:
            disable_inds = npr.choice(fg_inds,
                                      size=(len(fg_inds) - num_fg),
                                      replace=False)
            labels[disable_inds] = -1  # 丢弃的置为-1

        # subsample negative labels if we have too many
        num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
        bg_inds = np.where(labels == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds,
                                      size=(len(bg_inds) - num_bg),
                                      replace=False)
            labels[disable_inds] = -1
            #print "was %s inds, disabling %s, now %s inds" % (
            #len(bg_inds), len(disable_inds), np.sum(labels == 0))

        bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
        bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

        bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        bbox_inside_weights[labels == 1, :] = np.array(
            cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

        bbox_outside_weights = np.zeros((len(inds_inside), 4),
                                        dtype=np.float32)
        if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
            # uniform weighting of examples (given non-uniform sampling)
            num_examples = np.sum(labels >= 0)
            positive_weights = np.ones((1, 4)) * 1.0 / num_examples
            negative_weights = np.ones((1, 4)) * 1.0 / num_examples
        else:
            assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                    (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
            positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                                np.sum(labels == 1))
            negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                                np.sum(labels == 0))
        bbox_outside_weights[labels == 1, :] = positive_weights
        bbox_outside_weights[labels == 0, :] = negative_weights

        if DEBUG:
            self._sums += bbox_targets[labels == 1, :].sum(axis=0)
            self._squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0)
            self._counts += np.sum(labels == 1)
            means = self._sums / self._counts
            stds = np.sqrt(self._squared_sums / self._counts - means**2)
            print 'means:'
            print means
            print 'stdevs:'
            print stds

        # map up to original set of anchors
        labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
        bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
        bbox_inside_weights = _unmap(bbox_inside_weights,
                                     total_anchors,
                                     inds_inside,
                                     fill=0)
        bbox_outside_weights = _unmap(bbox_outside_weights,
                                      total_anchors,
                                      inds_inside,
                                      fill=0)

        if DEBUG:
            print 'rpn: max max_overlap', np.max(max_overlaps)
            print 'rpn: num_positive', np.sum(labels == 1)
            print 'rpn: num_negative', np.sum(labels == 0)
            self._fg_sum += np.sum(labels == 1)
            self._bg_sum += np.sum(labels == 0)
            self._count += 1
            print 'rpn: num_positive avg', self._fg_sum / self._count
            print 'rpn: num_negative avg', self._bg_sum / self._count

        # labels, rpn-labels也就是从rpn的角度分出来的fg,bg
        labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, 1, A * height, width))
        top[0].reshape(*labels.shape)
        top[0].data[...] = labels

        # bbox_targets
        bbox_targets = bbox_targets \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        top[1].reshape(*bbox_targets.shape)
        top[1].data[...] = bbox_targets

        # bbox_inside_weights
        bbox_inside_weights = bbox_inside_weights \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        assert bbox_inside_weights.shape[2] == height
        assert bbox_inside_weights.shape[3] == width
        top[2].reshape(*bbox_inside_weights.shape)
        top[2].data[...] = bbox_inside_weights

        # bbox_outside_weights
        bbox_outside_weights = bbox_outside_weights \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        assert bbox_outside_weights.shape[2] == height
        assert bbox_outside_weights.shape[3] == width
        top[3].reshape(*bbox_outside_weights.shape)
        top[3].data[...] = bbox_outside_weights
Example #38
0
    def _load_kitti_voxel_exemplar_annotation(self, index):
        """
        Load image and bounding boxes info from txt file in the KITTI voxel exemplar format.

        Each annotation line is whitespace separated: field 0 is the class
        name, field 1 the subclass id (-1 entries are skipped), field 2 the
        flip flag and fields 3:7 the box coordinates. Flipped and unflipped
        objects are stored in the same file and must come in equal numbers.

        When ``cfg.IS_RPN`` is set, the method also updates
        ``self._num_boxes_all`` and ``self._num_boxes_covered`` with how many
        gt boxes of each class are covered by the grid boxes / anchors.

        Args:
            index: image identifier; names the annotation file and is passed
                to ``self.image_path_from_index``.

        Returns:
            dict with keys ``boxes``, ``gt_classes``, ``gt_subclasses``,
            ``gt_subclasses_flipped``, ``gt_overlaps``, ``gt_subindexes``,
            ``gt_subindexes_flipped`` (the last three as CSR sparse matrices)
            and ``flipped`` (always False).
        """
        if self._image_set == 'training' and self._seq_name != 'trainval':
            prefix = 'train'
        elif self._image_set == 'training':
            prefix = 'trainval'
        else:
            prefix = ''

        # Both lists stay empty when there is no prefix or no annotation file.
        lines = []
        lines_flipped = []
        if prefix != '':
            filename = os.path.join(self._kitti_tracking_path, cfg.SUBCLS_NAME, prefix, index + '.txt')
            if os.path.exists(filename):
                print(filename)

                # the annotation file contains flipped objects
                with open(filename) as f:
                    for line in f:
                        words = line.split()
                        subcls = int(words[1])
                        is_flip = int(words[2])
                        if subcls == -1:
                            continue
                        if is_flip == 0:
                            lines.append(line)
                        else:
                            lines_flipped.append(line)

        num_objs = len(lines)

        # store information of flipped objects
        assert (num_objs == len(lines_flipped)), 'The number of flipped objects is not the same!'
        gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)

        for ix, line in enumerate(lines_flipped):
            gt_subclasses_flipped[ix] = int(line.split()[1])

        boxes = np.zeros((num_objs, 4), dtype=np.float32)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        gt_subclasses = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)
        subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
        subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32)

        for ix, line in enumerate(lines):
            words = line.split()
            cls = self._class_to_ind[words[0]]
            subcls = int(words[1])
            boxes[ix, :] = [float(n) for n in words[3:7]]
            gt_classes[ix] = cls
            gt_subclasses[ix] = subcls
            overlaps[ix, cls] = 1.0
            subindexes[ix, cls] = subcls
            subindexes_flipped[ix, cls] = gt_subclasses_flipped[ix]

        overlaps = scipy.sparse.csr_matrix(overlaps)
        subindexes = scipy.sparse.csr_matrix(subindexes)
        subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

        if cfg.IS_RPN:
            if cfg.IS_MULTISCALE:
                # compute overlaps between grid boxes and gt boxes in multi-scales
                # rescale the gt boxes
                boxes_all = np.zeros((0, 4), dtype=np.float32)
                for scale in cfg.TRAIN.SCALES:
                    boxes_all = np.vstack((boxes_all, boxes * scale))
                gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

                # compute grid boxes
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]
                boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

                # compute overlap
                overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float64), boxes_all.astype(np.float64))

                # check how many gt boxes are covered by grids
                if num_objs != 0:
                    # Maps every rescaled copy of a gt box back to its object.
                    obj_inds = np.tile(np.arange(num_objs), len(cfg.TRAIN.SCALES))
                    max_overlaps = overlaps_grid.max(axis = 0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(np.where((gt_classes_all == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1]))[0])
                    index_covered = np.unique(obj_inds[fg_inds])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(np.where(gt_classes[index_covered] == i)[0])
            else:
                assert len(cfg.TRAIN.SCALES_BASE) == 1
                scale = cfg.TRAIN.SCALES_BASE[0]
                feat_stride = 16
                # faster rcnn region proposal
                base_size = 16
                ratios = [3.0, 2.0, 1.5, 1.0, 0.75, 0.5, 0.25]
                scales = 2**np.arange(1, 6, 0.5)
                anchors = generate_anchors(base_size, ratios, scales)
                num_anchors = anchors.shape[0]

                # image size
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]

                def _heatmap_extent(image_extent):
                    # One rounding step followed by two halving steps; the
                    # same arithmetic is applied to height and width.
                    extent = np.round((image_extent * scale - 1) / 4.0 + 1)
                    extent = np.floor((extent - 1) / 2.0 + 1 + 0.5)
                    return np.floor((extent - 1) / 2.0 + 1 + 0.5)

                # height and width of the heatmap
                height = _heatmap_extent(image_height)
                width = _heatmap_extent(image_width)

                # gt boxes
                gt_boxes = boxes * scale

                # 1. Generate proposals from bbox deltas and shifted anchors
                shift_x = np.arange(0, width) * feat_stride
                shift_y = np.arange(0, height) * feat_stride
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = num_anchors
                K = shifts.shape[0]
                all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
                all_anchors = all_anchors.reshape((K * A, 4))

                # compute overlap
                overlaps_grid = bbox_overlaps(all_anchors.astype(np.float64), gt_boxes.astype(np.float64))

                # check how many gt boxes are covered by anchors
                if num_objs != 0:
                    max_overlaps = overlaps_grid.max(axis = 0)
                    fg_inds = []
                    for k in xrange(1, self.num_classes):
                        fg_inds.extend(np.where((gt_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1]))[0])

                    for i in xrange(self.num_classes):
                        self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(np.where(gt_classes[fg_inds] == i)[0])

        return {'boxes' : boxes,
                'gt_classes': gt_classes,
                'gt_subclasses': gt_subclasses,
                'gt_subclasses_flipped': gt_subclasses_flipped,
                'gt_overlaps': overlaps,
                'gt_subindexes': subindexes,
                'gt_subindexes_flipped': subindexes_flipped,
                'flipped' : False}
Example #39
0
    def get_minibatch(self):
        """Assemble a random minibatch of subject/object relationship samples.

        Training annotations are drawn at random (with replacement) until
        ``self._batch_size`` samples are collected. For each annotated
        relationship, the stored location that best overlaps the subject box
        and the one that best overlaps the object box are looked up in
        ``self.vgg_data``; the pair is used only when both overlaps reach
        0.7 and the two locations differ.

        NOTE(review): the loop has no attempt limit, so it does not terminate
        if no annotation ever yields a valid pair -- confirm the data
        guarantees at least one.

        Returns:
            dict of arrays with keys ``classeme_s``, ``classeme_o``,
            ``visual_s``, ``visual_o``, ``location_s``, ``location_o`` and
            ``label``, one entry per collected sample.
        """
        min_overlap = .7
        annotations = self.rdata['annotation_train']

        classeme_s = []
        classeme_o = []
        visual_s = []
        visual_o = []
        loc_s = []
        loc_o = []
        labels = []
        cnt = 0
        while cnt < self._batch_size:
            idx = np.random.choice(len(annotations), 1)
            anno = annotations[idx[0]]
            im_id = anno.filename.split('.')[0]
            if im_id not in self.vgg_data:
                continue
            if not hasattr(anno, 'relationship'):
                continue

            features = self.vgg_data[im_id]
            classemes = features['classemes']
            visuals = features['visuals']
            locations = features['locations']

            # A single relationship is not stored as an array; wrap it so it
            # can be iterated (the normalized form is kept on the annotation).
            if not isinstance(anno.relationship, np.ndarray):
                anno.relationship = [anno.relationship]

            for rel in anno.relationship:
                if not hasattr(rel, 'phrase'):
                    continue
                predicate = rel.phrase[1]
                # Annotated boxes are stored as (ymin, ymax, xmin, xmax).
                ymin, ymax, xmin, xmax = rel.subBox
                sub_bbox = [xmin, ymin, xmax, ymax]

                ymin, ymax, xmin, xmax = rel.objBox
                obj_bbox = [xmin, ymin, xmax, ymax]

                # Row 0: subject box vs. every location; row 1: object box.
                overlaps = bbox_overlaps(
                    np.ascontiguousarray([sub_bbox, obj_bbox], dtype=np.float64),
                    np.ascontiguousarray(locations, dtype=np.float64))
                if overlaps.size == 0:
                    # No stored locations to match against.
                    continue

                sub_scores = overlaps[0]
                obj_scores = overlaps[1]
                # Best-overlapping location for the subject and the object.
                s = sub_scores.argsort()[-1]
                o = obj_scores.argsort()[-1]
                if sub_scores[s] < min_overlap or obj_scores[o] < min_overlap:
                    continue
                if s == o:
                    # Subject and object resolved to the same location.
                    continue

                # Each box is encoded relative to the other one of the pair.
                sub_box_encoded = bbox_transform(np.array([locations[o]]), np.array([locations[s]]))[0]
                obj_box_encoded = bbox_transform(np.array([locations[s]]), np.array([locations[o]]))[0]

                relation = self.meta['meta/pre/name2idx/' + predicate][...]
                labels.append(np.float32(relation))
                classeme_s.append(classemes[s])
                classeme_o.append(classemes[o])
                visual_s.append(visuals[s])
                visual_o.append(visuals[o])
                loc_s.append(sub_box_encoded)
                loc_o.append(obj_box_encoded)
                cnt += 1
                if cnt >= self._batch_size:
                    break

        blobs = {}
        blobs['classeme_s'] = np.array(classeme_s)
        blobs['classeme_o'] = np.array(classeme_o)
        blobs['visual_s'] = np.array(visual_s)
        blobs['visual_o'] = np.array(visual_o)
        blobs['location_s'] = np.array(loc_s)
        blobs['location_o'] = np.array(loc_o)
        blobs['label'] = np.array(labels)

        return blobs
Example #40
0
    def forward(self, step=1):
        """Build one training batch from the next sample that has ground truth.

        The cursor ``self.iter`` is advanced by ``step`` (wrapping around the
        end of ``self.imagelist``) until a sample whose first ground-truth
        array is non-empty is found. Boxes of that sample are then split into
        foreground / background by overlap with ``gts[0]`` and sampled without
        replacement, foreground first.

        NOTE(review): the search loop has no bound, so it does not terminate
        if no sample has ground truth -- confirm against the dataset.

        Args:
            step: how far to advance the sample cursor per visited sample.

        Returns:
            Tuple ``(blobs, rois, labels, bbox_targets, bbox_weights)``:
            ``blobs`` is the stack of processed images of the sample;
            ``rois`` has 5 columns, the first being the position
            ``0..self.length-1`` and the rest the scaled box; ``labels`` is
            1 for foreground rows and 0 for background rows;
            ``bbox_targets`` are normalized with ``self.bbox_mean`` /
            ``self.bbox_std``; ``bbox_weights`` is 1 on foreground rows.
            When fewer than ``config['batch_size']`` boxes are available,
            the outputs have one row per kept box.
        """
        num_samples = len(self.imagelist)
        # Find the next sample with ground truth before touching any image,
        # so skipped samples cost no image decoding.
        while True:
            index = self.index[self.iter]
            gts = self.gt[index]
            self.iter += step
            if self.iter >= num_samples:
                self.iter -= num_samples
            if gts[0].shape[0] > 0:
                break

        proc_imgs = []
        for img_name in self.imagelist[index]:
            img_path = osp.join(self.root_dir, img_name)
            assert osp.isfile(img_path)
            # scale of the last processed image is the one applied to the rois
            proc_img, scale = _get_image_blob(cv2.imread(img_path))
            proc_imgs.append(proc_img)
        blobs = np.vstack(proc_imgs)
        bboxes = self.bbox[index][0][:, :4]

        batch_size = self.config['batch_size']
        select_overlap = self.config['select_overlap']

        # sample rois
        overlaps = bbox_overlaps(np.require(bboxes, dtype=np.float64),
                                 np.require(gts[0], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)

        fg_inds = np.where(max_overlaps >= select_overlap)[0]
        # Guard against the case when an image has fewer than batch_size
        # foreground RoIs
        fg_rois_per_this_image = min(batch_size, fg_inds.size)
        # Sample foreground regions without replacement
        if fg_inds.size > 0:
            fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

        bg_inds = np.where(max_overlaps < select_overlap)[0]
        # Compute number of background RoIs to take from this image (guarding
        # against there being fewer than desired)
        bg_rois_per_this_image = min(batch_size - fg_rois_per_this_image,
                                     bg_inds.size)
        # Sample background regions without replacement
        if bg_inds.size > 0:
            bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

        keep_ids = np.append(fg_inds, bg_inds)
        # Size everything by the number of boxes actually kept: it can be
        # smaller than batch_size, and sizing by batch_size then makes the
        # concatenation and the boolean mask below fail on a shape mismatch.
        num_rois = keep_ids.size

        labels = np.ones((num_rois, 1), dtype=np.float64)
        labels[fg_rois_per_this_image:] = 0

        # n * 1 * 4, repeated once per position in the sequence
        rois = bboxes[keep_ids][:, np.newaxis, :]
        rois = np.tile(rois, (1, self.length, 1))
        rois = rois * scale  # scale rois to match image scale
        assignment = np.tile(np.arange(self.length), (num_rois, 1))[:, :, np.newaxis]
        rois = np.concatenate((assignment, rois), axis=2).reshape((-1, 5))

        # compute targets and weights
        matched_gt = gt_assignment[keep_ids]
        fg_mask = labels.flatten().astype('bool')
        bbox_targets = []
        bbox_weights = []
        for gt in gts[1:]:
            # Targets go from the matched box in gts[0] to the same box in
            # each later ground-truth array.
            cur_bbox_targets = bbox_transform(gts[0][matched_gt],
                                              gt[matched_gt])
            cur_bbox_weights = np.zeros_like(cur_bbox_targets)
            cur_bbox_weights[fg_mask, ...] = 1
            bbox_targets.append(cur_bbox_targets)
            bbox_weights.append(cur_bbox_weights)
        bbox_targets = np.hstack(bbox_targets)
        bbox_weights = np.hstack(bbox_weights)
        bbox_targets = (bbox_targets - self.bbox_mean) / self.bbox_std

        return blobs, rois, labels, bbox_targets, bbox_weights
Example #41
0
def ssd_gengt_layer(batch_pred_conf, prior_boxes, batch_gt_boxes):
    """Build per-anchor labels and box-regression targets for a batch.

    For each image the prior boxes are matched to ground truth in two passes:
    first every ground-truth box greedily claims one still-free prior, then
    (when ``cfg.TRAIN.overlap_threshold > 0``) every remaining prior whose best
    overlap exceeds that threshold becomes positive as well.  If
    ``cfg.TRAIN.do_neg_mining`` is set, the most confidently mis-scored
    non-positive priors are flagged as negatives.

    Args:
        batch_pred_conf: per-image 2-D score arrays indexed ``[anchor, class]``.
            Column 0 is excluded when the best foreground confidence is taken
            (presumably the background class -- confirm against the caller).
        prior_boxes: array with one row per prior box; scaled by
            ``cfg.image_size`` before overlaps are computed.
        batch_gt_boxes: per-image ground-truth arrays; columns 0-3 are the box
            and column 4 is the label.  Rows starting at the first one whose
            column 0 is within 1e-3 of -1 are dropped.

    Returns:
        ``(labels, deltas)``: an int32 array of per-anchor labels (-1 for
        anchors left unflagged, 0 for mined negatives, otherwise the label of
        the matched ground-truth box) and a float32 array holding the
        ``bbox_transform`` output for every anchor.
    """
    batch_labels = []
    batch_deltas = []
    overlap_threshold = cfg.TRAIN.overlap_threshold
    negative_mining_thresh = cfg.TRAIN.neg_overlap

    for tl in range(len(batch_gt_boxes)):
        pred_conf = batch_pred_conf[tl]
        gt_boxes = batch_gt_boxes[tl]
        # argmax of a boolean array yields the first True (or 0 when there is
        # none), hence the re-check before truncating.
        first_ignore = np.argmax(np.fabs(gt_boxes[:, 0] - -1) < 1e-3)
        if np.fabs(gt_boxes[first_ignore, 0] - -1) < 1e-3:
            gt_boxes = gt_boxes[:first_ignore]

        num_gt_boxes = len(gt_boxes)
        num_anchors = len(prior_boxes)
        num_positive = 0

        # np.float64 / np.bool_ replace the np.float / np.bool aliases that
        # were removed from NumPy 1.24; the resulting dtypes are identical.
        overlaps = bbox_overlaps(
            np.ascontiguousarray(prior_boxes * cfg.image_size,
                                 dtype=np.float64),
            np.ascontiguousarray(gt_boxes * cfg.image_size,
                                 dtype=np.float64))

        # -1 = unassigned, 1 = positive, 0 = mined negative.
        anchor_flags = np.empty((len(prior_boxes),), dtype=np.int32)
        anchor_flags.fill(-1)
        # Allocated with one slot per prior but indexed by ground-truth id.
        gt_flags = np.empty((len(prior_boxes),), dtype=np.bool_)
        gt_flags.fill(False)

        max_matches_iou = np.empty((len(prior_boxes),), dtype=np.float32)
        max_matches_iou.fill(-1.0)
        max_matches_gtid = np.empty((len(prior_boxes),), dtype=np.int32)
        max_matches_gtid.fill(-1)

        # gt_boxes match priors: for every ground truth keep its
        # num_gt_boxes best priors, sorted by descending overlap.
        queues = []
        queue_tops = []
        for i in range(len(gt_boxes)):
            inds = np.argpartition(
                overlaps[:, i], num_anchors - num_gt_boxes)[-num_gt_boxes:]
            sort_inds = np.argsort(overlaps[inds, i])[::-1]
            queues.append(inds[sort_inds])
            queue_tops.append(0)

        # One assignment per round: among the unmatched ground truths, pick
        # the (ground truth, free prior) pair with the largest overlap.
        for i in range(num_gt_boxes):
            max_overlap = 1e-6
            best_gt = -1
            best_anchor = -1

            for j in range(num_gt_boxes):
                if gt_flags[j]:
                    continue
                # Skip priors already claimed in an earlier round.
                while anchor_flags[queues[j][queue_tops[j]]] != -1:
                    queue_tops[j] += 1

                _anchor = queues[j][queue_tops[j]]
                if max_overlap < overlaps[_anchor][j]:
                    max_overlap = overlaps[_anchor][j]
                    best_gt = j
                    best_anchor = _anchor

            # NOTE(review): when no candidate exceeds 1e-6 both indices stay
            # -1, so the last prior / last gt slot get flagged -- confirm
            # that this is intended.
            anchor_flags[best_anchor] = 1
            gt_flags[best_gt] = True
            max_matches_iou[best_anchor] = max_overlap
            max_matches_gtid[best_anchor] = best_gt
            num_positive += 1

        anchor_argmax_iou = overlaps.argmax(axis=1)
        anchor_max_iou = overlaps[np.arange(num_anchors), anchor_argmax_iou]
        # priors match gt_boxes
        if overlap_threshold > 0:
            # Record the best ground truth for every not-yet-positive prior
            # that overlaps anything at all ...
            inds = np.where((anchor_max_iou > 1e-6) & (anchor_flags != 1))
            max_matches_iou[inds] = anchor_max_iou[inds]
            max_matches_gtid[inds] = anchor_argmax_iou[inds]

            # ... and promote those above the threshold to positives.
            inds = np.where(
                (anchor_max_iou > overlap_threshold) & (anchor_flags != 1))
            gt_flags[anchor_argmax_iou[inds]] = True
            anchor_flags[inds] = 1
            num_positive += len(inds[0])

        # Negative mining: softmax over classes (max-shifted before exp),
        # then the highest probability among columns 1.. for each prior.
        max_pred_conf_head = np.max(pred_conf, axis=1, keepdims=True)
        pred_conf = np.exp(pred_conf - max_pred_conf_head)
        max_pred_conf = np.max(
            pred_conf[:, 1:], axis=1, keepdims=True) / \
                        np.sum(pred_conf, axis=1, keepdims=True)

        if cfg.TRAIN.do_neg_mining:
            num_negative = int(num_positive * cfg.TRAIN.neg_pos_ratio)
            if num_negative > (num_anchors - num_positive):
                num_negative = num_anchors - num_positive
            if num_negative > 0:
                inds = np.where((anchor_flags != 1) & (
                    max_matches_iou < negative_mining_thresh))

                max_matches_iou[inds] = anchor_max_iou[inds]
                max_matches_gtid[inds] = anchor_argmax_iou[inds]

                neg_inds = np.where((anchor_flags != 1) & (
                    max_matches_iou < negative_mining_thresh))[0]

                # Highest foreground confidence first; max_pred_conf is a
                # column vector, hence the trailing ", 0".
                order = max_pred_conf[neg_inds].argsort(axis=0)[::-1]
                anchor_flags[neg_inds[order[:num_negative, 0]]] = 0

        # Positives take the class label of their matched ground truth.
        labels = np.array(anchor_flags)
        inds = np.where(anchor_flags == 1)
        labels[inds] = gt_boxes[max_matches_gtid[inds], 4]

        # Targets are produced for every prior; priors whose gt id is still
        # -1 index the last ground-truth row here.
        deltas = bbox_transform(
            prior_boxes,
            gt_boxes[max_matches_gtid, :][:, :4].astype(
                np.float32, copy=False))

        batch_labels.append(labels)
        batch_deltas.append(deltas)

    return np.asarray(batch_labels, dtype=np.int32), np.asarray(
        batch_deltas, dtype=np.float32)
    def _load_pascal_annotation(self, index):
        """Load bounding boxes and classes from a PASCAL VOC style XML file.

        Args:
            index: image identifier; the annotation is read from
                ``<self._data_path>/Annotations/<index>.xml``.

        Returns:
            dict with keys ``boxes`` (uint16, one ``x1, y1, x2, y2`` row per
            object), ``gt_classes``, ``gt_subclasses``,
            ``gt_subclasses_flipped``, ``gt_overlaps`` (sparse one-hot class
            matrix), ``gt_subindexes``, ``gt_subindexes_flipped`` (all-zero
            sparse matrices) and ``flipped`` (always ``False``).

        Side effects:
            When ``cfg.IS_RPN`` is set, ``self._num_boxes_all`` and
            ``self._num_boxes_covered`` are incremented per class with the
            number of ground-truth boxes and the number of those reached by a
            grid box / anchor at the configured foreground threshold.
        """
        filename = os.path.join(self._data_path, 'Annotations', index + '.xml')

        def get_data_from_tag(node, tag):
            # Text content of the first <tag> element below `node`.
            return node.getElementsByTagName(tag)[0].childNodes[0].data

        with open(filename) as f:
            data = minidom.parseString(f.read())

        objs = data.getElementsByTagName('object')
        num_objs = len(objs)

        boxes = np.zeros((num_objs, 4), dtype=np.uint16)
        gt_classes = np.zeros((num_objs), dtype=np.int32)
        overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32)

        # Load object bounding boxes into a data frame.
        for ix, obj in enumerate(objs):
            # Make pixel indexes 0-based
            # NOTE(review): a coordinate of 0 in the XML becomes -1 here,
            # which the uint16 `boxes` array cannot represent -- confirm the
            # annotations are 1-based.
            x1 = float(get_data_from_tag(obj, 'xmin')) - 1
            y1 = float(get_data_from_tag(obj, 'ymin')) - 1
            x2 = float(get_data_from_tag(obj, 'xmax')) - 1
            y2 = float(get_data_from_tag(obj, 'ymax')) - 1
            name = str(get_data_from_tag(obj, "name")).lower().strip()
            # Unknown class names fall back to class index 0.
            if name in self._classes:
                cls = self._class_to_ind[name]
            else:
                cls = 0
            boxes[ix, :] = [x1, y1, x2, y2]
            gt_classes[ix] = cls
            overlaps[ix, cls] = 1.0

        overlaps = scipy.sparse.csr_matrix(overlaps)
        gt_subclasses = np.zeros((num_objs), dtype=np.int32)
        gt_subclasses_flipped = np.zeros((num_objs), dtype=np.int32)
        subindexes = np.zeros((num_objs, self.num_classes), dtype=np.int32)
        subindexes_flipped = np.zeros((num_objs, self.num_classes), dtype=np.int32)
        subindexes = scipy.sparse.csr_matrix(subindexes)
        subindexes_flipped = scipy.sparse.csr_matrix(subindexes_flipped)

        if cfg.IS_RPN:
            if cfg.IS_MULTISCALE:
                # compute overlaps between grid boxes and gt boxes in multi-scales
                # rescale the gt boxes
                boxes_all = np.zeros((0, 4), dtype=np.float32)
                for scale in cfg.TRAIN.SCALES:
                    boxes_all = np.vstack((boxes_all, boxes * scale))
                gt_classes_all = np.tile(gt_classes, len(cfg.TRAIN.SCALES))

                # compute grid boxes
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]
                boxes_grid, _, _ = get_boxes_grid(image_height, image_width)

                # compute overlap (np.float64 replaces the np.float alias
                # removed from NumPy 1.24; same dtype)
                overlaps_grid = bbox_overlaps(boxes_grid.astype(np.float64),
                                              boxes_all.astype(np.float64))

                # check how many gt boxes are covered by grids
                if num_objs != 0:
                    # Maps every row of boxes_all back to its original
                    # object.  Named gt_index so that the `index` argument
                    # is no longer shadowed.
                    gt_index = np.tile(range(num_objs), len(cfg.TRAIN.SCALES))
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    # Class 0 is skipped; FG_THRESH is indexed per class.
                    for k in range(1, self.num_classes):
                        fg_inds.extend(np.where((gt_classes_all == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1]))[0])
                    index_covered = np.unique(gt_index[fg_inds])

                    for i in range(self.num_classes):
                        self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(np.where(gt_classes[index_covered] == i)[0])
            else:
                assert len(cfg.TRAIN.SCALES_BASE) == 1
                scale = cfg.TRAIN.SCALES_BASE[0]
                feat_stride = 16
                # faster rcnn region proposal
                anchors = generate_anchors()
                num_anchors = anchors.shape[0]

                # image size
                s = PIL.Image.open(self.image_path_from_index(index)).size
                image_height = s[1]
                image_width = s[0]

                # height and width of the heatmap
                # (presumably mirrors the network's downsampling -- confirm)
                height = np.round((image_height * scale - 1) / 4.0 + 1)
                height = np.floor((height - 1) / 2 + 1 + 0.5)
                height = np.floor((height - 1) / 2 + 1 + 0.5)

                width = np.round((image_width * scale - 1) / 4.0 + 1)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)
                width = np.floor((width - 1) / 2.0 + 1 + 0.5)

                # gt boxes
                gt_boxes = boxes * scale

                # 1. Generate proposals from bbox deltas and shifted anchors
                shift_x = np.arange(0, width) * feat_stride
                shift_y = np.arange(0, height) * feat_stride
                shift_x, shift_y = np.meshgrid(shift_x, shift_y)
                shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
                # add A anchors (1, A, 4) to
                # cell K shifts (K, 1, 4) to get
                # shift anchors (K, A, 4)
                # reshape to (K*A, 4) shifted anchors
                A = num_anchors
                K = shifts.shape[0]
                all_anchors = (anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
                all_anchors = all_anchors.reshape((K * A, 4))

                # compute overlap
                overlaps_grid = bbox_overlaps(all_anchors.astype(np.float64),
                                              gt_boxes.astype(np.float64))

                # check how many gt boxes are covered by anchors
                if num_objs != 0:
                    max_overlaps = overlaps_grid.max(axis=0)
                    fg_inds = []
                    for k in range(1, self.num_classes):
                        fg_inds.extend(np.where((gt_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH[k-1]))[0])

                    for i in range(self.num_classes):
                        self._num_boxes_all[i] += len(np.where(gt_classes == i)[0])
                        self._num_boxes_covered[i] += len(np.where(gt_classes[fg_inds] == i)[0])

        return {'boxes' : boxes,
                'gt_classes': gt_classes,
                'gt_subclasses': gt_subclasses,
                'gt_subclasses_flipped': gt_subclasses_flipped,
                'gt_overlaps' : overlaps,
                'gt_subindexes': subindexes,
                'gt_subindexes_flipped': subindexes_flipped,
                'flipped' : False}
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image,
                 num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Args:
        all_rois: candidate RoIs; columns 1-4 hold the box coordinates.
        gt_boxes: ground-truth rows; columns 0-3 are the box, column 4 the
            class label.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: total number of RoIs wanted (foreground plus
            background).
        num_classes: forwarded to ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels, rois, bbox_targets, bbox_inside_weights,
        matched_gt_boxes, rois_pos, gt_assignment_pos)`` where
        ``matched_gt_boxes`` is the full ground-truth row assigned to each
        sampled RoI, ``rois_pos`` is a float32 copy of the sampled foreground
        RoIs and ``gt_assignment_pos`` their ground-truth indices.
    """
    # overlaps: (rois x gt_boxes)
    # np.float64 replaces the np.float alias removed from NumPy 1.24; the
    # dtype passed to bbox_overlaps is unchanged.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds,
                             size=int(fg_rois_per_this_image),
                             replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI)
                       & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds,
                             size=int(bg_rois_per_this_image),
                             replace=False)

    # The indices that we're selecting (both fg and bg); foreground first,
    # which is what the label clamping below relies on.
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_this_image):] = 0
    rois = all_rois[keep_inds]

    # Positive RoIs are returned separately; a fresh float32 buffer is
    # allocated for them.
    rois_pos = np.zeros((fg_inds.size, 5), dtype=np.float32)
    rois_pos[:, :] = all_rois[fg_inds]
    gt_assignment_pos = gt_assignment[fg_inds]

    bbox_target_data = _compute_targets(rois[:, 1:5],
                                        gt_boxes[gt_assignment[keep_inds], :4],
                                        labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    # The matched ground-truth rows carry both the original coordinates and
    # the class of each sampled RoI's ground truth.
    matched_gt_boxes = gt_boxes[gt_assignment[keep_inds], :]
    return (labels, rois, bbox_targets, bbox_inside_weights,
            matched_gt_boxes, rois_pos, gt_assignment_pos)
Example #44
0
# Measure how often a foreground relation RoI overlaps a background relation
# RoI by more than `thresh`, averaged over the whole dataset.
cfg.TRAIN.USE_FLIPPED = False
imdb, roidb = combined_roidb('visual_genome_train_rel')
num_images = len(roidb)
data_layer = RoIDataLayer(imdb, roidb, bbox_means, bbox_stds)

epoch = 10
thresh = 0.8
fg_bg = AverageMeter()
print_freq = 100
for e in range(epoch):
    for i in range(num_images):
        blobs = data_layer.forward()
        predicates = blobs['predicates']
        rel_rois = blobs['rel_rois']
        # Non-zero predicate = foreground relation, zero = background.
        fg_rel_inds = np.where(predicates)[0]
        bg_rel_inds = np.where(predicates == 0)[0]
        num_fg_bg_pair = len(fg_rel_inds) * len(bg_rel_inds)

        # An image with no foreground or no background relations has no
        # pairs to rate; skip it instead of dividing by zero.
        if num_fg_bg_pair > 0:
            # Column 0 of rel_rois is dropped before computing overlaps.
            fg_rel_rois = rel_rois[fg_rel_inds, 1:]
            bg_rel_rois = rel_rois[bg_rel_inds, 1:]
            fg_bg_overlaps = bbox_overlaps(fg_rel_rois, bg_rel_rois)

            fg_inds, bg_inds = np.where(fg_bg_overlaps > thresh)
            num = len(fg_inds)
            # float() keeps this a true division on Python 2 as well.
            fg_bg.update(float(num) / num_fg_bg_pair, num_fg_bg_pair)

        if i > 0 and i % print_freq == 0:
            print('(fg/bg)Val: {fg_bg.val:.3f}| (fg/bg)Avg: {fg_bg.avg:.3f}'.format(fg_bg=fg_bg))
    print('epoch {0}: (fg/bg)Val: {fg_bg.val:.3f}| (fg/bg)Avg: {fg_bg.avg:.3f}'.format(e, fg_bg=fg_bg))


Example #45
0
    def forward(self, bottom, top):
        """Select hard examples and emit them as the layer's seven tops.

        A proposal is "hard" when its classification disagrees with its
        overlap: best ground-truth overlap above 0.5 while score column 0
        beats column 1 (emitted with label 1), or overlap below 0.5 while
        column 1 beats column 0 (label 0).  The selection is then cycled
        until it has as many entries as ``bottom[7]``.  If nothing is hard,
        every proposal is kept with the labels from ``bottom[4]``.

        Bottoms read here: ``[0]`` boxes (columns 1-4 are coordinates),
        ``[1]`` ground-truth boxes (x1, y1, x2, y2, label), ``[2]`` class
        scores, ``[4]`` labels, ``[7]`` RoIs.

        Tops written: class scores, labels, bbox targets, bbox predictions,
        bbox inside weights, bbox outside weights and RoIs, each restricted
        to the selected indices.
        """
        # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
        # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
        bbox_pred = bottom[0].data
        # GT boxes (x1, y1, x2, y2, label)
        gt_boxes = bottom[1].data

        cls_score = bottom[2].data

        # Overlap of every proposal with every ground-truth box.
        # (np.float64 replaces the np.float alias removed from NumPy 1.24.)
        overlaps = bbox_overlaps(
            np.ascontiguousarray(bbox_pred[:, 1:5], dtype=np.float64),
            np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
        gt_assignment = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)

        # Sift the boxes: keep the ones whose scores contradict the overlap.
        hard_indexs = []
        labels = []
        for i, max_overlap in enumerate(max_overlaps):
            if max_overlap > 0.5 and cls_score[i][0] > cls_score[i][1]:
                hard_indexs.append(i)
                labels.append(1)
            if max_overlap < 0.5 and cls_score[i][0] < cls_score[i][1]:
                hard_indexs.append(i)
                labels.append(0)
        if len(hard_indexs) == 0:
            # Fall back to all proposals.  Both are materialised as lists so
            # that the padding loop below can append to them (a Python 3
            # range object or an ndarray cannot).
            hard_indexs = list(range(len(bbox_pred)))
            labels = list(bottom[4].data)

        # Pad by cycling through the selection until there is one entry per
        # RoI in bottom[7].
        hard_num = len(hard_indexs)
        num_rois = len(bottom[7].data)
        for i in range(hard_num, num_rois):
            hard_indexs.append(hard_indexs[i % hard_num])
            labels.append(labels[i % hard_num])
        labels = np.array(labels, dtype=np.float32)

        bbox_target_data = _compute_targets(
            bbox_pred[hard_indexs, 1:5], gt_boxes[gt_assignment[hard_indexs], :4], labels)

        bbox_targets, bbox_inside_weights = \
            _get_bbox_regression_labels(bbox_target_data, self._num_classes)

        # From here on every top has one row per RoI in bottom[7].
        hard_num = num_rois
        # top: 'cls_score_OHEM'
        top[0].reshape(hard_num, cls_score.shape[1])
        top[0].data[...] = cls_score[hard_indexs]

        # top: 'labels_OHEM'
        top[1].reshape(hard_num)
        top[1].data[...] = labels

        # top: 'bbox_targets_OHEM'
        top[2].reshape(hard_num, self._num_classes * 4)
        top[2].data[...] = bbox_targets

        # top: 'bbox_pred_OHEM'
        # NOTE(review): bbox_pred is sliced as [:, 1:5] above yet written
        # here into a (hard_num, num_classes * 4) blob -- confirm the
        # expected width of bottom[0].
        top[3].reshape(hard_num, self._num_classes * 4)
        top[3].data[...] = bbox_pred[hard_indexs]

        # top: 'bbox_inside_weights_OHEM'
        top[4].reshape(hard_num, self._num_classes * 4)
        top[4].data[...] = bbox_inside_weights

        # top: 'bbox_outside_weights_OHEM' (1 wherever the inside weight is
        # positive)
        top[5].reshape(hard_num, self._num_classes * 4)
        top[5].data[...] = np.array(bbox_inside_weights > 0).astype(np.float32)

        # top: 'rois_OHEM'
        top[6].reshape(hard_num, 5)
        top[6].data[...] = bottom[7].data[hard_indexs]
Example #46
0
    def evaluate_recall(self,
                        candidate_boxes=None,
                        thresholds=None,
                        area='all',
                        limit=None):
        """Evaluate detection proposal recall metrics.

        Args:
            candidate_boxes: optional per-image sequence of proposal box
                arrays.  When omitted, the roidb entries whose
                ``gt_classes`` is 0 are used as proposals.
            thresholds: IoU thresholds at which recall is computed; defaults
                to 0.5, 0.55, ..., 0.95.
            area: name of the ground-truth area range to evaluate (one of
                the keys of ``areas`` below).
            limit: if given, only the first ``limit`` proposals of each
                image are considered.

        Returns:
            results: dictionary of results with keys
                'ar': average recall
                'recalls': vector recalls at each IoU overlap threshold
                'thresholds': vector of IoU overlap thresholds
                'gt_overlaps': vector of all ground-truth overlaps
        """
        # Record max overlap value for each gt box
        # Return vector of overlap values
        areas = {
            'all': 0,
            'small': 1,
            'medium': 2,
            'large': 3,
            '96-128': 4,
            '128-256': 5,
            '256-512': 6,
            '512-inf': 7
        }
        area_ranges = [
            [0**2, 1e5**2],  # all
            [0**2, 32**2],  # small
            [32**2, 96**2],  # medium
            [96**2, 1e5**2],  # large
            [96**2, 128**2],  # 96-128
            [128**2, 256**2],  # 128-256
            [256**2, 512**2],  # 256-512
            [512**2, 1e5**2],  # 512-inf
        ]
        # `in` instead of dict.has_key(), which no longer exists in Python 3.
        assert area in areas, 'unknown area range: {}'.format(area)
        area_range = area_ranges[areas[area]]
        gt_overlaps = np.zeros(0)
        num_pos = 0
        for i in range(self.num_images):
            # Checking for max_overlaps == 1 avoids including crowd annotations
            # (...pretty hacking :/)
            max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(
                axis=1)
            gt_inds = np.where((self.roidb[i]['gt_classes'] > 0)
                               & (max_gt_overlaps == 1))[0]
            gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
            gt_areas = self.roidb[i]['seg_areas'][gt_inds]
            valid_gt_inds = np.where((gt_areas >= area_range[0])
                                     & (gt_areas <= area_range[1]))[0]
            gt_boxes = gt_boxes[valid_gt_inds, :]
            # Ground truth is counted even when the image ends up with no
            # proposals, so such images lower the recall.
            num_pos += len(valid_gt_inds)

            if candidate_boxes is None:
                # If candidate_boxes is not supplied, the default is to use the
                # non-ground-truth boxes from this roidb
                non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
                boxes = self.roidb[i]['boxes'][non_gt_inds, :]
            else:
                boxes = candidate_boxes[i]
            if boxes.shape[0] == 0:
                continue
            if limit is not None and boxes.shape[0] > limit:
                boxes = boxes[:limit, :]

            # np.float64 replaces the np.float alias removed from NumPy 1.24.
            overlaps = bbox_overlaps(boxes.astype(np.float64),
                                     gt_boxes.astype(np.float64))

            # Greedy one-to-one matching: each round takes the globally best
            # (proposal, gt) pair and retires both.
            _gt_overlaps = np.zeros((gt_boxes.shape[0]))
            for j in range(gt_boxes.shape[0]):
                # find which proposal box maximally covers each gt box
                argmax_overlaps = overlaps.argmax(axis=0)
                # and get the iou amount of coverage for each gt box
                max_overlaps = overlaps.max(axis=0)
                # find which gt box is 'best' covered (i.e. 'best' = most iou)
                gt_ind = max_overlaps.argmax()
                gt_ovr = max_overlaps.max()
                assert (gt_ovr >= 0)
                # find the proposal box that covers the best covered gt box
                box_ind = argmax_overlaps[gt_ind]
                # record the iou coverage of this gt box
                _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                assert (_gt_overlaps[j] == gt_ovr)
                # mark the proposal box and the gt box as used
                overlaps[box_ind, :] = -1
                overlaps[:, gt_ind] = -1
            # append recorded iou coverage level
            gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

        gt_overlaps = np.sort(gt_overlaps)
        if thresholds is None:
            step = 0.05
            thresholds = np.arange(0.5, 0.95 + 1e-5, step)
        recalls = np.zeros_like(thresholds)
        # compute recall for each iou threshold
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
        # ar = 2 * np.trapz(recalls, thresholds)
        ar = recalls.mean()
        return {
            'ar': ar,
            'recalls': recalls,
            'thresholds': thresholds,
            'gt_overlaps': gt_overlaps
        }
def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image,
                 num_classes, num_attr_classes, num_rel_classes, ignore_label):
    """Generate a random sample of RoIs comprising foreground and background
    examples, with optional attribute and relation targets.

    Args:
        all_rois: candidate RoIs; columns 1-4 hold the box coordinates.
        gt_boxes: ground-truth rows laid out as
            (x1, y1, x2, y2, label, attributes[16], relations[num_objs]).
        fg_rois_per_image: maximum number of foreground RoIs; also the fixed
            size of the attribute / relation outputs.
        rois_per_image: total number of RoIs wanted.
        num_classes: forwarded to ``_get_bbox_regression_labels``.
        num_attr_classes: attribute targets are produced when this is > 0.
        num_rel_classes: relation targets are produced when this is > 0.
        ignore_label: filler value for padded or ignored attribute and
            relation entries.

    Returns:
        Tuple ``(labels, rois, bbox_targets, bbox_inside_weights, attributes,
        relations)``; ``attributes`` and ``relations`` are ``None`` when the
        corresponding class count is not positive.
    """
    # overlaps: (rois x gt_boxes)
    # np.float64 replaces the np.float alias removed from NumPy 1.24; the
    # dtype is unchanged.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))

    # GT boxes (x1, y1, x2, y2, label, attributes[16], relations[num_objs])
    has_attributes = num_attr_classes > 0
    if has_attributes:
        assert gt_boxes.shape[1] >= 21
    has_relations = num_rel_classes > 0
    if has_relations:
        assert gt_boxes.shape[0] == gt_boxes.shape[1]-21, \
            'relationships not found in gt_boxes, item length is only %d' % gt_boxes.shape[1]
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds,
                             size=fg_rois_per_this_image,
                             replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI)
                       & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired)
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = int(min(bg_rois_per_this_image, bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds,
                             size=bg_rois_per_this_image,
                             replace=False)

    # The indices that we're selecting (both fg and bg), foreground first.
    keep_inds = np.append(fg_inds, bg_inds)

    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    # Ground-truth index of every sampled foreground RoI.
    fg_gt = np.array(gt_assignment[fg_inds])
    if has_attributes:
        # Fixed-size table padded with ignore_label; the sampled foreground
        # rows receive their ground truth's 16 attribute columns.
        attributes = np.ones((fg_rois_per_image, 16)) * ignore_label
        attributes[:fg_rois_per_this_image, :] = gt_boxes[fg_gt, 5:21]
        # Zeros in every column but the first are turned into ignore_label.
        np.place(attributes[:, 1:], attributes[:, 1:] == 0, ignore_label)
    else:
        attributes = None
    if has_relations:
        # Relation between every pair of sampled foreground RoIs, looked up
        # through their assigned ground-truth boxes.
        expand_rels = gt_boxes[fg_gt, 21:].T[fg_gt].T
        num_relations_per_this_image = np.count_nonzero(expand_rels)
        # Keep an equal number of 'no relation' outputs, the rest can be ignore
        expand_rels = expand_rels.flatten()
        no_rel_inds = np.where(expand_rels == 0)[0]
        if len(no_rel_inds) > num_relations_per_this_image:
            no_rel_inds = npr.choice(no_rel_inds,
                                     size=num_relations_per_this_image,
                                     replace=False)
        np.place(expand_rels, expand_rels == 0, ignore_label)
        expand_rels[no_rel_inds] = 0
        relations = np.ones((fg_rois_per_image, fg_rois_per_image),
                            dtype=np.float64) * ignore_label
        relations[:fg_rois_per_this_image, :
                  fg_rois_per_this_image] = expand_rels.reshape(
                      (fg_rois_per_this_image, fg_rois_per_this_image))
        relations = relations.reshape((relations.size, 1, 1, 1))
    else:
        relations = None
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(rois[:, 1:5],
                                        gt_boxes[gt_assignment[keep_inds], :4],
                                        labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, bbox_targets, bbox_inside_weights, attributes, relations
def _sample_rois(all_rois, gt_boxes, gt_masks, fg_rois_per_image, rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples, together with a cropped and resized mask target per RoI.

    Args:
        all_rois: candidate RoIs; columns 1-4 hold the box coordinates.
        gt_boxes: ground-truth rows; columns 0-3 are the box, column 4 the
            class label.
        gt_masks: ground-truth masks indexed ``[gt_index, row, col]``.
        fg_rois_per_image: maximum number of foreground RoIs to sample.
        rois_per_image: total number of RoIs wanted.
        num_classes: forwarded to ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels_data, rois, bbox_targets, bbox_inside_weights,
        mask_gt, label_weights, mask_weights)``; ``labels_data`` holds the
        sampled labels with background RoIs clamped to 0, the remaining
        entries come from ``_get_bbox_regression_labels``.
    """
    # overlaps: (rois x gt_boxes)
    # np.float64 replaces the np.float alias removed from NumPy 1.24; the
    # dtype is unchanged.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Guard against the case when an image has fewer than fg_rois_per_image
    # foreground RoIs
    fg_rois_per_this_image = int(min(fg_rois_per_image, fg_inds.size))
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
                       (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
    # Compute number of background RoIs to take from this image (guarding
    # against there being fewer than desired).  Cast to int like the
    # foreground count so npr.choice always receives an integer size.
    bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
    bg_rois_per_this_image = int(min(bg_rois_per_this_image, bg_inds.size))
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)

    # The indices that we're selecting (both fg and bg), foreground first.
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[fg_rois_per_this_image:] = 0
    rois = all_rois[keep_inds]

    bbox_target_data = _compute_targets(
        rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)

    # Sample gt_masks: take the mask of each RoI's assigned ground truth,
    # clip it to the RoI region and resize it to (scale x scale).
    mask_gt_keep = gt_masks[gt_assignment[keep_inds], :, :]
    scale = cfg.TRAIN.ROI_OUTPUT_SIZE*2
    mask_gt_data = np.zeros((len(keep_inds), scale, scale))
    for i in range(len(keep_inds)):
        if labels[i] > 0:
            roi = rois[i, 1:5]
            if cfg.DEBUG:
                print('_sample_roi')
                print('i: '+ str(i) +' labels[i]:' + str(labels[i]))
                print('roi' +str(roi[0]) + ' ' +  str(roi[1]) + ' ' + str(roi[2]) + ' ' + str(roi[3]))
            # RoI is (x1, y1, x2, y2): rows are sliced by y, columns by x,
            # both ends inclusive.
            mask_gt_clip = mask_gt_keep[i,
                                        int(round(roi[1])) : int(round(roi[3]))+1,
                                        int(round(roi[0])) : int(round(roi[2]))+1]
            if cfg.DEBUG:
                print('mask_gt_keep.shape[1]: ' +str(mask_gt_keep.shape[1]))
                print('mask_gt_keep.shape[2]: ' + str(mask_gt_keep.shape[2]))
                print('mask_gt_clip.shape[0]: ' +str(mask_gt_clip.shape[0]))
                print('mask_gt_clip.shape[1]: ' + str(mask_gt_clip.shape[1]))
            # NOTE(review): a clip with a zero-sized dimension makes these
            # divisions fail -- confirm RoIs always lie inside the mask.
            fx = float(scale)/mask_gt_clip.shape[1]
            fy = float(scale)/mask_gt_clip.shape[0]
            if cfg.DEBUG:
                print('mask_gt_clip.shape[0]: ' +str(mask_gt_clip.shape[0]))
                print('mask_gt_clip.shape[1]: ' + str(mask_gt_clip.shape[1]))
                print('scale: ' +str(scale))
                print('fx:' +str(fx))
                print('fy:' +str(fy))
            mask_gt_data[i, :, :] = np.round(cv2.resize(mask_gt_clip, None, fx=fx, fy=fy))
        else:
            mask_gt_data[i, :, :] = np.zeros((scale, scale))

    # `labels` is rebound to the helper's output below; keep the sampled
    # labels for the return value.
    labels_data = labels
    bbox_targets, bbox_inside_weights, labels, label_weights, mask_gt, mask_weights = \
        _get_bbox_regression_labels(bbox_target_data, labels_data, mask_gt_data, num_classes)

    if cfg.TRACE:
        # Debug dump.  Every print takes a single argument, so the output is
        # the same under Python 2 and Python 3.
        print('========sample rois========')
        print('fg_inds')
        print(fg_inds)
        print('bg_inds')
        print(bg_inds)
        print('rois: ')
        print(rois[0:5, :])
        print('labels: ')
        print(labels[0:5, :])
        print('label_weights: ')
        print(label_weights[0:5, :])
        print('bbox_targets: ')
        print(bbox_targets[0:5, 4*59:4*60])
        print('mask_weighs: ')
        print(mask_weights[0:5, :, :, 59])
        print('save mask_gt')
        # NOTE(review): hard-coded, machine-specific output path.
        cv2.imwrite('/home/chsiyuan/Documents/542FinalProject/experiments/mask_gt.png',mask_gt[0,:,:,59]*255)

    return labels_data, rois, bbox_targets, bbox_inside_weights, mask_gt, label_weights, mask_weights
def anchor_target_layer(rpn_cls_score,
                        gt_boxes,
                        im_info,
                        data,
                        _feat_stride=[
                            16,
                        ],
                        anchor_scales=[4, 8, 16, 32]):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.
    """
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    if DEBUG:
        print 'anchors:'
        print _anchors
        print 'anchor shapes:'
        print np.hstack((
            _anchors[:, 2::4] - _anchors[:, 0::4],
            _anchors[:, 3::4] - _anchors[:, 1::4],
        ))
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0
    # map of shape (..., H, W)
    #height, width = rpn_cls_score.shape[1:3]

    im_info = im_info[0]

    # Algorithm:
    #
    # for each (H, W) location i
    #   generate 9 anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the 9 anchors
    # filter out-of-image anchors
    # measure GT overlap

    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W)
    height, width = rpn_cls_score.shape[1:3]

    if DEBUG:
        print 'AnchorTargetLayer: height', height, 'width', width
        print ''
        print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
        print 'scale: {}'.format(im_info[2])
        print 'height, width: ({}, {})'.format(height, width)
        print 'rpn: gt_boxes.shape', gt_boxes.shape
        print 'rpn: gt_boxes', gt_boxes

    # 1. Generate proposals from bbox deltas and shifted anchors
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    all_anchors = (_anchors.reshape((1, A, 4)) + shifts.reshape(
        (1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    if DEBUG:
        print 'total_anchors', total_anchors
        print 'inds_inside', len(inds_inside)

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print 'anchors.shape', anchors.shape

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float),
                             np.ascontiguousarray(gt_boxes, dtype=np.float))
    # max overlapped ground-truth box for each anchor (filtered by inds_inside)
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # max overlapped anchor for each ground-truth box
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU, not really IOU but the
    # percentage of overlapped area to ground-truth box
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(fg_inds,
                                  size=(len(fg_inds) - num_fg),
                                  replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1
        #print "was %s inds, disabling %s, now %s inds" % (
        #len(bg_inds), len(disable_inds), np.sum(labels == 0))

    # labels[] = 1, foreground
    #            0, background
    #           -1, disabled (not used)
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # Generate transform information to transform anchor to max
    # overlapped ground truth box
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means**2)
        print 'means:'
        print means
        print 'stdevs:'
        print stds

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    if DEBUG:
        print 'rpn: max max_overlap', np.max(max_overlaps)
        print 'rpn: num_positive', np.sum(labels == 1)
        print 'rpn: num_negative', np.sum(labels == 0)
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print 'rpn: num_positive avg', _fg_sum / _count
        print 'rpn: num_negative avg', _bg_sum / _count

    # labels
    #pdb.set_trace()
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    labels = labels.reshape((1, 1, A * height, width))
    rpn_labels = labels

    # bbox_targets
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)

    rpn_bbox_targets = bbox_targets
    # bbox_inside_weights
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    #assert bbox_inside_weights.shape[2] == height
    #assert bbox_inside_weights.shape[3] == width

    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
    #assert bbox_outside_weights.shape[2] == height
    #assert bbox_outside_weights.shape[3] == width

    rpn_bbox_outside_weights = bbox_outside_weights

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
Example #50
0
def anchor_target_layer(rpn_cls_score, gt_boxes, im_info, _feat_stride, all_anchors, num_anchors):
  """Same as the anchor target layer in original Fast/er RCNN.

  Labels precomputed anchors as foreground / background / ignored and builds
  the matching box-regression targets and loss weights.

  Args:
    rpn_cls_score: array whose shape alone is used; axes 1 and 2 are read
      as (height, width).
    gt_boxes: ground-truth boxes, handed to ``bbox_overlaps`` and
      row-indexed for ``_compute_targets``.
      NOTE(review): column layout is not visible here -- confirm.
    im_info: indexable whose first entry starts with (height, width); these
      bound the "inside the image" test.
    _feat_stride: not used by this function.
    all_anchors: (total_anchors, 4) array of anchor boxes.
    num_anchors: number of anchors per feature-map cell (A).

  Returns:
    Tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
    rpn_bbox_outside_weights)``. Labels are reshaped to
    (1, height, width * A, 1) and the three box arrays to
    (1, height, width, A * 4). Labels are 1 (foreground), 0 (background)
    or -1 (ignored).

  Raises:
    AssertionError: if ``cfg.TRAIN.RPN_POSITIVE_WEIGHT`` is non-negative but
      not strictly between 0 and 1.
  """
  A = num_anchors
  total_anchors = all_anchors.shape[0]
  im_info = im_info[0]

  # allow boxes to sit over the edge by a small amount
  _allowed_border = 0

  # height and width of the score map are axes 1 and 2
  height, width = rpn_cls_score.shape[1:3]

  # only keep anchors inside the image
  inds_inside = np.where(
    (all_anchors[:, 0] >= -_allowed_border) &
    (all_anchors[:, 1] >= -_allowed_border) &
    (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
    (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
  )[0]
  num_inside = len(inds_inside)

  # keep only inside anchors
  anchors = all_anchors[inds_inside, :]

  # label: 1 is positive, 0 is negative, -1 is dont care
  labels = np.empty((num_inside,), dtype=np.float32)
  labels.fill(-1)

  # overlaps between the anchors and the gt boxes, shape (ex, gt).
  # np.float64 is what the removed np.float alias resolved to.
  overlaps = bbox_overlaps(
    np.ascontiguousarray(anchors, dtype=np.float64),
    np.ascontiguousarray(gt_boxes, dtype=np.float64))
  # best gt box for each anchor
  argmax_overlaps = overlaps.argmax(axis=1)
  max_overlaps = overlaps[np.arange(num_inside), argmax_overlaps]
  # best anchor for each gt box, widened to every anchor tying for that best
  gt_argmax_overlaps = overlaps.argmax(axis=0)
  gt_max_overlaps = overlaps[gt_argmax_overlaps,
                             np.arange(overlaps.shape[1])]
  gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

  if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
    # assign bg labels first so that positive labels can clobber them
    labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

  # fg label: for each gt, anchor with highest overlap
  labels[gt_argmax_overlaps] = 1

  # fg label: above threshold IOU
  labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

  if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
    # assign bg labels last so that negative labels can clobber positives
    labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

  # subsample positive labels if we have too many
  num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
  fg_inds = np.where(labels == 1)[0]
  if len(fg_inds) > num_fg:
    disable_inds = npr.choice(
      fg_inds, size=(len(fg_inds) - num_fg), replace=False)
    labels[disable_inds] = -1

  # subsample negative labels if we have too many; the background quota is
  # whatever the surviving positives leave of the batch
  num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
  bg_inds = np.where(labels == 0)[0]
  if len(bg_inds) > num_bg:
    disable_inds = npr.choice(
      bg_inds, size=(len(bg_inds) - num_bg), replace=False)
    labels[disable_inds] = -1

  # regression targets from each anchor to its best-overlapping gt box
  bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

  bbox_inside_weights = np.zeros((num_inside, 4), dtype=np.float32)
  # only the positive ones have regression targets
  bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

  bbox_outside_weights = np.zeros((num_inside, 4), dtype=np.float32)
  if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
    # uniform weighting of examples (given non-uniform sampling)
    num_examples = np.sum(labels >= 0)
    positive_weights = np.ones((1, 4)) * 1.0 / num_examples
    negative_weights = np.ones((1, 4)) * 1.0 / num_examples
  else:
    assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
            (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
    positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                        np.sum(labels == 1))
    negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                        np.sum(labels == 0))
  bbox_outside_weights[labels == 1, :] = positive_weights
  bbox_outside_weights[labels == 0, :] = negative_weights

  # map up to original set of anchors
  labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
  bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
  bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
  bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

  # labels: (total_anchors,) -> (1, H, W*A, 1)
  rpn_labels = labels.reshape((1, height, width * A, 1))

  # box arrays: (total_anchors, 4) -> (1, H, W, A*4)
  box_shape = (1, height, width, A * 4)
  rpn_bbox_targets = bbox_targets.reshape(box_shape)
  rpn_bbox_inside_weights = bbox_inside_weights.reshape(box_shape)
  rpn_bbox_outside_weights = bbox_outside_weights.reshape(box_shape)

  return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
Example #51
0
def _sample_rois(all_rois, all_scores, gt_boxes, fg_rois_per_image,
                 rois_per_image, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.

    Exactly ``rois_per_image`` RoIs are drawn whenever at least one
    foreground or background candidate exists; a pool that is too small is
    sampled with replacement.

    Args:
        all_rois: 2-D array of candidate RoIs; columns 1:5 are used as the
            box. NOTE(review): column 0 is presumably a batch index --
            confirm against the caller.
        all_scores: per-RoI scores, indexed by the sampled row indices.
        gt_boxes: 2-D array of ground-truth boxes; columns 0:4 are the box
            and column 4 is read as the class label.
        fg_rois_per_image: upper bound on the number of foreground RoIs.
        rois_per_image: total number of RoIs to sample.
        num_classes: forwarded to ``_get_bbox_regression_labels``.

    Returns:
        Tuple ``(labels, rois, roi_scores, bbox_targets,
        bbox_inside_weights)`` for the sampled RoIs, foreground rows first;
        labels of the background rows are set to 0.

    Raises:
        ValueError: if no RoI qualifies as foreground or background.
    """
    # overlaps: (rois x gt_boxes)
    # np.float64 is what the removed np.float alias resolved to.
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
        np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    labels = gt_boxes[gt_assignment, 4]

    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI)
                       & (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]

    # Small modification to the original version where we ensure a fixed number of regions are sampled
    if fg_inds.size > 0 and bg_inds.size > 0:
        # Guard against an image with fewer than fg_rois_per_image
        # foreground candidates; background fills the remainder.
        fg_rois_per_image = min(fg_rois_per_image, fg_inds.size)
        fg_inds = npr.choice(fg_inds,
                             size=int(fg_rois_per_image),
                             replace=False)
        bg_rois_per_image = rois_per_image - fg_rois_per_image
        to_replace = bg_inds.size < bg_rois_per_image
        bg_inds = npr.choice(bg_inds,
                             size=int(bg_rois_per_image),
                             replace=to_replace)
    elif fg_inds.size > 0:
        # foreground only: fill the whole sample from it
        to_replace = fg_inds.size < rois_per_image
        fg_inds = npr.choice(fg_inds,
                             size=int(rois_per_image),
                             replace=to_replace)
        fg_rois_per_image = rois_per_image
    elif bg_inds.size > 0:
        # background only: fill the whole sample from it
        to_replace = bg_inds.size < rois_per_image
        bg_inds = npr.choice(bg_inds,
                             size=int(rois_per_image),
                             replace=to_replace)
        fg_rois_per_image = 0
    else:
        # Fail loudly instead of dropping into an interactive debugger,
        # which stalls or aborts unattended training runs.
        raise ValueError(
            'Cannot sample RoIs: none qualifies as foreground '
            '(overlap >= cfg.TRAIN.FG_THRESH) or background '
            '(cfg.TRAIN.BG_THRESH_LO <= overlap < cfg.TRAIN.BG_THRESH_HI)')

    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    labels = labels[keep_inds]
    # Clamp labels for the background RoIs to 0
    labels[int(fg_rois_per_image):] = 0
    rois = all_rois[keep_inds]
    roi_scores = all_scores[keep_inds]

    bbox_target_data = _compute_targets(rois[:, 1:5],
                                        gt_boxes[gt_assignment[keep_inds], :4],
                                        labels)

    bbox_targets, bbox_inside_weights = \
        _get_bbox_regression_labels(bbox_target_data, num_classes)

    return labels, rois, roi_scores, bbox_targets, bbox_inside_weights
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate 9 anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the 9 anchors
        # filter out-of-image anchors
        # measure GT overlap

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        # map of shape (..., H, W)
        height, width = bottom[0].data.shape[-2:]
        # GT boxes (x1, y1, x2, y2, label)
        gt_boxes = bottom[1].data
        # im_info
        im_info = bottom[2].data[0, :]

        if DEBUG:
            print ''
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'scale: {}'.format(im_info[2])
            print 'height, width: ({}, {})'.format(height, width)
            print 'rpn: gt_boxes.shape', gt_boxes.shape
            print 'rpn: gt_boxes', gt_boxes

        # 1. Generate proposals from bbox deltas and shifted anchors
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        all_anchors = (self._anchors.reshape((1, A, 4)) +
                       shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -self._allowed_border) &
            (all_anchors[:, 1] >= -self._allowed_border) &
            (all_anchors[:, 2] < im_info[1] + self._allowed_border) &  # width
            (all_anchors[:, 3] < im_info[0] + self._allowed_border)    # height
        )[0]

        if DEBUG:
            print 'total_anchors', total_anchors
            print 'inds_inside', len(inds_inside)

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
        if DEBUG:
            print 'anchors.shape', anchors.shape

        # label: 1 is positive, 0 is negative, -1 is dont care
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        # overlaps between the anchors and the gt boxes
        # overlaps (ex, gt)
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float),
            np.ascontiguousarray(gt_boxes, dtype=np.float))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # subsample positive labels if we have too many
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = npr.choice(
                fg_inds, size=(len(fg_inds) - num_fg), replace=False)
            labels[disable_inds] = -1

        # subsample negative labels if we have too many
        num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
        bg_inds = np.where(labels == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(
                bg_inds, size=(len(bg_inds) - num_bg), replace=False)
            labels[disable_inds] = -1
            #print "was %s inds, disabling %s, now %s inds" % (
                #len(bg_inds), len(disable_inds), np.sum(labels == 0))

        bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
        bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

        bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

        bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
            # uniform weighting of examples (given non-uniform sampling)
            num_examples = np.sum(labels >= 0)
            positive_weights = np.ones((1, 4)) * 1.0 / num_examples
            negative_weights = np.ones((1, 4)) * 1.0 / num_examples
        else:
            assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                    (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
            positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                                np.sum(labels == 1))
            negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                                np.sum(labels == 0))
        bbox_outside_weights[labels == 1, :] = positive_weights
        bbox_outside_weights[labels == 0, :] = negative_weights

        if DEBUG:
            self._sums += bbox_targets[labels == 1, :].sum(axis=0)
            self._squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
            self._counts += np.sum(labels == 1)
            means = self._sums / self._counts
            stds = np.sqrt(self._squared_sums / self._counts - means ** 2)
            print 'means:'
            print means
            print 'stdevs:'
            print stds

        # map up to original set of anchors
        labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
        bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
        bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
        bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

        if DEBUG:
            print 'rpn: max max_overlap', np.max(max_overlaps)
            print 'rpn: num_positive', np.sum(labels == 1)
            print 'rpn: num_negative', np.sum(labels == 0)
            self._fg_sum += np.sum(labels == 1)
            self._bg_sum += np.sum(labels == 0)
            self._count += 1
            print 'rpn: num_positive avg', self._fg_sum / self._count
            print 'rpn: num_negative avg', self._bg_sum / self._count

        # labels
        labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, 1, A * height, width))
        top[0].reshape(*labels.shape)
        top[0].data[...] = labels

        # bbox_targets
        bbox_targets = bbox_targets \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        top[1].reshape(*bbox_targets.shape)
        top[1].data[...] = bbox_targets

        # bbox_inside_weights
        bbox_inside_weights = bbox_inside_weights \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        assert bbox_inside_weights.shape[2] == height
        assert bbox_inside_weights.shape[3] == width
        top[2].reshape(*bbox_inside_weights.shape)
        top[2].data[...] = bbox_inside_weights

        # bbox_outside_weights
        bbox_outside_weights = bbox_outside_weights \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        assert bbox_outside_weights.shape[2] == height
        assert bbox_outside_weights.shape[3] == width
        top[3].reshape(*bbox_outside_weights.shape)
        top[3].data[...] = bbox_outside_weights
Example #53
0
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate 9 anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the 9 anchors
        # filter out-of-image anchors
        # measure GT overlap

        # bottom[0] is the rpn_cls_score the foreground background classification prob (only the shape is used)
        # single item batches in training
        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        # map of shape (..., H, W) w, h of feature map
        height, width = bottom[0].data.shape[-2:]
        # GT boxes (x1, y1, x2, y2, label)
        gt_boxes = bottom[1].data
        # im_info
        im_info = bottom[2].data[0, :]

        if DEBUG:
            print ''
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'scale: {}'.format(im_info[2])
            print 'height, width: ({}, {})'.format(height, width)
            print 'rpn: gt_boxes.shape', gt_boxes.shape
            print 'rpn: gt_boxes', gt_boxes

        # 1. Generate proposals from bbox deltas and shifted anchors
        # shift based on origin image size
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        all_anchors = (self._anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -self._allowed_border)
            & (all_anchors[:, 1] >= -self._allowed_border)
            & (all_anchors[:, 2] < im_info[1] + self._allowed_border)
            &  # width
            (all_anchors[:, 3] < im_info[0] + self._allowed_border)  # height
        )[0]

        if DEBUG:
            print 'total_anchors', total_anchors
            print 'inds_inside', len(inds_inside)

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]
        if DEBUG:
            print 'anchors.shape', anchors.shape

        # label: 1 is positive, 0 is negative, -1 is dont care
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        # overlaps between the anchors and the gt boxes
        # overlaps (ex, gt)
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float),
            np.ascontiguousarray(gt_boxes, dtype=np.float))
        # the index of max overlap for each anchors
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
        # the index of max overlap for gt_box
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

        if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels first so that positive labels can clobber them
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IOU
        labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

        if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
            # assign bg labels last so that negative labels can clobber positives
            labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

        # subsample positive labels if we have too many
        num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            disable_inds = npr.choice(fg_inds,
                                      size=(len(fg_inds) - num_fg),
                                      replace=False)
            labels[disable_inds] = -1

        # subsample negative labels if we have too many
        num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
        bg_inds = np.where(labels == 0)[0]
        if len(bg_inds) > num_bg:
            disable_inds = npr.choice(bg_inds,
                                      size=(len(bg_inds) - num_bg),
                                      replace=False)
            labels[disable_inds] = -1
            #print "was %s inds, disabling %s, now %s inds" % (
            #len(bg_inds), len(disable_inds), np.sum(labels == 0))

        bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
        bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

        # assign smoothl1 loss weight, the weight with -1 label will be assigned with 0
        bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        bbox_inside_weights[labels == 1, :] = np.array(
            cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

        bbox_outside_weights = np.zeros((len(inds_inside), 4),
                                        dtype=np.float32)
        if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
            # uniform weighting of examples (given non-uniform sampling)
            num_examples = np.sum(labels >= 0)
            positive_weights = np.ones((1, 4)) * 1.0 / num_examples
            negative_weights = np.ones((1, 4)) * 1.0 / num_examples
        else:
            assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                    (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
            positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                                np.sum(labels == 1))
            negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                                np.sum(labels == 0))
        bbox_outside_weights[labels == 1, :] = positive_weights
        bbox_outside_weights[labels == 0, :] = negative_weights

        if DEBUG:
            self._sums += bbox_targets[labels == 1, :].sum(axis=0)
            self._squared_sums += (bbox_targets[labels == 1, :]**2).sum(axis=0)
            self._counts += np.sum(labels == 1)
            means = self._sums / self._counts
            stds = np.sqrt(self._squared_sums / self._counts - means**2)
            print 'means:'
            print means
            print 'stdevs:'
            print stds

        # map up to original set of anchors
        # the useful anchors will be kept, invalid value will fill the value with "fill" parameter
        labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
        bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
        bbox_inside_weights = _unmap(bbox_inside_weights,
                                     total_anchors,
                                     inds_inside,
                                     fill=0)
        bbox_outside_weights = _unmap(bbox_outside_weights,
                                      total_anchors,
                                      inds_inside,
                                      fill=0)

        if DEBUG:
            print 'rpn: max max_overlap', np.max(max_overlaps)
            print 'rpn: num_positive', np.sum(labels == 1)
            print 'rpn: num_negative', np.sum(labels == 0)
            self._fg_sum += np.sum(labels == 1)
            self._bg_sum += np.sum(labels == 0)
            self._count += 1
            print 'rpn: num_positive avg', self._fg_sum / self._count
            print 'rpn: num_negative avg', self._bg_sum / self._count

        # labels
        labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, 1, A * height, width))
        top[0].reshape(*labels.shape)
        top[0].data[...] = labels

        # bbox_targets
        bbox_targets = bbox_targets \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        top[1].reshape(*bbox_targets.shape)
        top[1].data[...] = bbox_targets

        # bbox_inside_weights
        bbox_inside_weights = bbox_inside_weights \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        assert bbox_inside_weights.shape[2] == height
        assert bbox_inside_weights.shape[3] == width
        top[2].reshape(*bbox_inside_weights.shape)
        top[2].data[...] = bbox_inside_weights

        # bbox_outside_weights
        bbox_outside_weights = bbox_outside_weights \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        assert bbox_outside_weights.shape[2] == height
        assert bbox_outside_weights.shape[3] == width
        top[3].reshape(*bbox_outside_weights.shape)
        top[3].data[...] = bbox_outside_weights
Example #54
0
    def evaluation(self, output_dir):
        """Evaluate pose-refinement results found in ``output_dir``.

        The method proceeds in four stages:

        1. Call ``self.write_dop_results(output_dir)``.
        2. If ``results_deepim.mat`` already exists in ``output_dir`` the
           per-object metrics are loaded from it. Otherwise they are computed
           from the per-image ``%04d_%06d.mat`` result files and then saved to
           ``results_deepim.mat``.
        3. For every class, plus a final aggregate entry named ``'all'``,
           accuracy curves and rotation-error histograms are written to
           ``<output_dir>/<class name>.png``.
        4. The ADD and ADD-S scores are printed for the initial pose and for
           each of the ``cfg.TEST.ITERNUM`` refinement iterations.

        Every metric array has one row per evaluated object and
        ``cfg.TEST.ITERNUM + 1`` columns: column 0 holds the initial pose,
        column ``k + 1`` the pose after refinement iteration ``k``. Objects
        for which no matching ROI exists get ``inf`` in every column.

        Args:
            output_dir: directory containing the per-image result files; the
                cached metrics file and the plots are written there as well.

        Returns:
            None. Results are reported through files and printed output.
        """
        self.write_dop_results(output_dir)

        filename = os.path.join(output_dir, 'results_deepim.mat')
        num_iterations = cfg.TEST.ITERNUM
        if os.path.exists(filename):
            # reuse the cached metrics instead of recomputing them
            results_all = scipy.io.loadmat(filename)
            print('load results from file')
            print(filename)
            distances_sys = results_all['distances_sys']
            distances_non = results_all['distances_non']
            errors_rotation = results_all['errors_rotation']
            errors_translation = results_all['errors_translation']
            results_seq_id = results_all['results_seq_id'].flatten()
            results_frame_id = results_all['results_frame_id'].flatten()
            results_object_id = results_all['results_object_id'].flatten()
            results_cls_id = results_all['results_cls_id'].flatten()
        else:
            # pre-allocate the result buffers; they are trimmed to the number
            # of evaluated objects once the loop is done
            num_max = 200000
            num_results = num_iterations + 1
            distances_sys = np.zeros((num_max, num_results), dtype=np.float32)
            distances_non = np.zeros((num_max, num_results), dtype=np.float32)
            errors_rotation = np.zeros((num_max, num_results),
                                       dtype=np.float32)
            errors_translation = np.zeros((num_max, num_results),
                                          dtype=np.float32)
            results_seq_id = np.zeros((num_max, ), dtype=np.float32)
            results_frame_id = np.zeros((num_max, ), dtype=np.float32)
            results_object_id = np.zeros((num_max, ), dtype=np.float32)
            results_cls_id = np.zeros((num_max, ), dtype=np.float32)

            # for each image; ``count`` is the row index of the last object
            # written so far (-1 means none yet)
            count = -1
            for i in range(len(self._mapping)):

                s, c, f = self._mapping[i]
                # only every _BOP_EVAL_SUBSAMPLING_FACTOR-th frame is evaluated
                is_testing = f % _BOP_EVAL_SUBSAMPLING_FACTOR == 0
                if not is_testing:
                    continue

                # intrinsics
                intrinsics = self._intrinsics[c]
                intrinsic_matrix = np.eye(3, dtype=np.float32)
                intrinsic_matrix[0, 0] = intrinsics['fx']
                intrinsic_matrix[1, 1] = intrinsics['fy']
                intrinsic_matrix[0, 2] = intrinsics['ppx']
                intrinsic_matrix[1, 2] = intrinsics['ppy']

                # parse keyframe name
                scene_id, im_id = self.get_bop_id_from_idx(i)

                # load result
                filename = os.path.join(output_dir,
                                        '%04d_%06d.mat' % (scene_id, im_id))
                print(filename)
                if not os.path.exists(filename):
                    print('file %s not exist' % (filename))
                    continue
                result = scipy.io.loadmat(filename)

                # load gt
                d = os.path.join(self._data_dir, self._sequences[s],
                                 self._serials[c])
                label_file = os.path.join(d, self._label_format.format(f))
                label = np.load(label_file)
                cls_indexes = np.array(self._ycb_ids[s]).flatten()

                # poses
                poses = label['pose_y']
                if len(poses.shape) == 2:
                    # a single pose is stored without the leading object axis
                    poses = np.reshape(poses, (1, 3, 4))
                num = poses.shape[0]
                assert num == len(
                    cls_indexes
                ), 'number of poses not equal to number of objects'

                # instance label: drop the ids 0 and 255, which are not
                # treated as objects here
                im_label = label['seg']
                instance_ids = np.unique(im_label)
                if instance_ids[0] == 0:
                    instance_ids = instance_ids[1:]
                if instance_ids[-1] == 255:
                    instance_ids = instance_ids[:-1]

                # for each gt poses
                for j in range(len(instance_ids)):
                    cls = instance_ids[j]

                    # skip objects covering fewer than 200 pixels
                    pixels = np.sum(im_label == cls)
                    if pixels < 200:
                        continue
                    count += 1

                    # find the pose
                    object_index = np.where(cls_indexes == cls)[0][0]
                    RT_gt = poses[object_index, :, :]
                    box_gt = self.compute_box(cls - 1, intrinsic_matrix, RT_gt)

                    results_seq_id[count] = scene_id
                    results_frame_id[count] = im_id
                    results_object_id[count] = object_index
                    results_cls_id[count] = cls

                    # network result: collect the ROIs predicted for this class
                    roi_index = []
                    if len(result['rois']) > 0:
                        for k in range(result['rois'].shape[0]):
                            ind = int(result['rois'][k, 1])
                            if cls == cfg.TRAIN.CLASSES[ind] + 1:
                                roi_index.append(k)

                    # several candidates: keep the ROI that overlaps the
                    # ground-truth box the most
                    if len(roi_index) > 1:
                        # overlaps: (rois x gt_boxes)
                        roi_blob = result['rois'][roi_index, :]
                        roi_blob = roi_blob[:, (0, 2, 3, 4, 5, 1)]
                        gt_box_blob = np.zeros((1, 5), dtype=np.float32)
                        gt_box_blob[0, 1:] = box_gt
                        # np.float64 replaces the removed alias np.float
                        overlaps = bbox_overlaps(
                            np.ascontiguousarray(roi_blob[:, :5],
                                                 dtype=np.float64),
                            np.ascontiguousarray(gt_box_blob,
                                                 dtype=np.float64)).flatten()
                        assignment = overlaps.argmax()
                        roi_index = [roi_index[assignment]]

                    if len(roi_index) > 0:
                        # exactly one ROI is left at this point; index with a
                        # scalar so int() is not handed a 1-element array
                        roi = roi_index[0]
                        RT = np.zeros((3, 4), dtype=np.float32)
                        ind = int(result['rois'][roi, 1])
                        points = self._points[ind]

                        # initial pose
                        RT[:3, :3] = quat2mat(
                            result['poses_init'][roi, 2:6].flatten())
                        RT[:, 3] = result['poses_init'][roi, 6:]
                        distances_sys[count, 0] = adi(RT[:3, :3], RT[:, 3],
                                                      RT_gt[:3, :3],
                                                      RT_gt[:, 3], points)
                        distances_non[count, 0] = add(RT[:3, :3], RT[:, 3],
                                                      RT_gt[:3, :3],
                                                      RT_gt[:, 3], points)
                        errors_rotation[count, 0] = re(RT[:3, :3],
                                                       RT_gt[:3, :3])
                        errors_translation[count,
                                           0] = te(RT[:, 3], RT_gt[:, 3])

                        # pose after refinement
                        for k in range(num_iterations):
                            RT[:3, :3] = quat2mat(
                                result['poses_est'][k][roi, 2:6].flatten())
                            RT[:, 3] = result['poses_est'][k][roi, 6:]
                            distances_sys[count,
                                          k + 1] = adi(RT[:3, :3], RT[:, 3],
                                                       RT_gt[:3, :3],
                                                       RT_gt[:, 3], points)
                            distances_non[count,
                                          k + 1] = add(RT[:3, :3], RT[:, 3],
                                                       RT_gt[:3, :3],
                                                       RT_gt[:, 3], points)
                            errors_rotation[count, k + 1] = re(
                                RT[:3, :3], RT_gt[:3, :3])
                            errors_translation[count, k + 1] = te(
                                RT[:, 3], RT_gt[:, 3])
                    else:
                        # no ROI for this object: mark every column as missed
                        distances_sys[count, :] = np.inf
                        distances_non[count, :] = np.inf
                        errors_rotation[count, :] = np.inf
                        errors_translation[count, :] = np.inf

            # trim the buffers to the rows that were actually filled
            distances_sys = distances_sys[:count + 1, :]
            distances_non = distances_non[:count + 1, :]
            errors_rotation = errors_rotation[:count + 1, :]
            errors_translation = errors_translation[:count + 1, :]
            results_seq_id = results_seq_id[:count + 1]
            results_frame_id = results_frame_id[:count + 1]
            results_object_id = results_object_id[:count + 1]
            results_cls_id = results_cls_id[:count + 1]

            results_all = {
                'distances_sys': distances_sys,
                'distances_non': distances_non,
                'errors_rotation': errors_rotation,
                'errors_translation': errors_translation,
                'results_seq_id': results_seq_id,
                'results_frame_id': results_frame_id,
                'results_object_id': results_object_id,
                'results_cls_id': results_cls_id
            }

            filename = os.path.join(output_dir, 'results_deepim.mat')
            scipy.io.savemat(filename, results_all)

        # print the results
        # for each class
        import matplotlib.pyplot as plt
        max_distance = 0.1
        color = ['r', 'g', 'b', 'y', 'c']
        index_plot = [0]
        leng = ['Initial']
        for k in range(num_iterations):
            leng.append('Iteration %d' % (k + 1))
            index_plot.append(k + 1)
        num = len(leng)
        ADD = np.zeros((self._num_classes_all + 1, num), dtype=np.float32)
        ADDS = np.zeros((self._num_classes_all + 1, num), dtype=np.float32)
        TS = np.zeros((self._num_classes_all + 1, num), dtype=np.float32)
        classes = list(copy.copy(self._classes_all))
        classes.append('all')
        for k in range(self._num_classes_all + 1):
            if k == self._num_classes_all:
                # last entry aggregates over every evaluated object
                index = np.arange(len(results_cls_id))
            else:
                index = np.where(results_cls_id == k + 1)[0]

            if len(index) == 0:
                continue
            print('%s: %d objects' % (classes[k], len(index)))

            # the figure is created only for classes that are plotted and is
            # closed after saving, so figures do not pile up across classes
            fig = plt.figure(figsize=(16.0, 14.0))

            # distance symmetry
            ax = fig.add_subplot(3, 3, 1)
            lengs = []
            for i in index_plot:
                # indexing with an index array yields a copy, so clamping to
                # inf below does not modify the stored metrics
                D = distances_sys[index, i]
                ind = np.where(D > max_distance)[0]
                D[ind] = np.inf
                d = np.sort(D)
                n = len(d)
                accuracy = np.cumsum(np.ones((n, ), np.float32)) / n
                # cycle the colours so more than len(color) curves still plot
                plt.plot(d, accuracy, color[i % len(color)], linewidth=2)
                ADDS[k, i] = VOCap(d, accuracy)
                lengs.append('%s (%.2f)' % (leng[i], ADDS[k, i] * 100))
                print('%s, %s: %d objects missed' %
                      (classes[k], leng[i], np.sum(np.isinf(D))))

            ax.legend(lengs)
            plt.xlabel('Average distance threshold in meter (symmetry)')
            plt.ylabel('accuracy')
            ax.set_title(classes[k])

            # distance non-symmetry
            ax = fig.add_subplot(3, 3, 2)
            lengs = []
            for i in index_plot:
                D = distances_non[index, i]
                ind = np.where(D > max_distance)[0]
                D[ind] = np.inf
                d = np.sort(D)
                n = len(d)
                accuracy = np.cumsum(np.ones((n, ), np.float32)) / n
                plt.plot(d, accuracy, color[i % len(color)], linewidth=2)
                ADD[k, i] = VOCap(d, accuracy)
                lengs.append('%s (%.2f)' % (leng[i], ADD[k, i] * 100))
                print('%s, %s: %d objects missed' %
                      (classes[k], leng[i], np.sum(np.isinf(D))))

            ax.legend(lengs)
            plt.xlabel('Average distance threshold in meter (non-symmetry)')
            plt.ylabel('accuracy')
            ax.set_title(classes[k])

            # translation
            ax = fig.add_subplot(3, 3, 3)
            lengs = []
            for i in index_plot:
                D = errors_translation[index, i]
                ind = np.where(D > max_distance)[0]
                D[ind] = np.inf
                d = np.sort(D)
                n = len(d)
                accuracy = np.cumsum(np.ones((n, ), np.float32)) / n
                plt.plot(d, accuracy, color[i % len(color)], linewidth=2)
                TS[k, i] = VOCap(d, accuracy)
                lengs.append('%s (%.2f)' % (leng[i], TS[k, i] * 100))
                print('%s, %s: %d objects missed' %
                      (classes[k], leng[i], np.sum(np.isinf(D))))

            ax.legend(lengs)
            plt.xlabel('Translation threshold in meter')
            plt.ylabel('accuracy')
            ax.set_title(classes[k])

            # rotation histogram: one subplot per pose stage, starting at
            # grid position 4 (the first three hold the curves above)
            count = 4
            for i in index_plot:
                ax = fig.add_subplot(3, 3, count)
                D = errors_rotation[index, i]
                # missed objects carry inf and are left out of the histogram
                ind = np.where(np.isfinite(D))[0]
                D = D[ind]
                ax.hist(D, bins=range(0, 190, 10), range=(0, 180))
                plt.xlabel('Rotation angle error')
                plt.ylabel('count')
                ax.set_title(leng[i])
                count += 1

            # mng = plt.get_current_fig_manager()
            # mng.full_screen_toggle()
            filename = output_dir + '/' + classes[k] + '.png'
            plt.savefig(filename)
            # plt.show()
            plt.close(fig)

        # print ADD
        for i in range(cfg.TEST.ITERNUM + 1):
            if i == 0:
                prefix = 'Initial'
            else:
                prefix = 'Iteration %d' % (i)
            print('==================ADD %s======================' % (prefix))
            for k in range(len(classes)):
                print('%s: %f' % (classes[k], ADD[k, i]))

            # the mean excludes the trailing aggregate 'all' row
            print('mean: %f' % (np.mean(ADD[:-1, i])))

            for k in range(len(classes)):
                print('%f' % (ADD[k, i]))
            print(cfg.TRAIN.SNAPSHOT_INFIX)
            print('===========================================')

            # print ADD-S
            print('==================ADD-S %s====================' % (prefix))
            for k in range(len(classes)):
                print('%s: %f' % (classes[k], ADDS[k, i]))

            print('mean: %f' % (np.mean(ADDS[:-1, i])))

            for k in range(len(classes)):
                print('%f' % (ADDS[k, i]))
            print(cfg.TRAIN.SNAPSHOT_INFIX)
            print('===========================================')
Example #55
0
def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas, im_info, _feat_stride=[16, ],
                        anchor_scales=[4, 8, 16, 32]):
    """
    Assign anchors to ground-truth targets. Produces anchor classification
    labels and bounding-box regression targets.
    Parameters
    ----------
    rpn_cls_score: for pytorch (1, Ax2, H, W) bg/fg scores of previous conv layer;
                   only its shape is read here (batch size must be 1)
    gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
    gt_ishard: (G, 1), 1 or 0 indicates difficult or not; may be None
    dontcare_areas: (D, 4), some areas may contain small objs but no labelling. D may be 0,
                    and the argument may be None
    im_info: a list of [image_height, image_width, scale_ratios]; only im_info[0] is used
    _feat_stride: the downsampling ratio of feature map to the original input image
    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
    ----------
    Returns
    ----------
    rpn_labels : (1, A*H, W, 1), for each anchor, 0 denotes bg, 1 fg, -1 dontcare
    rpn_bbox_targets: (1, A*4, H, W), regression objectives of the anchors w.r.t. their
                            best-overlapping gt box, as computed by _compute_targets
    rpn_bbox_inside_weights: (1, A*4, H, W) per-coordinate weights, non-zero only for fg
                            anchors (cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)
    rpn_bbox_outside_weights: (1, A*4, H, W) used to balance the fg/bg,
                            because the numbers of bgs and fgs may differ significantly
    """
    _anchors = generate_anchors(scales=np.array(anchor_scales))
    _num_anchors = _anchors.shape[0]

    if DEBUG:
        print('anchors:')
        print(_anchors)
        print('anchor shapes:')
        print(np.hstack((
            _anchors[:, 2::4] - _anchors[:, 0::4],
            _anchors[:, 3::4] - _anchors[:, 1::4],
        )))
        # running statistics; they are local, so they only cover this call
        _counts = cfg.EPS
        _sums = np.zeros((1, 4))
        _squared_sums = np.zeros((1, 4))
        _fg_sum = 0
        _bg_sum = 0
        _count = 0

    # allow boxes to sit over the edge by a small amount
    _allowed_border = 0

    im_info = im_info[0]

    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    # filter out-of-image anchors
    # measure GT overlap and derive labels / regression targets

    assert rpn_cls_score.shape[0] == 1, \
        'Only single item batches are supported'

    # map of shape (..., H, W)
    # pytorch (bs, c, h, w)
    height, width = rpn_cls_score.shape[2:4]

    if DEBUG:
        print('AnchorTargetLayer: height', height, 'width', width)
        print('')
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))
        print('height, width: ({}, {})'.format(height, width))
        print('rpn: gt_boxes.shape', gt_boxes.shape)
        print('rpn: gt_boxes', gt_boxes)

    # 1. Generate the shifted anchors for every feature-map cell
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # in W H order
    # K is H x W
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]
    all_anchors = (_anchors.reshape((1, A, 4)) +
                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    all_anchors = all_anchors.reshape((K * A, 4))
    total_anchors = int(K * A)

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border) &
        (all_anchors[:, 1] >= -_allowed_border) &
        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    if DEBUG:
        print('total_anchors', total_anchors)
        print('inds_inside', len(inds_inside))

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]
    if DEBUG:
        print('anchors.shape', anchors.shape)

    # label: 1 is positive, 0 is negative, -1 is dont care
    # one entry per inside anchor
    labels = np.empty((len(inds_inside),), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt), one row per inside anchor, one column per gt box
    # (np.float64 replaces the removed alias np.float)
    overlaps = bbox_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float64),
        np.ascontiguousarray(gt_boxes, dtype=np.float64))
    argmax_overlaps = overlaps.argmax(axis=1)  # best gt per anchor
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    gt_argmax_overlaps = overlaps.argmax(axis=0)  # best anchor per gt
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    # every anchor that ties for the best overlap with some gt box
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    labels[gt_argmax_overlaps] = 1
    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # preclude dontcare areas
    if dontcare_areas is not None and dontcare_areas.shape[0] > 0:
        # intersec shape is D x A
        intersecs = bbox_intersections(
            np.ascontiguousarray(dontcare_areas, dtype=np.float64),  # D x 4
            np.ascontiguousarray(anchors, dtype=np.float64)  # A x 4
        )
        # total intersection of each anchor with all dontcare areas
        intersecs_ = intersecs.sum(axis=0)
        labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1

    # preclude hard samples that are highly occlusioned, truncated or difficult to see
    if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[0] > 0:
        assert gt_ishard.shape[0] == gt_boxes.shape[0]
        gt_ishard = gt_ishard.astype(int)
        gt_hardboxes = gt_boxes[gt_ishard == 1, :]
        if gt_hardboxes.shape[0] > 0:
            # one row per hard gt box, one column per inside anchor
            hard_overlaps = bbox_overlaps(
                np.ascontiguousarray(gt_hardboxes, dtype=np.float64),
                np.ascontiguousarray(anchors, dtype=np.float64))
            hard_max_overlaps = hard_overlaps.max(axis=0)  # per anchor
            labels[hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
            # also ignore the best-matching anchor of every hard gt box
            max_intersec_label_inds = hard_overlaps.argmax(axis=1)
            labels[max_intersec_label_inds] = -1

    # subsample positive labels if we have too many
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        disable_inds = npr.choice(
            fg_inds, size=(len(fg_inds) - num_fg), replace=False)
        labels[disable_inds] = -1

    # subsample negative labels if we have too many
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1

    # regression target of every inside anchor w.r.t. its best gt box
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    # inside weights: only fg anchors contribute to the regression loss
    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # fg anchors get weight 1, bg anchors weight 0
        positive_weights = np.ones((1, 4))
        negative_weights = np.zeros((1, 4))
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        # the "+ 1" belongs inside the denominator: it guards against a
        # division by zero when there are no fg (or bg) anchors
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            (np.sum(labels == 1) + 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            (np.sum(labels == 0) + 1))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    if DEBUG:
        _sums += bbox_targets[labels == 1, :].sum(axis=0)
        _squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
        _counts += np.sum(labels == 1)
        means = _sums / _counts
        stds = np.sqrt(_squared_sums / _counts - means ** 2)
        print('means:')
        print(means)
        print('stdevs:')
        print(stds)

    # map up to original set of anchors; anchors outside the image get the
    # "fill" value
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

    if DEBUG:
        print('rpn: max max_overlap', np.max(max_overlaps))
        print('rpn: num_positive', np.sum(labels == 1))
        print('rpn: num_negative', np.sum(labels == 0))
        _fg_sum += np.sum(labels == 1)
        _bg_sum += np.sum(labels == 0)
        _count += 1
        print('rpn: num_positive avg', _fg_sum / _count)
        print('rpn: num_negative avg', _bg_sum / _count)

    # labels: (K*A,) -> (1, H, W, A) -> (1, A, H, W) -> (1, 1, A*H, W)
    #         -> (1, A*H, W, 1)
    labels = labels.reshape((1, height, width, A))
    labels = labels.transpose(0, 3, 1, 2)
    rpn_labels = labels.reshape((1, 1, A * height, width)).transpose(0, 2, 3, 1)

    # bbox_targets: (K*A, 4) -> (1, H, W, A*4) -> (1, A*4, H, W)
    bbox_targets = bbox_targets \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)

    rpn_bbox_targets = bbox_targets

    # bbox_inside_weights, same layout as bbox_targets
    bbox_inside_weights = bbox_inside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)

    rpn_bbox_inside_weights = bbox_inside_weights

    # bbox_outside_weights, same layout as bbox_targets
    bbox_outside_weights = bbox_outside_weights \
        .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)

    rpn_bbox_outside_weights = bbox_outside_weights

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
Example #56
0
    def evaluate_recall(self, candidate_boxes=None, thresholds=None,
                        area='all', limit=None):
        """Evaluate detection proposal recall metrics.

        Args:
            candidate_boxes: optional sequence indexed by image holding the
                proposal boxes of that image. When None, the non-ground-truth
                boxes stored in ``self.roidb`` are used instead.
            thresholds: optional vector of IoU thresholds. Defaults to
                0.5, 0.55, ..., 0.95.
            area: name of the ground-truth area range to evaluate; one of
                'all', 'small', 'medium', 'large', '96-128', '128-256',
                '256-512', '512-inf'.
            limit: optional maximum number of proposals used per image (the
                first ``limit`` rows are kept).

        Returns:
            A tuple ``(ar, gt_overlaps, recalls, thresholds)``:
                ar: average recall (mean of ``recalls``)
                gt_overlaps: sorted vector of the recorded ground-truth
                    overlaps
                recalls: vector of recalls at each IoU overlap threshold
                thresholds: vector of IoU overlap thresholds

        Raises:
            AssertionError: if ``area`` is not a known area range.
        """
        # Record max overlap value for each gt box
        # Return vector of overlap values
        areas = { 'all': 0, 'small': 1, 'medium': 2, 'large': 3,
                  '96-128': 4, '128-256': 5, '256-512': 6, '512-inf': 7}
        area_ranges = [ [0**2, 1e5**2],    # all
                        [0**2, 32**2],     # small
                        [32**2, 96**2],    # medium
                        [96**2, 1e5**2],   # large
                        [96**2, 128**2],   # 96-128
                        [128**2, 256**2],  # 128-256
                        [256**2, 512**2],  # 256-512
                        [512**2, 1e5**2],  # 512-inf
                      ]
        # `in` works on Python 2 and 3 (dict.has_key is Python 2 only)
        assert area in areas, 'unknown area range: {}'.format(area)
        area_range = area_ranges[areas[area]]
        gt_overlaps = np.zeros(0)
        num_pos = 0
        for i in range(self.num_images):
            # Checking for max_overlaps == 1 avoids including crowd annotations
            # (...pretty hacking :/)
            max_gt_overlaps = self.roidb[i]['gt_overlaps'].toarray().max(axis=1)
            gt_inds = np.where((self.roidb[i]['gt_classes'] > 0) &
                               (max_gt_overlaps == 1))[0]
            gt_boxes = self.roidb[i]['boxes'][gt_inds, :]
            gt_areas = self.roidb[i]['seg_areas'][gt_inds]
            valid_gt_inds = np.where((gt_areas >= area_range[0]) &
                                     (gt_areas <= area_range[1]))[0]
            gt_boxes = gt_boxes[valid_gt_inds, :]
            # counted before the "no proposals" check below, so images
            # without proposals still contribute to the recall denominator
            num_pos += len(valid_gt_inds)

            if candidate_boxes is None:
                # If candidate_boxes is not supplied, the default is to use the
                # non-ground-truth boxes from this roidb
                non_gt_inds = np.where(self.roidb[i]['gt_classes'] == 0)[0]
                boxes = self.roidb[i]['boxes'][non_gt_inds, :]
            else:
                boxes = candidate_boxes[i]
            if boxes.shape[0] == 0:
                continue
            if limit is not None and boxes.shape[0] > limit:
                boxes = boxes[:limit, :]

            # np.float64 replaces the removed alias np.float
            overlaps = bbox_overlaps(boxes.astype(np.float64),
                                     gt_boxes.astype(np.float64))

            # greedy one-to-one matching: repeatedly take the best-covered
            # remaining gt box and retire it together with its proposal
            # NOTE(review): if an image has more gt boxes than proposals,
            # every entry ends up as -1 and the assert below fires - confirm
            # whether callers can hit that case
            _gt_overlaps = np.zeros((gt_boxes.shape[0]))
            for j in range(gt_boxes.shape[0]):
                # find which proposal box maximally covers each gt box
                argmax_overlaps = overlaps.argmax(axis=0)
                # and get the iou amount of coverage for each gt box
                max_overlaps = overlaps.max(axis=0)
                # find which gt box is 'best' covered (i.e. 'best' = most iou)
                gt_ind = max_overlaps.argmax()
                gt_ovr = max_overlaps.max()
                assert(gt_ovr >= 0)
                # find the proposal box that covers the best covered gt box
                box_ind = argmax_overlaps[gt_ind]
                # record the iou coverage of this gt box
                _gt_overlaps[j] = overlaps[box_ind, gt_ind]
                assert(_gt_overlaps[j] == gt_ovr)
                # mark the proposal box and the gt box as used
                overlaps[box_ind, :] = -1
                overlaps[:, gt_ind] = -1
            # append recorded iou coverage level
            gt_overlaps = np.hstack((gt_overlaps, _gt_overlaps))

        gt_overlaps = np.sort(gt_overlaps)
        if thresholds is None:
            step = 0.05
            thresholds = np.arange(0.5, 0.95 + 1e-5, step)
        recalls = np.zeros_like(thresholds)
        # compute recall for each iou threshold
        for i, t in enumerate(thresholds):
            recalls[i] = (gt_overlaps >= t).sum() / float(num_pos)
        # ar = 2 * np.trapz(recalls, thresholds)
        ar = recalls.mean()
        return ar, gt_overlaps, recalls, thresholds
Example #57
0
    def forward(self, bottom, top):
        """Assign training labels and box-regression targets to every anchor.

        Reads from ``bottom``:
            bottom[0]: a map whose last two dimensions give the (H, W) grid;
                       its first dimension must be 1.
            bottom[1]: ground-truth boxes (x1, y1, x2, y2, label).
            bottom[2]: im_info; row 0 is read as (height, width, ...).
            bottom[4]: class scores, read only when hard mining is active.

        Writes to ``top``:
            top[0]: labels, shape (1, 1, A * H, W);
                    1 = positive, 0 = negative, -1 = don't care.
            top[1]: bbox targets, shape (1, A * 4, H, W).
            top[2]: bbox inside weights, shape (1, A * 4, H, W).
            top[3]: bbox outside weights, shape (1, A * 4, H, W).
        """
        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'

        # map of shape (..., H, W)
        height, width = bottom[0].data.shape[-2:]
        # GT boxes (x1, y1, x2, y2, label)
        gt_boxes = bottom[1].data
        # im_info
        im_info = bottom[2].data[0, :]

        # 1. Generate the shifted anchors covering the whole feature grid
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        all_anchors = (self._anchors.reshape((1, A, 4)) +
                       shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
        all_anchors = all_anchors.reshape((K * A, 4))
        total_anchors = int(K * A)

        # only keep anchors inside the image
        inds_inside = np.where(
            (all_anchors[:, 0] >= -self._allowed_border) &
            (all_anchors[:, 1] >= -self._allowed_border) &
            (all_anchors[:, 2] < im_info[1] + self._allowed_border) &  # width
            (all_anchors[:, 3] < im_info[0] + self._allowed_border)    # height
        )[0]

        if inds_inside.shape[0] == 0:
            # If no anchors are inside, fall back to every anchor so the
            # argmax calls below never see an empty first axis.
            inds_inside = np.arange(0, all_anchors.shape[0])

        # keep only inside anchors
        anchors = all_anchors[inds_inside, :]

        def _inside_ohem_scores():
            # Channels A.. of bottom[4], flattened to a single column in
            # (H, W, A) order so that row i matches all_anchors[i], then
            # restricted to the anchors that were kept.
            scores = bottom[4].data[:, A:, :, :]
            scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
            return scores[inds_inside]

        # label: 1 is positive, 0 is negative, -1 is dont care
        labels = np.empty((len(inds_inside), ), dtype=np.float32)
        labels.fill(-1)

        # overlaps between the anchors and the gt boxes
        # overlaps (ex, gt)
        # np.float64 is the dtype the removed ``np.float`` alias referred to.
        overlaps = bbox_overlaps(
            np.ascontiguousarray(anchors, dtype=np.float64),
            np.ascontiguousarray(gt_boxes, dtype=np.float64))
        argmax_overlaps = overlaps.argmax(axis=1)
        max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]

        # assign bg labels first so that positive labels can clobber them
        labels[max_overlaps < cfg.TRAIN.ANCHOR_NEGATIVE_OVERLAP] = 0

        # fg label: for each gt, anchor with highest overlap
        if cfg.TRAIN.FORCE_FG_FOR_EACH_GT:
            gt_argmax_overlaps = overlaps.argmax(axis=0)
            gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                       np.arange(overlaps.shape[1])]
            # re-select by value so that anchors tied for the maximum
            # overlap with a gt box are all marked positive
            gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
            labels[gt_argmax_overlaps] = 1

        # fg label: above threshold IOU
        labels[max_overlaps >= self._positive_overlap] = 1

        # Subsample positives
        num_fg = int(cfg.TRAIN.ANCHOR_FG_FRACTION * cfg.TRAIN.ANCHORS_PER_BATCH)
        fg_inds = np.where(labels == 1)[0]
        if len(fg_inds) > num_fg:
            if self._hard_mining and cfg.TRAIN.HARD_POSITIVE_MINING:
                # keep the num_fg positives with the largest (1 - score)
                pos_ohem_scores = 1 - _inside_ohem_scores()[fg_inds]
                order_pos_ohem_scores = pos_ohem_scores.ravel().argsort()[::-1]
                ohem_sampled_fgs = fg_inds[order_pos_ohem_scores[:num_fg]]
                labels[fg_inds] = -1
                labels[ohem_sampled_fgs] = 1
            else:
                disable_inds = npr.choice(
                    fg_inds, size=(len(fg_inds) - num_fg), replace=False)
                labels[disable_inds] = -1

        # Subsample negatives
        n_fg = np.sum(labels == 1)
        num_bg = cfg.TRAIN.ANCHORS_PER_BATCH - n_fg
        bg_inds = np.where(labels == 0)[0]
        if len(bg_inds) > num_bg:
            if not self._hard_mining:
                # randomly sub-sample negatives
                disable_inds = npr.choice(
                    bg_inds, size=(len(bg_inds) - num_bg), replace=False)
                labels[disable_inds] = -1
            else:
                # keep the num_bg negatives with the largest score
                neg_ohem_scores = _inside_ohem_scores()[bg_inds]
                order_neg_ohem_scores = neg_ohem_scores.ravel().argsort()[::-1]
                ohem_sampled_bgs = bg_inds[order_neg_ohem_scores[:num_bg]]
                labels[bg_inds] = -1
                labels[ohem_sampled_bgs] = 0

        bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])
        # only the positive anchors get non-zero inside weights
        bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.BBOX_INSIDE_WEIGHTS)

        bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
        if cfg.TRAIN.POSITIVE_WEIGHT < 0:
            # uniform weighting of examples (given non-uniform sampling)
            num_examples = np.sum(labels >= 0)
            positive_weights = np.ones((1, 4)) * 1.0 / num_examples
            negative_weights = np.ones((1, 4)) * 1.0 / num_examples
        else:
            assert ((cfg.TRAIN.POSITIVE_WEIGHT > 0) &
                    (cfg.TRAIN.POSITIVE_WEIGHT < 1))
            positive_weights = (cfg.TRAIN.POSITIVE_WEIGHT /
                                np.sum(labels == 1))
            negative_weights = ((1.0 - cfg.TRAIN.POSITIVE_WEIGHT) /
                                np.sum(labels == 0))
        bbox_outside_weights[labels == 1, :] = positive_weights
        bbox_outside_weights[labels == 0, :] = negative_weights

        # map up to original set of anchors
        labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
        bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
        bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
        bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)

        # labels
        labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
        labels = labels.reshape((1, 1, A * height, width))
        top[0].reshape(*labels.shape)
        top[0].data[...] = labels

        # bbox_targets
        bbox_targets = bbox_targets \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        top[1].reshape(*bbox_targets.shape)
        top[1].data[...] = bbox_targets

        # bbox_inside_weights
        bbox_inside_weights = bbox_inside_weights \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        assert bbox_inside_weights.shape[2] == height
        assert bbox_inside_weights.shape[3] == width
        top[2].reshape(*bbox_inside_weights.shape)
        top[2].data[...] = bbox_inside_weights

        # bbox_outside_weights
        bbox_outside_weights = bbox_outside_weights \
            .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
        assert bbox_outside_weights.shape[2] == height
        assert bbox_outside_weights.shape[3] == width
        top[3].reshape(*bbox_outside_weights.shape)
        top[3].data[...] = bbox_outside_weights
Example #58
0
def prepare_roidb(imdb):
    """Attach per-image training metadata to ``imdb.roidb`` and cache it.

    For every image the roidb entry gains:

    * ``'image'``: the path returned by ``imdb.image_path_at(i)``.
    * ``'info_boxes'``: an (n, 18) float32 array with one row per positive
      grid box, accumulated over all ``cfg.TRAIN.SCALES``. Row layout:
      0 cx, 1 cy, 2 scale_ind, 3:7 grid box, 7 scale_ind_map,
      8:12 grid box rescaled by ``scale_map / scale``, 12 gt label,
      13 never written here (stays 0), 14:18 regression targets.

    The enriched roidb is pickled to
    ``<imdb.cache_path>/<imdb.name>_gt_roidb_prepared.pkl``. When that file
    already exists it is loaded into ``imdb._roidb`` and nothing is
    recomputed. Returns None in both cases.
    """
    # If a cache file exists, load it and return immediately.
    # NOTE: unpickling can execute arbitrary code, so the cache directory
    # must only contain files written by this function.
    cache_file = os.path.join(imdb.cache_path, imdb.name + '_gt_roidb_prepared.pkl')
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as fid:
            imdb._roidb = cPickle.load(fid)
        print('{} gt roidb prepared loaded from {}'.format(imdb.name, cache_file))
        return

    roidb = imdb.roidb
    for i in xrange(len(imdb.image_index)):
        image_path = imdb.image_path_at(i)
        roidb[i]['image'] = image_path
        # Presumably the ground-truth boxes and their class labels.
        # NOTE(review): the boxes are multiplied by each training scale
        # below, so they appear not to be in scaled-image coordinates yet
        # -- confirm against the code that fills roidb.
        boxes = roidb[i]['boxes']
        labels = roidb[i]['gt_classes']
        # Start from an empty (0, 18) array; rows are appended per scale.
        info_boxes = np.zeros((0, 18), dtype=np.float32)

        if boxes.shape[0] == 0:
            roidb[i]['info_boxes'] = info_boxes
            continue

        # compute grid boxes from the real image size (PIL reports (w, h))
        image_width, image_height = PIL.Image.open(image_path).size
        # boxes_grid holds candidate boxes (not gt boxes); cx / cy are
        # indexed per grid box below and stored as each box's centre.
        # NOTE(review): exact semantics are defined by get_boxes_grid.
        boxes_grid, cx, cy = get_boxes_grid(image_height, image_width)
        # The float copy does not depend on the scale, so build it once.
        # np.float64 is the dtype the removed ``np.float`` alias referred to.
        boxes_grid_float = boxes_grid.astype(np.float64)

        # for each training scale listed in cfg.TRAIN.SCALES
        for scale_ind, scale in enumerate(cfg.TRAIN.SCALES):
            boxes_rescaled = boxes * scale

            # compute overlap between every grid box and every rescaled box
            overlaps = bbox_overlaps(boxes_grid_float, boxes_rescaled.astype(np.float64))
            # for each grid box: best IoU, the index of the box achieving
            # it, and that box's class label
            max_overlaps = overlaps.max(axis = 1)
            argmax_overlaps = overlaps.argmax(axis = 1)
            max_classes = labels[argmax_overlaps]

            # select positive boxes: walk the foreground classes in order
            # and keep grid boxes whose best IoU reaches FG_THRESH
            fg_inds = []
            for k in xrange(1, imdb.num_classes):
                fg_inds.extend(np.where((max_classes == k) & (max_overlaps >= cfg.TRAIN.FG_THRESH))[0])

            if len(fg_inds) > 0:
                # index of the matched box for every positive grid box
                gt_inds = argmax_overlaps[fg_inds]
                # bounding box regression targets between each positive
                # grid box and its matched box; stored in 4 columns below
                gt_targets = _compute_targets(boxes_grid[fg_inds,:], boxes_rescaled[gt_inds,:])

                # scale mapping for RoI pooling
                # NOTE(review): the original author could not find
                # SCALE_MAPPING in cfg -- confirm it is defined.
                scale_ind_map = cfg.TRAIN.SCALE_MAPPING[scale_ind]
                scale_map = cfg.TRAIN.SCALES[scale_ind_map]

                # construct the list of positive boxes
                # (cx, cy, scale_ind, box, scale_ind_map, box_map, gt_label, gt_sublabel, target)
                # The 18 columns describe a single box each; column 13
                # (gt_sublabel) is not filled in here.
                info_box = np.zeros((len(fg_inds), 18), dtype=np.float32)
                info_box[:, 0] = cx[fg_inds]
                info_box[:, 1] = cy[fg_inds]
                info_box[:, 2] = scale_ind
                info_box[:, 3:7] = boxes_grid[fg_inds,:]
                info_box[:, 7] = scale_ind_map
                info_box[:, 8:12] = boxes_grid[fg_inds,:] * scale_map / scale
                info_box[:, 12] = labels[gt_inds]
                info_box[:, 14:] = gt_targets
                info_boxes = np.vstack((info_boxes, info_box))

        roidb[i]['info_boxes'] = info_boxes

    with open(cache_file, 'wb') as fid:
        cPickle.dump(roidb, fid, cPickle.HIGHEST_PROTOCOL)
    print('wrote gt roidb prepared to {}'.format(cache_file))
def anchor_target_layer(rpn_cls_prob, gt_boxes, im_info, _feat_stride,
                        all_anchors, num_anchors, target_name):
    """Same as the anchor target layer in original Fast/er RCNN, with
    optional online hard example mining when sub-sampling anchors.

    Args:
        rpn_cls_prob: 4-D score array; axes 1 and 2 are read as (H, W) and
            the last axis is sliced from ``num_anchors`` onward to obtain
            the scores used for hard mining.
        gt_boxes: ground-truth boxes, one per row.
        im_info: indexable with [0] = image height and [1] = image width.
        _feat_stride: accepted for interface compatibility; not used here.
        all_anchors: (total_anchors, 4) anchor boxes.
        num_anchors: number of anchors per grid cell (A).
        target_name: "M3" allows anchors to extend 512 px past the image
            border; any other value allows no overhang.

    Returns:
        A tuple ``(rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights,
        rpn_bbox_outside_weights)`` with shapes (1, 1, A * H, W) for the
        labels and (1, H, W, A * 4) for the other three. Labels are
        1 = positive, 0 = negative, -1 = don't care.
    """
    A = num_anchors
    total_anchors = all_anchors.shape[0]
    hard_mining = cfg.TRAIN.HARD_POSITIVE_MINING

    # allow boxes to sit over the edge by a small amount
    # _allowed_border = 0
    # follow the SSH setting
    if target_name == "M3":
        _allowed_border = 512
    else:
        _allowed_border = 0

    # map of shape (..., H, W)
    height, width = rpn_cls_prob.shape[1:3]

    # only keep anchors inside the image
    inds_inside = np.where(
        (all_anchors[:, 0] >= -_allowed_border)
        & (all_anchors[:, 1] >= -_allowed_border)
        & (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
    )[0]

    # Guard against 'ValueError: attempt to get argmax of an empty sequence'
    # during training.
    if inds_inside.shape[0] == 0:
        # If no anchors inside use whatever anchors we have
        inds_inside = np.arange(0, total_anchors)

    # keep only inside anchors
    anchors = all_anchors[inds_inside, :]

    def _inside_ohem_scores():
        # Last-axis channels from num_anchors onward, flattened to a single
        # column and restricted to the anchors that were kept.
        scores = rpn_cls_prob[:, :, :, num_anchors:]
        scores = scores.reshape((-1, 1))
        return scores[inds_inside]

    # label: 1 is positive, 0 is negative, -1 is dont care
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)

    # overlaps between the anchors and the gt boxes
    # overlaps (ex, gt)
    # np.float64 is the dtype the removed ``np.float`` alias referred to.
    overlaps = bbox_overlaps(np.ascontiguousarray(anchors, dtype=np.float64),
                             np.ascontiguousarray(gt_boxes, dtype=np.float64))

    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]

    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels first so that positive labels can clobber them
        # first set the negatives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # fg label: for each gt, anchor with highest overlap
    if cfg.TRAIN.FORCE_FG_FOR_EACH_GT:
        gt_argmax_overlaps = overlaps.argmax(axis=0)
        gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                   np.arange(overlaps.shape[1])]
        # re-select by value so that anchors tied for the maximum overlap
        # with a gt box are all marked positive
        gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
        labels[gt_argmax_overlaps] = 1

    # fg label: above threshold IOU
    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1

    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
        # assign bg labels last so that negative labels can clobber positives
        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0

    # ---- Sub-sample positives (optionally with online hard example mining)
    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    if len(fg_inds) > num_fg:
        if hard_mining:
            # keep the num_fg positives with the largest (1 - score)
            pos_ohem_scores = 1 - _inside_ohem_scores()[fg_inds]
            order_pos_ohem_scores = pos_ohem_scores.ravel().argsort()[::-1]
            ohem_sampled_fgs = fg_inds[order_pos_ohem_scores[:num_fg]]
            labels[fg_inds] = -1
            labels[ohem_sampled_fgs] = 1
        else:
            disable_inds = npr.choice(fg_inds,
                                      size=(len(fg_inds) - num_fg),
                                      replace=False)
            labels[disable_inds] = -1

    # ---- Sub-sample negatives (optionally with online hard example mining)
    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        if not hard_mining:
            # randomly sub-sampling negatives
            disable_inds = npr.choice(bg_inds,
                                      size=(len(bg_inds) - num_bg),
                                      replace=False)
            labels[disable_inds] = -1
        else:
            # keep the num_bg negatives with the largest score
            neg_ohem_scores = _inside_ohem_scores()[bg_inds]
            order_neg_ohem_scores = neg_ohem_scores.ravel().argsort()[::-1]
            ohem_sampled_bgs = bg_inds[order_neg_ohem_scores[:num_bg]]
            labels[bg_inds] = -1
            labels[ohem_sampled_bgs] = 0

    # regression targets of every kept anchor towards its best-matching gt
    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])

    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # only the positive ones have regression targets
    bbox_inside_weights[labels == 1, :] = np.array(
        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)

    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
        # uniform weighting of examples (given non-uniform sampling)
        num_examples = np.sum(labels >= 0)
        positive_weights = np.ones((1, 4)) * 1.0 / num_examples
        negative_weights = np.ones((1, 4)) * 1.0 / num_examples
    else:
        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
                            np.sum(labels == 1))
        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
                            np.sum(labels == 0))
    bbox_outside_weights[labels == 1, :] = positive_weights
    bbox_outside_weights[labels == 0, :] = negative_weights

    # map up to original set of anchors
    labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
    bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
    bbox_inside_weights = _unmap(bbox_inside_weights,
                                 total_anchors,
                                 inds_inside,
                                 fill=0)
    bbox_outside_weights = _unmap(bbox_outside_weights,
                                  total_anchors,
                                  inds_inside,
                                  fill=0)

    # labels: (H*W*A,) -> (1, A, H, W) -> (1, 1, A*H, W)
    labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
    rpn_labels = labels.reshape((1, 1, A * height, width))

    # the three box arrays stay channel-last: (1, H, W, A*4)
    rpn_bbox_targets = bbox_targets.reshape((1, height, width, A * 4))
    rpn_bbox_inside_weights = bbox_inside_weights.reshape(
        (1, height, width, A * 4))
    rpn_bbox_outside_weights = bbox_outside_weights.reshape(
        (1, height, width, A * 4))

    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights