def bbox_reg(self, boxes, box_deltas, im):
    if CUDA_AVAILABLE:
        boxes = boxes.data[:, 1:].cpu().numpy()
        box_deltas = box_deltas.data.cpu().numpy()
    else:
        boxes = boxes.data[:, 1:].numpy()
        box_deltas = box_deltas.data.numpy()
    pred_boxes = bbox_transform_inv(boxes, box_deltas)
    pred_boxes = clip_boxes(pred_boxes, im.size()[-2:])
    return _tovar(pred_boxes)
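Every example in this section leans on the same two helpers, `bbox_transform_inv` and `clip_boxes`. The exact implementations vary from repo to repo; as a reference, here is a minimal NumPy sketch of the standard Faster R-CNN delta decoding and clipping, assuming (x1, y1, x2, y2) boxes and (dx, dy, dw, dh) deltas of shape (N, 4). Treat it as an illustration, not the code these snippets actually import.

import numpy as np

def bbox_transform_inv(boxes, deltas):
    """Decode (dx, dy, dw, dh) deltas against anchor/proposal boxes (x1, y1, x2, y2)."""
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx, dy, dw, dh = deltas[:, 0], deltas[:, 1], deltas[:, 2], deltas[:, 3]
    pred_ctr_x = dx * widths + ctr_x
    pred_ctr_y = dy * heights + ctr_y
    pred_w = np.exp(dw) * widths
    pred_h = np.exp(dh) * heights

    pred = np.zeros_like(deltas, dtype=np.float32)
    pred[:, 0] = pred_ctr_x - 0.5 * pred_w  # x1
    pred[:, 1] = pred_ctr_y - 0.5 * pred_h  # y1
    pred[:, 2] = pred_ctr_x + 0.5 * pred_w  # x2
    pred[:, 3] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred

def clip_boxes(boxes, im_shape):
    """Clip boxes to image boundaries; im_shape is (height, width)."""
    boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, im_shape[1] - 1)  # x coordinates
    boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, im_shape[0] - 1)  # y coordinates
    return boxes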
Example 2
    def get_roi_boxes(self, anchors, rpn_map, rpn_bbox_deltas, im):
        # TODO fix this!!!
        im_info = (100, 100, 1)

        if CUDA_AVAILABLE:
            bbox_deltas = rpn_bbox_deltas.data.cpu().numpy()
        else:
            bbox_deltas = rpn_bbox_deltas.data.numpy()

        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        #scores = bottom[0].data[:, self._num_anchors:, :, :]
        if CUDA_AVAILABLE:
            scores = rpn_map.data[:, self._num_anchors:, :, :].cpu().numpy()
        else:
            scores = rpn_map.data[:, self._num_anchors:, :, :].numpy()

        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im.size()[-2:])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = filter_boxes(proposals, self.min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if self.pre_nms_topN > 0:
            order = order[:self.pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), self.nms_thresh)
        if self.post_nms_topN > 0:
            keep = keep[:self.post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

        return proposals, scores
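`filter_boxes` (step 3 above) drops proposals whose width or height falls below the minimum size. Its NumPy form mirrors the `_filter_boxes` method shown in Example 8 below; a sketch:

import numpy as np

def filter_boxes(boxes, min_size):
    """Indices of boxes whose width and height are both >= min_size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    return np.where((ws >= min_size) & (hs >= min_size))[0]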
Example 3
    def get_proposal_boxes(self, rpn_bbox_deltas, rpn_cls_probs):
        """
        Applies RPN bbox deltas to the anchor boxes to get region proposals,
        then filters them by non-max suppression and limits the output to the
        top 2k boxes.
        Arguments:
            rpn_bbox_deltas (Tensor) : (9*fH*fW, 4)
            rpn_cls_probs (Tensor) : (9*fH*fW, 2)
        Return:
            proposal_boxes (ndarray) : (# proposal boxes, 4)
            scores (ndarray) : (# proposal boxes,)
        """
        all_anchor_boxes = self.all_anchor_boxes
        nms_thresh = self.nms_thresh  # IoU threshold for NMS
        pre_nms_limit = self.pre_nms_limit
        post_nms_limit = self.post_nms_limit  # eval with different numbers at test

        rpn_bbox_deltas = rpn_bbox_deltas.data.cpu().numpy()
        pos_score = rpn_cls_probs.data.cpu().numpy()[:, 1]

        # 1. Convert anchors into proposal via bbox transformation
        proposal_boxes = bbox_transform_inv(
            all_anchor_boxes,
            rpn_bbox_deltas)  # (H/16 * W/16 * 9, 4) all proposal boxes

        height, width = self.feature_map_dim[-2:]
        # 2. ignore out of bounds proposals during training
        if not self.test:
            indices = filter_cross_boundary_boxes(proposal_boxes,
                                                  (height * 16, width * 16))
            proposal_boxes = proposal_boxes[indices]
            pos_score = pos_score[indices]

        # if no boxes are in the image boundaries, skip
        if len(proposal_boxes) == 0:
            return [], []
        # 3. pre-NMS limit: keep only the highest-scoring boxes
        # (argsort is ascending, so reverse it before slicing)
        limit = np.argsort(pos_score)[::-1][:pre_nms_limit]
        proposal_boxes = proposal_boxes[limit]
        pos_score = pos_score[limit]
        # 4. apply nms (e.g. threshold = 0.7)
        proposal_boxes, scores = non_max_suppression(proposal_boxes, pos_score,
                                                     nms_thresh,
                                                     post_nms_limit)
        return proposal_boxes, scores
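The `non_max_suppression`, `nms`, and `py_cpu_nms` helpers referenced throughout these examples are variants of the same greedy algorithm. A minimal pure-NumPy version, assuming boxes arrive as (x1, y1, x2, y2) with the score in the last column:

import numpy as np

def py_cpu_nms(dets, thresh):
    """Greedy NMS. dets is (N, 5): x1, y1, x2, y2, score. Returns kept indices."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # highest score first

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the top box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # drop everything that overlaps the kept box above the threshold
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep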
Example 4
def generate_proposals(data):
    # Extract feature map
    feature_map = CNN_model_cut.predict(
        data.reshape(-1, data.shape[0], data.shape[1], data.shape[2]))
    padded_fcmap = np.pad(feature_map, ((0, 0), (1, 1), (1, 1), (0, 0)),
                          mode='constant')

    # Extract RPN results
    RPN_results = RPN_model.predict(padded_fcmap)
    anchor_probs = RPN_results[0].reshape((-1, 1))
    anchor_targets = RPN_results[1].reshape((-1, 4))

    # Original anchors
    feature_size = feature_map.shape[1]
    number_feature_points = feature_size * feature_size
    feature_stride = int(image_size / feature_size)
    base_anchors = generate_anchors(feature_stride,
                                    feature_stride,
                                    ratios=ANCHOR_RATIOS,
                                    scales=ANCHOR_SCALES)
    shift = np.arange(0, feature_size) * feature_stride
    shift_x, shift_y = np.meshgrid(shift, shift)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    original_anchors = (base_anchors.reshape(
        (1, anchor_number, 4)) + shifts.reshape(
            (1, number_feature_points, 4)).transpose((1, 0, 2)))
    original_anchors = original_anchors.reshape((-1, 4))

    # Proposals by the RPN
    proposals = bbox_transform_inv(original_anchors, anchor_targets)
    proposals = clip_boxes(proposals,
                           (data.shape[0], data.shape[1]))  # clip to image.
    high_to_low_scores = anchor_probs.ravel().argsort()[::-1]  # indices, highest score first
    high_to_low_scores = high_to_low_scores[0:N]  # N: proposal cap defined elsewhere in the module
    proposals = proposals[high_to_low_scores, :]
    anchor_probs = anchor_probs[high_to_low_scores]

    del original_anchors
    del RPN_results
    del feature_map
    del padded_fcmap

    return proposals, anchor_probs
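Examples 4, 9, and 13 all build the full anchor set the same way: broadcast the A base anchors against K spatial shifts, one shift per feature-map cell. A small shape walk-through with illustrative numbers (A=9 anchors, a 3x3 feature map, stride 16):

import numpy as np

A, feature_size, stride = 9, 3, 16           # illustrative numbers
K = feature_size * feature_size              # one shift per feature-map cell

base_anchors = np.zeros((A, 4))              # stand-in for generate_anchors(...)
shift = np.arange(0, feature_size) * stride
sx, sy = np.meshgrid(shift, shift)
shifts = np.vstack((sx.ravel(), sy.ravel(), sx.ravel(), sy.ravel())).T  # (K, 4)

# (1, A, 4) + (K, 1, 4) broadcasts to (K, A, 4): every anchor at every cell
all_anchors = base_anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
all_anchors = all_anchors.reshape((-1, 4))
print(all_anchors.shape)                     # (81, 4) == (K * A, 4)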
Example 5
def define_bbox(pred_bbox_delta, ANCHOR_BOX):
	delta_x, delta_y, delta_w, delta_h = torch.unbind(
	  pred_bbox_delta, dim=2)
	# set_anchors(mc, scale)

	anchor_x = ANCHOR_BOX[:, 0]
	anchor_y = ANCHOR_BOX[:, 1]
	anchor_w = ANCHOR_BOX[:, 2]
	anchor_h = ANCHOR_BOX[:, 3]

	box_center_x = anchor_x + delta_x * anchor_w
	box_center_y = anchor_y + delta_y * anchor_h
	# box_width = anchor_w * util.safe_exp(delta_w, EXP_THRESH)
	# box_height = anchor_h * util.safe_exp(delta_h, EXP_THRESH)
	box_width = anchor_w * torch.exp(delta_w)
	box_height = anchor_h * torch.exp(delta_h) # ok, this needs to be done on CPU side

	xmins, ymins, xmaxs, ymaxs = util.bbox_transform(
	    [box_center_x, box_center_y, box_width, box_height])

	xmins = xmins.cpu().detach().numpy()
	ymins = ymins.cpu().detach().numpy()
	xmaxs = xmaxs.cpu().detach().numpy()
	ymaxs = ymaxs.cpu().detach().numpy()

	# The max x position is mc.IMAGE_WIDTH - 1 since we use zero-based
	# pixels. Same for y.
	xmins = np.minimum(
	    np.maximum(0.0, xmins), IMAGE_WIDTH-1.0)

	ymins = np.minimum(
	    np.maximum(0.0, ymins), IMAGE_HEIGHT-1.0)

	xmaxs = np.maximum(
	    np.minimum(IMAGE_WIDTH-1.0, xmaxs), 0.0)

	ymaxs = np.maximum(
	    np.minimum(IMAGE_HEIGHT-1.0, ymaxs), 0.0)

	det_boxes = torch.transpose(
	    torch.stack(util.bbox_transform_inv(torch.FloatTensor([xmins, ymins, xmaxs, ymaxs]))),
	    1, 2) # this is not needed for hardware implementation
	return det_boxes
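The commented-out lines above reference `util.safe_exp`, which guards the width/height decoding against overflow when the network emits a very large delta. One common definition is the SqueezeDet-style version below; this is a sketch of that idea, not the repo's actual `util` code:

import math
import torch

def safe_exp(w, thresh):
    """exp(w) for w <= thresh; linear continuation above it to avoid overflow."""
    slope = math.exp(thresh)
    # clamp before exp so the branch that is not taken cannot overflow to inf
    exp_part = torch.exp(torch.clamp(w, max=thresh))
    lin_part = slope * (w - thresh + 1.0)
    return torch.where(w > thresh, lin_part, exp_part)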
Example 6
 def get_predictions(self, img, ignore_background=True):
     """
     :param img:
     :return: predicted_targets: (N, x1, y1, x2, y2, C)
     """
     rpn_cls_probs, rpn_bbox_deltas, pred_label, pred_bbox_deltas = self.forward(
         img)
     proposals, _ = self.get_rpn_proposals()
     _, pred_class = pred_label.max(dim=1)
     pred_class = pred_class.cpu().long()
     pred_bbox_deltas = pred_bbox_deltas.data.cpu()
     idx = torch.arange(0, len(pred_class)).long()
     pred_deltas_top_class = pred_bbox_deltas[
         idx, pred_class.data.long()].numpy()
     pred_boxes = bbox_transform_inv(proposals, pred_deltas_top_class)
     pred_targets = np.hstack(
         [pred_boxes,
          pred_class.data.cpu().numpy().reshape(-1, 1)])
     if ignore_background:
         return pred_targets[pred_targets[:, -1] != 0]
     return pred_targets
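The indexing trick `pred_bbox_deltas[idx, pred_class]` picks, for each RoI, only the 4 regression values belonging to its predicted class. A tiny standalone demo of that gather (the shapes are illustrative):

import torch

deltas = torch.randn(5, 21, 4)            # 5 RoIs, 21 classes, 4 deltas each
pred_class = torch.tensor([3, 0, 7, 7, 20])
idx = torch.arange(5)

top_deltas = deltas[idx, pred_class]      # (5, 4): one delta row per RoI
print(top_deltas.shape)                   # torch.Size([5, 4])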
Example 7
 def bbox_reg(self, boxes, box_deltas, im):
     boxes = boxes.data[:, 1:].numpy()
     box_deltas = box_deltas.data.numpy()
     pred_boxes = bbox_transform_inv(boxes, box_deltas)
     pred_boxes = clip_boxes(pred_boxes, im.size()[-2:])
     return to_var(pred_boxes)
Example 8
    def forward(self, input):
        """
        Parameters
        ----------
        input - list contains:
            cls_prob_alls: (BS, H, W, Ax2) outputs of RPN (here - Feature Pyramid Network),
                           prob of bg or fg;
            bbox_pred_alls: (BS, H, W, Ax4), regressed boxes output of RPN;
            im_info: a list of [image_height, image_width, scale_ratios];
            rpn_shapes: width and height of feature map;

        Returns
        ----------
        rpn_rois : (1 x H x W x A, 5) e.g. [0, x1, y1, x2, y2]

        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)
        """

        scores = input[0][:, :, 1]  # batch_size x num_rois (fg probability)
        bbox_deltas = input[1]      # batch_size x num_rois x 4
        im_info = input[2]
        feat_shapes = input[3]      # rpn_shapes: per-level feature-map sizes
        batch_size = scores.size(0)
        pre_nms_topN = self.pre_nms_topN

        anchors = torch.from_numpy(generate_anchors_all_pyramids(
            self.fpn_scales, self.anchor_ratios,
            feat_shapes, self.feat_strides, self.fpn_anchor_stride)).type_as(scores)
        num_anchors = anchors.size(0)

        anchors = anchors.view(1, num_anchors, 4).expand(batch_size, num_anchors, 4)

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info, batch_size)
        # keep_idx = self._filter_boxes(proposals, min_size).squeeze().long().nonzero().squeeze()

        scores_keep = scores
        proposals_keep = proposals

        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, self.post_nms_topN, 5).zero_()

        for i in range(batch_size):
            # 3. remove predicted boxes with either height or width < threshold
            # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            # 4. sort all (proposal, score) pairs by score from highest to lowest
            # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)

            keep_idx_i = nms(proposals_single, scores_single, self.rpn_nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            if self.post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:self.post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # pad with zeros at the end so every batch entry has post_nms_topN rows
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output

    def backward(self, top, propagate_down, bottom):
        """This layer does not propagate gradients."""
        pass

    def reshape(self, bottom, top):
        """Reshaping happens during the call to forward."""
        pass

    def _filter_boxes(self, boxes, min_size):
        """Remove all boxes with any side smaller than min_size."""
        ws = boxes[:, :, 2] - boxes[:, :, 0] + 1
        hs = boxes[:, :, 3] - boxes[:, :, 1] + 1
        keep = ((ws >= min_size) & (hs >= min_size))
        return keep
Example 9
def produce_batch(feature_map, gt_boxes, h_w=None, category=None):
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height

    w_stride = h_w[1] / width
    h_stride = h_w[0] / height
    #base anchors are 9 anchors wrt a tile (0,0,w_stride-1,h_stride-1)
    base_anchors = generate_anchors(w_stride, h_stride)
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    all_anchors = (base_anchors.reshape((1, anchors_num, 4)) + shifts.reshape(
        (1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * anchors_num
    all_anchors = all_anchors.reshape((total_anchors, 4))
    # run the trained RPN on the feature map to get scores and deltas
    res = rpn_model.query_cnn(feature_map)
    scores = res[0]
    scores = scores.reshape(-1, 1)
    deltas = res[1]
    deltas = np.reshape(deltas, (-1, 4))
    # convert the dx/dy deltas into concrete xy coordinates and drop anchors outside the image
    proposals = bbox_transform_inv(all_anchors, deltas)
    proposals = clip_boxes(proposals, (h_w[0], h_w[1]))
    # remove small boxes
    keep = filter_boxes(proposals,
                        small_box_threshold)  # the threshold here is 40 pixels
    proposals = proposals[keep, :]
    scores = scores[keep]

    # sort scores and keep only the top 6000.
    pre_nms_topN = 6000
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    # apply NMS to the top 6000, then keep the top 300
    post_nms_topN = 300
    keep = py_cpu_nms(np.hstack((proposals, scores)), 0.7)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    # add the ground-truth boxes to the proposals as well
    proposals = np.vstack((proposals, gt_boxes))
    # calculate overlaps of proposal and gt_boxes
    overlaps = bbox_overlaps(proposals, gt_boxes)
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # labels = gt_labels[gt_assignment] #?

    # sub sample
    fg_inds = np.where(max_overlaps >= FG_THRESH)[0]
    fg_rois_per_this_image = min(int(BATCH * FG_FRAC), fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds,
                             size=fg_rois_per_this_image,
                             replace=False)
    bg_inds = np.where((max_overlaps < BG_THRESH_HI)
                       & (max_overlaps >= BG_THRESH_LO))[0]
    bg_rois_per_this_image = BATCH - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds,
                             size=bg_rois_per_this_image,
                             replace=False)
    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    # labels = labels[keep_inds]
    rois = proposals[keep_inds]
    gt_rois = gt_boxes[gt_assignment[keep_inds]]
    targets = bbox_transform(rois, gt_rois)  #input rois
    rois_num = targets.shape[0]
    batch_box = np.zeros((rois_num, 200, 4))
    for i in range(rois_num):
        batch_box[i, category] = targets[i]
    batch_box = np.reshape(batch_box, (rois_num, -1))
    # get gt category
    batch_categories = np.zeros((rois_num, 200, 1))
    for i in range(rois_num):
        batch_categories[i, category] = 1
    batch_categories = np.reshape(batch_categories, (rois_num, -1))
    return rois, batch_box, batch_categories
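`bbox_overlaps` above computes the pairwise IoU matrix between proposals and ground-truth boxes. A minimal NumPy sketch of that helper, assuming corner-format (x1, y1, x2, y2) boxes:

import numpy as np

def bbox_overlaps(boxes, gt_boxes):
    """Pairwise IoU. boxes: (N, 4), gt_boxes: (K, 4); returns (N, K)."""
    # intersection corners via broadcasting: (N, 1, 2) against (1, K, 2)
    lt = np.maximum(boxes[:, None, :2], gt_boxes[None, :, :2])
    rb = np.minimum(boxes[:, None, 2:], gt_boxes[None, :, 2:])
    wh = np.clip(rb - lt + 1, 0, None)
    inter = wh[..., 0] * wh[..., 1]

    area_b = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    area_g = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * (gt_boxes[:, 3] - gt_boxes[:, 1] + 1)
    return inter / (area_b[:, None] + area_g[None, :] - inter)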
Example 10
imdb = eyelevel5k(
    '/media/zehao/WD/Dataset/processed/car_dataset/Rendered/eyelevel5K/images',
    '/media/zehao/WD/Dataset/processed/car_dataset/Rendered/eyelevel5K/annotations'
)

net = caffe.Net(network_proto_path, network_model_path, caffe.TEST)
transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
transformer.set_input_scale('data', 0.1)
transformer.set_mean('data', np.array([104, 117, 123]))
transformer.set_transpose('data', (2, 0, 1))

while True:
    # Generate test images
    img, bboxes = imdb.read()
    jittered_bboxes = bbox_jitter(bboxes[0], 0.1, 3)
    gt_bbox = bbox_transform_inv(bboxes[0])
    j_box = bbox_transform_inv(jittered_bboxes[0])
    test_img = img[j_box[1]:j_box[1] + j_box[3],
                   j_box[0]:j_box[0] + j_box[2], :]
    test_img = caffe.io.resize(test_img, [48, 48, 3])

    offset = net.forward_all(
        data=np.asarray([transformer.preprocess('data', test_img)]))
    print offset['conv6'][0, :]
    print j_box
    out_box = shift(j_box, offset['conv6'][0, :])
    cv2.rectangle(img, (int(out_box[0]), int(out_box[1])),
                  (int(out_box[2]), int(out_box[3])), (0, 255, 0), 1)
    cv2.rectangle(img, (int(j_box[0]), int(j_box[1])),
                  (int(j_box[2]), int(j_box[3])), (255, 0, 0), 1)
    cv2.rectangle(img, (int(gt_bbox[0]), int(gt_bbox[1])),
                  (int(gt_bbox[2]), int(gt_bbox[3])), (0, 0, 255), 1)  # color assumed; the snippet was truncated here
Example 11
        #restorer = tf.train.Saver(variables_to_restore)
        #restorer.restore(sess, MODEL_CKPT)

        #lr, train_opt = construct_graph(net, sess)
        saver = tf.train.Saver()
        saver.restore(sess, "./checkpoint.ckpt")
        #init = tf.global_variables_initializer()
        #sess.run(init)
        for i in range(0, 150):
            blob = loader.fetch()
            roi_score, rois, rpn_cls_prob, cls_pred, bbox_pred = net.test_image(sess, blob["data"], blob["im_info"])
            #roi_score, rois, rpn_cls_prob = net.test_image_train(sess,blob["data"],blob["im_info"],blob['gt_boxes'])
            index = np.where(cls_pred == 1)[0]
            print("roi_score_num : " + str(bbox_pred.shape[0]) + " roi_index_num : " + str(index.shape[0]))
            #print(rois[index][:,1:5])
            bbox = bbox_transform_inv(rois[:, 1:5], bbox_pred)
            print(bbox)
            print(bbox.shape)

            bbox = bbox[index]
            print(bbox.shape)

            #overlaps = bbox_overlaps(rois[index][:,1:5], blob["gt_boxes"])
            print("bbox_overlaps debug")
            #print(overlaps[np.where(overlaps>threshold_overlaps)])

            #print(rois[np.where(overlaps>threshold_overlaps)[0]])
            #high_prob_roi = rois[index][np.where(overlaps>threshold_overlaps)[0]]

            img = blob["pil_im"]
            brush = ImageDraw.Draw(img)
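The snippet cuts off right after creating the `ImageDraw` handle. A plausible continuation, purely as an illustration of how the decoded boxes might be rendered with PIL; it reuses the snippet's `bbox`, `brush`, `img`, and `i`, and the color and filename are assumptions:

            # hypothetical continuation: draw each predicted box on the PIL image
            for x1, y1, x2, y2 in bbox:
                brush.rectangle([int(x1), int(y1), int(x2), int(y2)], outline=(0, 255, 0))
            img.save("detections_%d.png" % i)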
Example 12
import os.path
import cv2

from dataset.eyelevel5K import eyelevel5k
from utils import bbox_transform_inv, bbox_jitter, cal_offset

LABEL_OUT_PATH = '/media/zehao/Local2/car_regression_data/labels'
IMAGE_OUT_PATH = '/media/zehao/Local2/car_regression_data/images'

if __name__ == '__main__':
  imdb = eyelevel5k('/media/zehao/WD/Dataset/processed/car_dataset/Rendered/eyelevel5K/images',
             '/media/zehao/WD/Dataset/processed/car_dataset/Rendered/eyelevel5K/annotations')
  for i in range(imdb.num_images):
    img, bboxes = imdb.read()
    jittered_bboxes = bbox_jitter(img, bboxes[0], 0.2, 15)
    gt_bbox = bbox_transform_inv(bboxes[0])
    for j_bbox, j in zip(jittered_bboxes, range(len(jittered_bboxes))):
      j_bbox = bbox_transform_inv(j_bbox)
      j_img = img[j_bbox[1]:j_bbox[3], j_bbox[0]:j_bbox[2], :]
      print os.path.join(IMAGE_OUT_PATH, str(i)+'_'+str(j))+'.jpg'
      cv2.imwrite(os.path.join(IMAGE_OUT_PATH, str(i)+'_'+str(j))+'.jpg', j_img)
      offset = cal_offset(gt_bbox, j_bbox)
      with open(os.path.join(LABEL_OUT_PATH, str(i)+'_'+str(j)+'.txt'), 'w') as f:
        f.write(str(offset[0])+' '+str(offset[1])+' '+str(offset[2])+' '+str(offset[3]))
Example 13
def produce_batch(filepath, gt_boxes, h_w, category):
    img = load_img(filepath)
    img_width = np.shape(img)[1] * scale[1]
    img_height = np.shape(img)[0] * scale[0]
    img = img.resize((int(img_width), int(img_height)))
    #feed image to pretrained model and get feature map
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    feature_map = pretrained_model.predict(img)
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height
    #calculate output w, h stride
    w_stride = h_w[1] / width
    h_stride = h_w[0] / height
    #generate base anchors according output stride.
    #base anchors are 9 anchors wrt a tile (0,0,w_stride-1,h_stride-1)
    base_anchors = generate_anchors(w_stride, h_stride)
    #slice tiles according to image size and stride.
    #each 1x1x1532 feature vector maps to one tile.
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    #apply base anchors to all tiles, to have a num_feature_map*9 anchors.
    all_anchors = (base_anchors.reshape((1, 9, 4)) + shifts.reshape(
        (1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * 9
    all_anchors = all_anchors.reshape((total_anchors, 4))
    # feed feature map to pretrained RPN model, get proposal labels and bboxes.
    res = rpn_model.predict(feature_map)
    scores = res[0]
    scores = scores.reshape(-1, 1)
    deltas = res[1]
    deltas = np.reshape(deltas, (-1, 4))
    # transform proposals to bbox values (x1, y1, x2, y2)
    proposals = bbox_transform_inv(all_anchors, deltas)
    proposals = clip_boxes(proposals, (h_w[0], h_w[1]))
    # remove small boxes; the threshold here is 40 pixels
    keep = filter_boxes(proposals, 40)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # sort scores and keep only the top 6000.
    pre_nms_topN = 6000
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    # apply NMS to the top 6000, then keep the top 300
    post_nms_topN = 300
    keep = py_cpu_nms(np.hstack((proposals, scores)), 0.7)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    # add gt_boxes to proposals.
    proposals = np.vstack((proposals, gt_boxes))
    # calculate overlaps of proposal and gt_boxes
    overlaps = bbox_overlaps(proposals, gt_boxes)
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # labels = gt_labels[gt_assignment] #?

    # sub sample
    fg_inds = np.where(max_overlaps >= FG_THRESH)[0]
    fg_rois_per_this_image = min(int(BATCH * FG_FRAC), fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds,
                             size=fg_rois_per_this_image,
                             replace=False)
    bg_inds = np.where((max_overlaps < BG_THRESH_HI)
                       & (max_overlaps >= BG_THRESH_LO))[0]
    bg_rois_per_this_image = BATCH - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds,
                             size=bg_rois_per_this_image,
                             replace=False)
    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    # labels = labels[keep_inds]
    rois = proposals[keep_inds]
    gt_rois = gt_boxes[gt_assignment[keep_inds]]
    targets = bbox_transform(rois, gt_rois)  #input rois
    rois_num = targets.shape[0]
    batch_box = np.zeros((rois_num, 200, 4))
    for i in range(rois_num):
        batch_box[i, category] = targets[i]
    batch_box = np.reshape(batch_box, (rois_num, -1))
    # get gt category
    batch_categories = np.zeros((rois_num, 200, 1))
    for i in range(rois_num):
        batch_categories[i, category] = 1
    batch_categories = np.reshape(batch_categories, (rois_num, -1))
    return rois, batch_box, batch_categories
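Both produce_batch variants finish with `bbox_transform(rois, gt_rois)`, the encoding step that is the inverse of `bbox_transform_inv`: it turns each (roi, ground-truth) pair back into (dx, dy, dw, dh) regression targets. A NumPy sketch of the standard form:

import numpy as np

def bbox_transform(ex_rois, gt_rois):
    """Regression targets (dx, dy, dw, dh) that map ex_rois onto gt_rois."""
    ex_w = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_h = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_cx = ex_rois[:, 0] + 0.5 * ex_w
    ex_cy = ex_rois[:, 1] + 0.5 * ex_h

    gt_w = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_h = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_cx = gt_rois[:, 0] + 0.5 * gt_w
    gt_cy = gt_rois[:, 1] + 0.5 * gt_h

    dx = (gt_cx - ex_cx) / ex_w
    dy = (gt_cy - ex_cy) / ex_h
    dw = np.log(gt_w / ex_w)
    dh = np.log(gt_h / ex_h)
    return np.vstack((dx, dy, dw, dh)).transpose()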