def _rcnn_forward(self, im, thres=0.5):
        im, im_scale = resize(im, self.target_size, self.max_size)
        im_tensor = transform(im, self.input_mean, self.input_scale)
        im_info = np.array(
            [[im_tensor.shape[2], im_tensor.shape[3], im_scale]],
            dtype=np.float32)

        data = [mx.nd.array(im_tensor), mx.nd.array(im_info)]
        data_shapes = [('data', im_tensor.shape), ('im_info', im_info.shape)]
        data_batch = mx.io.DataBatch(data=data,
                                     label=None,
                                     provide_data=data_shapes,
                                     provide_label=None)
        output = self.predict(data_batch)

        rois = output['rois_output'].asnumpy()[:, 1:]

        # save output
        scores = output['cls_prob_reshape_output'].asnumpy()[0]
        bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

        # post processing
        pred_boxes = bbox_pred(rois, bbox_deltas)
        pred_boxes = clip_boxes(pred_boxes, im_tensor.shape[-2:])

        # the network was trained on the scaled image and rois, so map the boxes back to the original scale
        pred_boxes = pred_boxes / im_scale

        return self._post_process(scores, pred_boxes, thres)
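The example above decodes the network's regression output with bbox_pred() from the surrounding rcnn codebase. For reference, here is a minimal sketch of the conventional Faster R-CNN delta decoding, assuming the usual (dx, dy, dw, dh) parameterization; it is illustrative only, not the project's exact helper.

import numpy as np

def bbox_pred_sketch(boxes, box_deltas):
    """Apply (dx, dy, dw, dh) deltas to boxes given as (x1, y1, x2, y2)."""
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * (widths - 1.0)
    ctr_y = boxes[:, 1] + 0.5 * (heights - 1.0)

    dx = box_deltas[:, 0::4]
    dy = box_deltas[:, 1::4]
    dw = box_deltas[:, 2::4]
    dh = box_deltas[:, 3::4]

    # shift the box centers and rescale width/height in log space
    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred_boxes = np.zeros(box_deltas.shape)
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * (pred_w - 1.0)  # x1
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * (pred_h - 1.0)  # y1
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * (pred_w - 1.0)  # x2
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * (pred_h - 1.0)  # y2
    return pred_boxes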
Example #2
def im_detect(predictor, data_batch, data_names, scale):
    output = predictor.predict(data_batch)

    data_dict = dict(zip(data_names, data_batch.data))
    if config.TEST.HAS_RPN:
        rois = output['rois_output'].asnumpy()[:, 1:]
    else:
        rois = data_dict['rois'].asnumpy().reshape((-1, 5))[:, 1:]
    im_shape = data_dict['data'].shape

    # save output
    scores = output['cls_prob_reshape_output'].asnumpy()[0]
    bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]

    # post processing
    pred_boxes = bbox_pred(rois, bbox_deltas)
    pred_boxes = clip_boxes(pred_boxes, im_shape[-2:])

    # the network was trained on the scaled image and rois, so map the boxes back to the original scale
    pred_boxes = pred_boxes / scale

    return scores, pred_boxes, data_dict
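im_detect() also clips the decoded boxes to the image with clip_boxes(). A minimal sketch of that standard helper, assuming boxes hold (x1, y1, x2, y2) columns and im_shape is (height, width); illustrative only, not the project's exact code.

import numpy as np

def clip_boxes_sketch(boxes, im_shape):
    """Clamp (x1, y1, x2, y2) columns of boxes to an image of shape (height, width)."""
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)  # x1
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)  # y1
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)  # x2
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)  # y2
    return boxes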
Example #3
 def _rcnn_forward(self, im, rois=None, thres=0.5):
     debug = False
     im, im_scale = resize(im, self.target_size, self.max_size, self.image_stride)
     im_tensor = transform(im, self.input_mean, self.input_scale)
     if self.proposal_type == 'rpn':
         im_info = np.array([[im_tensor.shape[2], im_tensor.shape[3], im_scale]], dtype=np.float32)
         
         if len(self.feat_sym) == 0:
             data_shapes = [('data', im_tensor.shape), ('im_info', im_info.shape)]
             data = [mx.nd.array(im_tensor), mx.nd.array(im_info)] 
         else:
             data_shape = {'data': im_tensor.shape}
             feat_shapes = []
             for feat in self.feat_sym:
                 _, feat_shape, _ = feat.infer_shape(**data_shape)  # output shape of each RPN feature map (one per FPN level)
                 feat_shape = [int(i) for i in feat_shape[0]]
                 feat_shapes.append(feat_shape)
             feat_shape_ = np.array(feat_shapes[0])
             for i in range(1, len(feat_shapes)):
                 feat_shape_ = np.vstack((feat_shape_, np.array(feat_shapes[i])))  # e.g. (5, 4)
             final_feat_shape = feat_shape_[np.newaxis, :, :]  # e.g. (1, 5, 4)
             data_shapes = [('data', im_tensor.shape), ('im_info', im_info.shape), ('feat_shape', final_feat_shape.shape)]
             data = [mx.nd.array(im_tensor), mx.nd.array(im_info), mx.nd.array(final_feat_shape)] 
     elif self.proposal_type == 'existed_roi':
         assert rois is not None
         rois = rois.reshape(1, -1, 5)
         data = [mx.nd.array(im_tensor), mx.nd.array(rois)]
         data_shapes = [('data', im_tensor.shape), ('rois', rois.shape)]
     
     data_batch = mx.io.DataBatch(data=data, label=None, provide_data=data_shapes,
                                  provide_label=None)
     t = time.time()
     output = self.predict(data_batch)
     
     if self.proposal_type == 'rpn':
         rois = output['rois_output'].asnumpy()[:, 1:]
         if debug:
             f2 = open('predict.txt', 'w')
             roid_pred = output['rois_output'].asnumpy()  # (num_rois, 5)
             for i in range(roid_pred.shape[0]):
                 w = max(0, int(roid_pred[i, 3] - roid_pred[i, 1]))
                 h = max(0, int(roid_pred[i, 4] - roid_pred[i, 2]))
                 s = w * h
                 print(im.shape)
                 if w < 50 or h < 300 or w > im.shape[1] or h > im.shape[0] or w > h:
                     continue
                 cv2.rectangle(im, (int(roid_pred[i, 1]), int(roid_pred[i, 2])),
                               (int(roid_pred[i, 3]), int(roid_pred[i, 4])), (255, 0, 0), 1)
                 f2.write(str(roid_pred[i, :]) + '\n')
             cv2.imwrite('rpn_result.jpg', im)
             f2.close()
     elif self.proposal_type == 'existed_roi':
         rois = rois[0][:, 1:]
     
     # save output
     scores = output['cls_prob_reshape_output'].asnumpy()[0]
     bbox_deltas = output['bbox_pred_reshape_output'].asnumpy()[0]
     t1 = time.time() - t
     print('predict: {:.4f}s'.format(t1))
     # post processing
     if 0:  # extra debug dump of rois, deltas and scores
         for i in range(rois.shape[0]):
             f2.write('rois: ' + str(i) + '    :   ' + str(rois[i, :]) +
                      ' ############# bbox_deltas: ' + str(bbox_deltas[i, 4:]) +
                      ' ################# cls_pred: ' + str(scores[i, 1]) + '\n')
     person_score = scores[:, 1]
     max_score = max(person_score)
     print(max_score)
     pred_boxes = bbox_pred(rois, bbox_deltas)
     pred_boxes = clip_boxes(pred_boxes, im_tensor.shape[-2:])
 
     # the network was trained on the scaled image and rois, so map the boxes back to the original scale
     pred_boxes = pred_boxes / im_scale
     
     return self._post_process(scores, pred_boxes, thres)
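_post_process() itself is not shown in this example. A plausible sketch of what such a step usually does, assuming per-class score thresholding followed by per-class NMS; the helper below and its extra arguments are hypothetical, not the project's actual code.

import numpy as np

def post_process_sketch(scores, pred_boxes, thres, nms_func, num_classes):
    """Hypothetical post-processing: per-class score threshold, then NMS per class."""
    detections = []
    for cls in range(1, num_classes):  # class 0 is background
        cls_scores = scores[:, cls]
        keep = np.where(cls_scores > thres)[0]
        cls_boxes = pred_boxes[keep, cls * 4:(cls + 1) * 4]
        dets = np.hstack((cls_boxes, cls_scores[keep, np.newaxis])).astype(np.float32)
        dets = dets[nms_func(dets)]  # nms_func returns the indices to keep
        detections.append(dets)
    return detections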
Example #4
    def forward(self, is_train, req, in_data, out_data, aux):
        nms = gpu_nms_wrapper(self._threshold, 0)
        batch_size = in_data[0].shape[0]
        if batch_size > 1:
            raise ValueError(
                "Sorry, multiple images per device is not implemented")

        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n
        min_size = self._rpn_min_size

        # the first set of anchors are background probabilities
        # keep the second part
        scores_list = in_data[0].asnumpy()  # (1, sum of 2*A*H*W over all FPN levels)
        bbox_deltas_list = in_data[1].asnumpy()  # (1, sum of 4*A*H*W over all FPN levels)
        im_info = in_data[2].asnumpy()[0, :]
        feat_shape = in_data[3].asnumpy()
        #t = time.time()
        #print 'feat_shape:', feat_shape
        num_feat = feat_shape.shape[1]  #[1,5,4]
        score_index_start = 0
        bbox_index_start = 0
        keep_proposal = []
        keep_scores = []
        #t_1 = time.time()
        for i in range(num_feat):
            feat_stride = int(self._feat_stride[i])  #4,8,16,32,64
            #print 'feat_stride:', feat_stride
            anchor = generate_anchors(feat_stride,
                                      scales=self._scales,
                                      ratios=self._ratios)
            num_anchors = anchor.shape[0]  #3
            height = feat_shape[0, i, 2]
            width = feat_shape[0, i, 3]

            shift_x = np.arange(0, width) * feat_stride
            shift_y = np.arange(0, height) * feat_stride
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)
            shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                                shift_x.ravel(), shift_y.ravel())).transpose()
            A = num_anchors  #3
            K = shifts.shape[0]  #height*width
            anchors = anchor.reshape((1, A, 4)) + shifts.reshape(
                (1, K, 4)).transpose((1, 0, 2))
            anchors = anchors.reshape((K * A, 4))  # (K*A, 4) = (3*height*width, 4)
            scores = scores_list[0, int(score_index_start):int(score_index_start + K * A * 2)]
            scores = scores.reshape((1, int(2 * num_anchors), -1, int(width)))  # (1, 2*A, H, W)
            scores = scores[:, num_anchors:, :, :]  # keep the foreground scores: (1, A, H, W)
            bbox_deltas = bbox_deltas_list[0, int(bbox_index_start):int(bbox_index_start + K * A * 4)]
            bbox_deltas = bbox_deltas.reshape((1, int(4 * num_anchors), -1, int(width)))  # (1, 4*A, H, W)
            score_index_start += K * A * 2
            bbox_index_start += K * A * 4
            bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
            bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape(
                (-1, 4))  #[1,h,w,12]--->[1*h*w*3,4]
            scores = self._clip_pad(scores, (height, width))
            scores = scores.transpose((0, 2, 3, 1)).reshape(
                (-1, 1))  #[1,h,w,3]--->[1*h*w*3,1]
            proposals = bbox_pred(anchors, bbox_deltas)  # decode the deltas on top of the anchors
            proposals = clip_boxes(proposals, im_info[:2])
            keep = self._filter_boxes(proposals, min_size[i] * im_info[2])
            keep_proposal.append(proposals[keep, :])
            keep_scores.append(scores[keep])

        proposals = keep_proposal[0]
        scores = keep_scores[0]
        for i in range(1, num_feat):
            proposals = np.vstack((proposals, keep_proposal[i]))
            scores = np.vstack((scores, keep_scores[i]))
        #print 'roi concate t_1 spends :{:.4f}s'.format(time.time()-t_1)
        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        #t_2 = time.time()
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]
        #print 'roi concate t_2_1_1 spends :{:.4f}s'.format(time.time()-t_2)
        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        #t_nms = time.time()
        det = np.hstack((proposals, scores)).astype(np.float32)
        keep = nms(det)
        #print 'roi concate nms spends :{:.4f}s'.format(time.time()-t_nms)

        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        if len(keep) < post_nms_topN:
            try:
                pad = npr.choice(keep, size=post_nms_topN - len(keep))
            except ValueError:
                # NMS left no proposals to sample from; emit fixed dummy boxes so
                # the output keeps its (post_nms_topN, 5) shape
                proposals = np.zeros((post_nms_topN, 4), dtype=np.float32)
                proposals[:, 2] = 16
                proposals[:, 3] = 16
                batch_inds = np.zeros((proposals.shape[0], 1),
                                      dtype=np.float32)
                blob = np.hstack(
                    (batch_inds, proposals.astype(np.float32, copy=False)))
                self.assign(out_data[0], req[0], blob)

                if self._output_score:
                    self.assign(out_data[1], req[1],
                                scores.astype(np.float32, copy=False))
                return
            keep = np.hstack((keep, pad))
        proposals = proposals[keep, :]
        scores = scores[keep]
        #print 'roi concate t_2 spends :{:.4f}s'.format(time.time()-t_2)
        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        self.assign(out_data[0], req[0], blob)

        if self._output_score:
            self.assign(out_data[1], req[1],
                        scores.astype(np.float32, copy=False))
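The operator above also relies on _filter_boxes() to drop proposals whose width or height falls below min_size (scaled into the input image by im_info[2]). A minimal sketch of that standard RPN helper, offered as an assumption rather than the verbatim method:

import numpy as np

def filter_boxes_sketch(boxes, min_size):
    """Return indices of boxes whose width and height are both >= min_size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep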
    def forward(self, is_train, req, in_data, out_data, aux):
        #nms = gpu_nms_wrapper(self._threshold, in_data[0].context.device_id)
        nms = py_nms_wrapper(self._threshold)
        batch_size = in_data[0].shape[0]
        if batch_size > 1:
            raise ValueError(
                "Sorry, multiple images per device is not implemented")

        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        pre_nms_topN = self._rpn_pre_nms_top_n
        post_nms_topN = self._rpn_post_nms_top_n
        min_size = self._rpn_min_size

        # the first set of anchors are background probabilities
        # keep the second part
        scores = in_data[0].asnumpy()[:, self._num_anchors:, :, :]
        bbox_deltas = in_data[1].asnumpy()

        im_info = in_data[2].asnumpy()[0, :]

        if DEBUG:
            print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
            print('scale: {}'.format(im_info[2]))
        # feat_shape = in_data[3].asnumpy()
        # # 1. Generate proposals from bbox_deltas and shifted anchors
        # # use real image size instead of padded feature map sizes
        # height = feat_shape[0,i,2]
        # width = feat_shape[0,i,3]
        height, width = int(im_info[0] / self._feat_stride), int(
            im_info[1] / self._feat_stride)

        if DEBUG:
            print('score map size: {}'.format(scores.shape))
            print('residual: {}'.format(
                (scores.shape[2] - height, scores.shape[3] - width)))

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + shifts.reshape(
            (1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order

        bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = self._clip_pad(scores, (height, width))
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_pred(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = self._filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        det = np.hstack((proposals, scores)).astype(np.float32)
        keep = nms(det)

        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        # pad to ensure output size remains unchanged
        if len(keep) < post_nms_topN:
            pad = npr.choice(keep, size=post_nms_topN - len(keep))
            keep = np.hstack((keep, pad))

        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois array
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        blob = np.hstack((batch_inds, proposals.astype(np.float32,
                                                       copy=False)))
        self.assign(out_data[0], req[0], blob)

        if self._output_score:
            self.assign(out_data[1], req[1],
                        scores.astype(np.float32, copy=False))
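Both forward() variants delegate step 6 to py_nms_wrapper() / gpu_nms_wrapper(). A minimal sketch of greedy IoU-based non-maximum suppression over (x1, y1, x2, y2, score) rows, assuming the standard algorithm; illustrative, not the project's wrapper.

import numpy as np

def nms_sketch(dets, thresh):
    """Greedy NMS: keep the best-scoring box, suppress overlapping ones, repeat."""
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # intersection of the current best box with every remaining box
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        iou = (w * h) / (areas[i] + areas[order[1:]] - w * h)
        # drop boxes that overlap the current best box too strongly
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep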