Example #1
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors, is_training):
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if is_training:
        order = order[:train_rpn_pre_nms_topN]
    else:
        order = order[:test_rpn_pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    keep = nms(np.hstack((proposals, scores)), rpn_nms_thresh)
    if is_training:
        keep = keep[:train_rpn_nms_post_topN]
    else:
        keep = keep[:test_rpn_nms_post_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    # Only a single image is supported as input
    batch_indx = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_indx, proposals.astype(np.float32, copy=False)))
    return blob, scores
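
All of the examples on this page lean on the same pair of helpers, bbox_transform_inv and clip_boxes. For reference, a minimal NumPy sketch in the style of the standard py-faster-rcnn versions; the exact implementations vary slightly between the repositories these snippets come from:

import numpy as np

def bbox_transform_inv(boxes, deltas):
    # Apply predicted deltas (dx, dy, dw, dh) to boxes given as (x1, y1, x2, y2),
    # using the standard Faster R-CNN parameterization.
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights
    dx, dy, dw, dh = deltas[:, 0], deltas[:, 1], deltas[:, 2], deltas[:, 3]
    pred_ctr_x = dx * widths + ctr_x
    pred_ctr_y = dy * heights + ctr_y
    pred_w = np.exp(dw) * widths
    pred_h = np.exp(dh) * heights
    pred_boxes = np.zeros_like(deltas)
    pred_boxes[:, 0] = pred_ctr_x - 0.5 * pred_w
    pred_boxes[:, 1] = pred_ctr_y - 0.5 * pred_h
    pred_boxes[:, 2] = pred_ctr_x + 0.5 * pred_w
    pred_boxes[:, 3] = pred_ctr_y + 0.5 * pred_h
    return pred_boxes

def clip_boxes(boxes, im_shape):
    # Clamp box coordinates to a (height, width) image.
    boxes[:, 0::4] = np.clip(boxes[:, 0::4], 0, im_shape[1] - 1)
    boxes[:, 1::4] = np.clip(boxes[:, 1::4], 0, im_shape[0] - 1)
    boxes[:, 2::4] = np.clip(boxes[:, 2::4], 0, im_shape[1] - 1)
    boxes[:, 3::4] = np.clip(boxes[:, 3::4], 0, im_shape[0] - 1)
    return boxes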
Example #2
def slpn_pred(ROIs, P_cls, P_regr, C, bbox_thresh=0.1, nms_thresh=0.3,roi_stride=8):
	# the classifier outputs boxes as (x, y, w, h), downscaled by roi_stride
	scores = np.squeeze(P_cls[:,:,0], axis=0)
	regr = np.squeeze(P_regr, axis=0)
	rois = np.squeeze(ROIs, axis=0)

	keep = np.where(scores>=bbox_thresh)[0]
	if len(keep)==0:
		return [], []

	rois[:, 2] += rois[:, 0]
	rois[:, 3] += rois[:, 1]
	rois = rois[keep]*roi_stride
	scores = scores[keep]
	regr = regr[keep]*np.array(C.classifier_regr_std).astype(dtype=np.float32)
	# regr = regr[keep]
	pred_boxes = bbox_transform_inv(rois, regr)
	pred_boxes = clip_boxes(pred_boxes, [C.random_crop[0],C.random_crop[1]])

	keep = np.where((pred_boxes[:,2]-pred_boxes[:,0]>=3)&
					(pred_boxes[:,3]-pred_boxes[:,1]>=3))[0]
	pred_boxes = pred_boxes[keep]
	scores = scores[keep].reshape((-1,1))

	keep = nms(np.hstack((pred_boxes, scores)), nms_thresh, usegpu=False, gpu_id=0)
	pred_boxes = pred_boxes[keep]
	scores = scores[keep]

	return pred_boxes, scores
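
The nms call above (and in most examples below) takes rows of (x1, y1, x2, y2, score) built with np.hstack. A minimal pure-NumPy sketch of greedy IoU suppression; the usegpu/gpu_id flags in the original select a CUDA kernel and are ignored here:

import numpy as np

def nms(dets, thresh):
    # Greedy non-maximum suppression: repeatedly keep the highest-scoring
    # box and drop every remaining box whose IoU with it exceeds thresh.
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep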
Example #3
def pred_det(anchors, cls_pred, regr_pred, C, step=1):
    if step == 1:
        scores = cls_pred[0, :, :]
    elif step == 2:
        scores = anchors[:, -1:] * cls_pred[0, :, :]
    elif step == 3:
        scores = anchors[:, -2:-1] * anchors[:, -1:] * cls_pred[0, :, :]
    A = np.copy(anchors[:, :4])
    bbox_deltas = regr_pred.reshape((-1, 4))
    bbox_deltas = bbox_deltas * np.array(
        C.classifier_regr_std).astype(dtype=np.float32)

    proposals = bbox_transform_inv(A, bbox_deltas)
    proposals = clip_boxes(proposals, [C.random_crop[0], C.random_crop[1]])
    keep = filter_boxes(proposals, C.roi_stride)
    proposals = proposals[keep, :]
    scores = scores[keep]

    order = scores.ravel().argsort()[::-1]
    order = order[:C.pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    keep = np.where(scores > C.scorethre)[0]
    proposals = proposals[keep, :]
    scores = scores[keep]
    keep = nms(np.hstack((proposals, scores)),
               C.overlap_thresh,
               usegpu=False,
               gpu_id=0)

    keep = keep[:C.post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    return proposals, scores
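
filter_boxes is not defined in these snippets; a plausible definition, modeled on the _filter_boxes helper in py-faster-rcnn, which drops proposals smaller than min_size on either side:

import numpy as np

def filter_boxes(boxes, min_size):
    # Return indices of boxes whose width and height are both >= min_size.
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    return np.where((ws >= min_size) & (hs >= min_size))[0]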
Example #4
def pred_pp_2nd(anchors, cls_pred, regr_pred, C):
    scores = cls_pred[0, :, :]
    bbox_deltas = regr_pred.reshape((-1, 4))
    bbox_deltas = bbox_deltas * np.array(C.classifier_regr_std).astype(dtype=np.float32)
    anchors[:, :4] = bbox_transform_inv(anchors[:, :4], bbox_deltas)
    anchors[:, :4] = clip_boxes(anchors[:, :4], [C.random_crop[0], C.random_crop[1]])
    proposals = np.concatenate((anchors, scores), axis=-1)
    return proposals
Example #5
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride,
                       anchors, num_anchors):
    """A layer that just selects the top region proposals
     without using non-maximal suppression,
     For details please see the technical report

       self._im_info,
          # self._im_info = tf.placeholder(tf.float32, shape=[3])
          self._feat_stride,#16
          self._anchors, # 特征图的所有点的9个框对应原始坐标的  所有  坐标anchors  anchor_length和个数length
          self._num_anchors#9
            [tf.float32, tf.float32], name="proposal_top"
  """
    rpn_top_n = cfg.TEST.RPN_TOP_N
    # cfg.TEST.RPN_TOP_N = 5000

    # num_anchors = 9
    scores = rpn_cls_prob[:, :, :, num_anchors:]

    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))

    length = scores.shape[0]
    if length < rpn_top_n:  # 5000
        # Random selection, maybe unnecessary and loses good proposals,
        # but such a case rarely happens
        # npr.choice (npr = numpy.random) draws rpn_top_n indices with replacement
        top_inds = npr.choice(length, size=rpn_top_n, replace=True)
    else:
        # argsort returns indices that sort the scores ascending; reverse it
        # so the highest-scoring proposals come first
        top_inds = scores.argsort(0)[::-1]
        top_inds = top_inds[:rpn_top_n]  # take the top 5000
        top_inds = top_inds.reshape(rpn_top_n, )

    # Do the selection here
    anchors = anchors[top_inds, :]
    # top_inds selects the rpn_top_n best anchors; the second axis holds the four coordinates
    rpn_bbox_pred = rpn_bbox_pred[top_inds, :]
    scores = scores[top_inds]

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    # the inputs are the 5000 top anchors (feature-map boxes mapped back to
    # original-image coordinates) and their predicted regression deltas

    # Clip predicted boxes to the image
    proposals = clip_boxes(proposals, im_info[:2])

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
Example #6
def im_detect(net, im, boxes):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    # use softmax estimated probabilities
    scores = blobs_out['cls_prob']
    print(scores)

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = blobs_out['bbox_pred']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
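
The deduplication above scales each RoI down to feature-map resolution and hashes its five columns as base-1000 digits (the dot with v), so distinct image RoIs that land on the same feature cell collide and only one of them is run through the network. A small self-contained illustration; the RoI values are made up, and 1/16 matches the py-faster-rcnn default for cfg.DEDUP_BOXES:

import numpy as np

rois = np.array([[0, 10.2, 20.4, 50.1, 60.3],
                 [0, 10.4, 20.3, 50.2, 60.1],    # same feature cell as row 0
                 [0, 80.0, 90.0, 120.0, 130.0]])
v = np.array([1, 1e3, 1e6, 1e9, 1e12])
hashes = np.round(rois * (1.0 / 16)).dot(v)
_, index, inv_index = np.unique(hashes, return_index=True, return_inverse=True)
print(index)      # [0 2]: unique RoIs actually forwarded
print(inv_index)  # [0 0 1]: maps every original RoI to its representative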
Example #7
def generate_pp_2nd(all_anchors, regr_layer, C):
    A = np.copy(all_anchors[:, :4])
    proposals_batch = []
    for i in range(regr_layer.shape[0]):
        proposals = np.ones_like(all_anchors)

        bbox_deltas = regr_layer[i, :, :]

        bbox_deltas = bbox_deltas * np.array(C.classifier_regr_std).astype(dtype=np.float32)
        proposals[:, :4] = bbox_transform_inv(A, bbox_deltas)
        proposals = clip_boxes(proposals, [C.random_crop[0], C.random_crop[1]])
        proposals_batch.append(np.expand_dims(proposals, axis=0))
    return np.concatenate(proposals_batch, axis=0)
Example #8
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride,
                   anchors, num_anchors):
    """A simplified version compared to fast/er RCNN
     For details please see the technical report
  """
    if type(cfg_key) == bytes:
        cfg_key = cfg_key.decode('utf-8')
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N  # RPN_PRE_NMS_TOP_N = 6000
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    # __C.TEST.RPN_POST_NMS_TOP_N = 300, the maximum number of boxes kept after NMS

    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    # __C.TEST.RPN_NMS_THRESH = 0.7

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))

    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    # shape = (length, 4)
    # proposals holds the four coordinates of the actual predicted boxes:
    # the anchors (mapped back to the original image) regressed by rpn_bbox_pred

    proposals = clip_boxes(proposals, im_info[:2])
    # clamp the predicted coordinates so they stay within the original image

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
Example #9
def get_proposal(all_anchors, cls_layer, regr_layer, C, overlap_thresh=0.7,pre_nms_topN=1000,post_nms_topN=300, roi_stride=8):
	A = np.copy(all_anchors[:,:4])
	scores = cls_layer.reshape((-1,1))
	bbox_deltas = regr_layer.reshape((-1,4))
	proposals = bbox_transform_inv(A, bbox_deltas)
	proposals = clip_boxes(proposals, [C.random_crop[0],C.random_crop[1]])
	keep = filter_boxes(proposals, roi_stride)
	proposals = proposals[keep,:]
	scores = scores[keep]
	order = scores.ravel().argsort()[::-1]
	order = order[:pre_nms_topN]
	proposals =  proposals[order,:]
	scores = scores[order]
	keep = nms(np.hstack((proposals, scores)), overlap_thresh, usegpu=False, gpu_id=0)
	keep = keep[:post_nms_topN]
	proposals = proposals[keep,:]
	return proposals
Example #10
    def proposal_layer(self, rpn_cls_prob, rpn_bbox_pred, rpn_trans_param,
                       im_info):
        if self.is_train:
            pre_nms_top_n = self.config['train_rpn_pre_nms_top_n']
            post_nms_top_n = self.config['train_rpn_post_nms_top_n']
            nms_thresh = self.config['train_rpn_nms_thresh']
        else:
            pre_nms_top_n = self.config['test_rpn_pre_nms_top_n']
            post_nms_top_n = self.config['test_rpn_post_nms_top_n']
            nms_thresh = self.config['test_rpn_nms_thresh']

        # Get the scores and bounding boxes
        scores = rpn_cls_prob[:, :, :, self.num_anchors:]
        rpn_bbox_pred = rpn_bbox_pred.view((-1, 4))
        scores = scores.contiguous().view(-1, 1)
        rpn_trans_param = rpn_trans_param.view((-1, 6))

        proposals = bbox_transform_inv(self.anchors, rpn_bbox_pred)
        proposals = clip_boxes(proposals, im_info[:2])

        # Pick the top region proposals
        scores, order = scores.view(-1).sort(descending=True)
        if pre_nms_top_n > 0:
            order = order[:pre_nms_top_n]
            scores = scores[:pre_nms_top_n].view(-1, 1)
        proposals = proposals[order.data, :]
        trans_param = rpn_trans_param[order.data, :]

        # Non-maximal suppression
        keep = nms(torch.cat((proposals, scores), 1).data, nms_thresh)

        # Pick the top region proposals after NMS
        if post_nms_top_n > 0:
            keep = keep[:post_nms_top_n]
        proposals = proposals[keep, :]
        scores = scores[keep]
        trans_param = trans_param[keep, :]

        # Only support single image as input
        batch_inds = Variable(
            proposals.data.new(proposals.size(0), 1).zero_())
        blob = torch.cat((batch_inds, proposals), 1)

        return blob, scores, trans_param
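
This PyTorch port replaces the NumPy idiom scores.ravel().argsort()[::-1] used in the earlier examples with torch.sort(descending=True), which returns the sorted scores and their indices in one call. A minimal check that the two produce the same ordering:

import numpy as np
import torch

scores_np = np.array([0.2, 0.9, 0.5], dtype=np.float32)
order_np = scores_np.ravel().argsort()[::-1]              # [1 2 0]

sorted_t, order_t = torch.from_numpy(scores_np).view(-1).sort(descending=True)
print(order_np, order_t.numpy())                          # same ordering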
Example #11
    def test_single(self, image, rois, image_size, image_resize_ratio):
        """ Test a single image on the net.
        Args:
            image: A preprocessed image or precomputed features of
                the image. As ndarray.
            rois: RoIs sized for the image.
                Ndarray: (image_index, x1, y1, x2, y2)
            image_size: The original image size.
            image_resize_ratio: The ratio by which this image was resized.
        """
        rois_np, dedup_inv_index = self.dedup_boxes(rois.numpy())
        image_var = Variable(image.cuda(), volatile=True)
        rois_var = Variable(torch.Tensor(rois_np).cuda(), volatile=True)

        # Run the img through the network
        out = self.model(image_var, rois_var)
        # predicted deltas
        deltas = out[1].data.cpu().numpy()
        deltas = self.unnormalize_deltas(deltas, self._targets_mean,
                                         self._targets_std)

        # transform rois using predicted deltas
        boxes = rois_np[:, 1:] / image_resize_ratio
        bboxes_inv_transformed = bbox_transform_inv(boxes, deltas)

        class_probas, class_indexes = torch.max(out[0], 1)
        indexes_np = np.squeeze(class_indexes.data.cpu().numpy())
        #     print('Total FG RoIs Detected: ', np.sum(indexes_np > 0))

        scores = out[0].data.cpu().numpy()
        scores = np.exp(scores)

        # clip rois to image size
        bboxes_inv_transformed = clip_boxes(bboxes_inv_transformed, image_size)

        scores = scores[dedup_inv_index, :]
        bboxes_inv_transformed = bboxes_inv_transformed[dedup_inv_index, :]

        # Non-maximum suppression of similar boxes
        all_boxes = self._nms_boxes(bboxes_inv_transformed, scores)

        return all_boxes
Example #12
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors, mode='train'):
    """A simplified version compared to fast/er RCNN
       For details please see the technical report
    """

    pre_nms_topN = 12000
    post_nms_topN = 2000
    nms_thresh = 0.7

    if mode == 'test':
        pre_nms_topN = 3000
        post_nms_topN = 300

    # Get the scores and bounding boxes
    scores = rpn_cls_prob[:, :, :, num_anchors:]
    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
    proposals = clip_boxes(proposals, im_info[:2])

    # Pick the top region proposals
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # Non-maximal suppression
    keep = nms(np.hstack((proposals, scores)), nms_thresh)

    # Pick the top region proposals after NMS
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]

    # Only support single image as input
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

    return blob, scores
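
For shape orientation, a hypothetical call with random arrays standing in for real network output (a 38x50 feature map with 9 anchors per cell; the anchors here are random too, so the resulting boxes are meaningless, and the helpers sketched earlier are assumed to be in scope):

import numpy as np

H, W, A = 38, 50, 9
rpn_cls_prob = np.random.rand(1, H, W, 2 * A).astype(np.float32)
rpn_bbox_pred = (np.random.randn(1, H, W, 4 * A) * 0.1).astype(np.float32)
anchors = (np.random.rand(H * W * A, 4) * 600).astype(np.float32)
im_info = np.array([600.0, 800.0, 1.0], dtype=np.float32)

rois, roi_scores = proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info,
                                  16, anchors, A, mode='test')
# rois: (N, 5), a zero batch index followed by (x1, y1, x2, y2)
# roi_scores: (N, 1) foreground probabilities of the kept proposals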
Example #13
    def forward(self, epoch, speech_data, act_lens, gt_boxes, num_boxes):
        cfg_key = 'TRAIN' if self.training else 'TEST'
        batch_size = speech_data.size(0)
        # Feature extraction
        base_feat = self.feature_extractor(speech_data, act_lens)
        # RPN, get the proposals and anchors and predicted scores
        anchors_per_utt, proposals, rpn_cls_score, rpn_bbox_pred = self.rpn_nnet(
            base_feat)
        # here scores didn't go through the softmax
        # batch_size * num_anchors_per_utt * 2 (box_dim or score_dim)
        rois = clip_boxes(proposals, act_lens, batch_size)

        rpn_label = None
        # here we first calculate the rpn loss and then calculate the kws loss
        if self.training:
            # calculate rpn loss
            rpn_data = self.anchor_target_layer(
                anchors_per_utt, gt_boxes, act_lens
            )  # RPN training targets: labels, bbox regression targets, bbox_inside_weights, bbox_outside_weights
            rpn_label = rpn_data[0].long().view(-1)
            rpn_keep = rpn_label.ne(-1).nonzero().view(-1)

            rpn_label = torch.index_select(rpn_label, 0, rpn_keep)
            rpn_cls_score = torch.index_select(
                rpn_cls_score.view(-1, self.num_class), 0, rpn_keep)

            self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)

            rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[
                1:]
            self.rpn_loss_bbox = smooth_l1_loss(rpn_bbox_pred,
                                                rpn_bbox_targets,
                                                rpn_bbox_inside_weights,
                                                rpn_bbox_outside_weights,
                                                sigma=10,
                                                dim=[0, 1])

            return rois, rpn_cls_score, rpn_label, self.rpn_loss_cls, self.rpn_loss_bbox
        return rois, rpn_cls_score, anchors_per_utt
Example #14
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride, anchors, num_anchors):
  """A layer that just selects the top region proposals
     without using non-maximal suppression,
     For details please see the technical report
  """
  rpn_top_n = cfg.TEST.RPN_TOP_N

  scores = rpn_cls_prob[:, :, :, num_anchors:]

  rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
  scores = scores.reshape((-1, 1))

  length = scores.shape[0]
  if length < rpn_top_n:
    # Random selection, maybe unnecessary and loses good proposals
    # But such case rarely happens
    top_inds = npr.choice(length, size=rpn_top_n, replace=True)
  else:
    top_inds = scores.argsort(0)[::-1]
    top_inds = top_inds[:rpn_top_n]
    top_inds = top_inds.reshape(rpn_top_n, )

  # Do the selection here
  anchors = anchors[top_inds, :]
  rpn_bbox_pred = rpn_bbox_pred[top_inds, :]
  scores = scores[top_inds]

  # Convert anchors into proposals via bbox transformations
  proposals = bbox_transform_inv(anchors, rpn_bbox_pred)

  # Clip predicted boxes to image
  proposals = clip_boxes(proposals, im_info[:2])

  # Output rois blob
  # Our RPN implementation only supports a single input image, so all
  # batch inds are 0
  batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
  blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
  return blob, scores
Example #15
    def forward(self, input):

        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs
        scores = input[0][:, self._num_anchors:, :, :]
        bbox_deltas = input[1]
        im_info = input[2]
        cfg_key = input[3]

        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
        min_size = cfg[cfg_key].RPN_MIN_SIZE

        batch_size = bbox_deltas.size(0)

        feat_height, feat_width = scores.size(2), scores.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(
            np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                       shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(scores).float()

        A = self._num_anchors
        K = shifts.size(0)

        self._anchors = self._anchors.type_as(scores)
        # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous()
        anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:

        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

        # Same story for the scores:
        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(batch_size, -1)

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info, batch_size)
        # proposals = clip_boxes_batch(proposals, im_info, batch_size)

        # assign the score to 0 if it's non keep.
        # keep = self._filter_boxes(proposals, min_size * im_info[:, 2])

        # trim keep index to make it equal over batch
        # keep_idx = torch.cat(tuple(keep_idx), 0)

        # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size)
        # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4)

        # _, order = torch.sort(scores_keep, 1, True)

        scores_keep = scores
        proposals_keep = proposals
        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)

            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                             nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # padding 0 at the end.
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output
Example #16
    def __call__(self, rpn_cls_prob, rpn_bbox_pred, im_info, train):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        pre_nms_topN = self.RPN_PRE_NMS_TOP_N if train else 6000
        post_nms_topN = self.RPN_POST_NMS_TOP_N if train else 300
        nms_thresh = self.RPN_NMS_THRESH
        min_size = self.RPN_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = to_cpu(rpn_cls_prob.data[:, self._num_anchors:, :, :])
        bbox_deltas = to_cpu(rpn_bbox_pred.data)
        im_info = im_info[0, :]

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.asarray(np.meshgrid(shift_x, shift_y))
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
            shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, -1)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN (e.g. 6000)
        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep = nms(np.hstack((proposals, scores)), nms_thresh)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep]

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
        rois = np.asarray(np.hstack((batch_inds, proposals)), dtype=np.float32)

        return rois
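
The anchor enumeration above (A base anchors broadcast against K cell shifts) is easier to see with tiny numbers; a sketch with 2 base anchors, a 2x2 feature map, and stride 16:

import numpy as np

_anchors = np.array([[-8, -8, 8, 8], [-16, -16, 16, 16]])   # A = 2 base anchors
shift_x, shift_y = np.meshgrid(np.arange(2) * 16, np.arange(2) * 16)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()  # K = 4 cells
A, K = _anchors.shape[0], shifts.shape[0]
anchors = _anchors.reshape((1, A, 4)) + \
    shifts.reshape((1, K, 4)).transpose((1, 0, 2))  # broadcasts to (K, A, 4)
anchors = anchors.reshape((K * A, 4))
print(anchors.shape)  # (8, 4): every base anchor replicated at every cell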
Example #17
    def forward(self, input):

        # Algorithm:
        #
        # for each (L, H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)


        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs
        scores = input[0][:, :, 1]  # batch_size x num_rois x 1
        bbox_deltas = input[1]      # batch_size x num_rois x 6
        im_info = input[2]
        cfg_key = input[3]
        feat_shapes = input[4]        

        pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH
        min_size      = cfg[cfg_key].RPN_MIN_SIZE

        batch_size = bbox_deltas.size(0)

        anchors = torch.from_numpy(generate_anchors_all_pyramids(self._fpn_scales, self._anchor_ratios,
                l_ratios, feat_shapes, 
                self._fpn_feature_strides, self._fpn_anchor_stride)).type_as(scores)
        num_anchors = anchors.size(0)

        anchors = anchors.view(1, num_anchors, 6).expand(batch_size, num_anchors, 6)

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info, batch_size)
        # keep_idx = self._filter_boxes(proposals, min_size).squeeze().long().nonzero().squeeze()
                
        scores_keep = scores
        proposals_keep = proposals

        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 7).zero_()
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1,1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)

            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # padding 0 at the end.
            num_proposal = proposals_single.size(0)
            output[i,:,0] = i
            output[i,:num_proposal,1:] = proposals_single

        return output
Example #18
def im_detect(net, im, boxes=None):
    """Detect object classes in an image given object proposals.

    Arguments:
        net (caffe.Net): Fast R-CNN network to use
        im (ndarray): color image to test (in BGR order)
        boxes (ndarray): R x 4 array of object proposals or None (for RPN)

    Returns:
        scores (ndarray): R x K array of object class scores (K includes
            background as object category 0)
        boxes (ndarray): R x (4*K) array of predicted bounding boxes
    """
    blobs, im_scales = _get_blobs(im, boxes)

    # When mapping from image ROIs to feature map ROIs, there's some aliasing
    # (some distinct image ROIs get mapped to the same feature ROI).
    # Here, we identify duplicate feature ROIs, so we only compute features
    # on the unique subset.
    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        v = np.array([1, 1e3, 1e6, 1e9, 1e12])
        hashes = np.round(blobs['rois'] * cfg.DEDUP_BOXES).dot(v)
        _, index, inv_index = np.unique(hashes,
                                        return_index=True,
                                        return_inverse=True)
        blobs['rois'] = blobs['rois'][index, :]
        boxes = boxes[index, :]

    if cfg.TEST.HAS_RPN:
        im_blob = blobs['data']
        blobs['im_info'] = np.array(
            [[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
            dtype=np.float32)

    # reshape network inputs
    net.blobs['data'].reshape(*(blobs['data'].shape))
    if cfg.TEST.HAS_RPN:
        net.blobs['im_info'].reshape(*(blobs['im_info'].shape))
    else:
        net.blobs['rois'].reshape(*(blobs['rois'].shape))

    # do forward
    forward_kwargs = {'data': blobs['data'].astype(np.float32, copy=False)}
    if cfg.TEST.HAS_RPN:
        forward_kwargs['im_info'] = blobs['im_info'].astype(np.float32,
                                                            copy=False)
    else:
        forward_kwargs['rois'] = blobs['rois'].astype(np.float32, copy=False)
    blobs_out = net.forward(**forward_kwargs)

    if cfg.TEST.HAS_RPN:
        assert len(im_scales) == 1, "Only single-image batch implemented"
        rois = net.blobs['rois'].data.copy()
        # unscale back to raw image space
        boxes = rois[:, 1:5] / im_scales[0]

    if cfg.TEST.SVM:
        # use the raw scores before softmax under the assumption they
        # were trained as linear SVMs
        # scores = net.blobs['cls_score'].data
        ### CHANGED
        scores = net.blobs['cls_score_box'].data
    else:
        # use softmax estimated probabilities
        scores = blobs_out['cls_prob']

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        # box_deltas = blobs_out['bbox_pred']
        ### CHANGED
        box_deltas = blobs_out['bbox_pred_box']
        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, im.shape)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    if cfg.DEDUP_BOXES > 0 and not cfg.TEST.HAS_RPN:
        # Map scores and predictions back to the original set of boxes
        scores = scores[inv_index, :]
        pred_boxes = pred_boxes[inv_index, :]

    return scores, pred_boxes
Example #19
    def forward(self, input):

        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)


        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs
        scores = input[0][:, self._num_anchors:, :, :]
        bbox_deltas = input[1]
        im_info = input[2]
        cfg_key = input[3]

        pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH
        min_size      = cfg[cfg_key].RPN_MIN_SIZE

        batch_size = bbox_deltas.size(0)

        feat_height, feat_width = scores.size(2), scores.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
                                  shift_x.ravel(), shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(scores).float()

        A = self._num_anchors
        K = shifts.size(0)

        self._anchors = self._anchors.type_as(scores)
        # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous()
        anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:

        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

        # Same story for the scores:
        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(batch_size, -1)

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info, batch_size)
        # proposals = clip_boxes_batch(proposals, im_info, batch_size)

        # assign the score to 0 if it's non keep.
        # keep = self._filter_boxes(proposals, min_size * im_info[:, 2])

        # trim keep index to make it equal over batch
        # keep_idx = torch.cat(tuple(keep_idx), 0)

        # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size)
        # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4)
        
        # _, order = torch.sort(scores_keep, 1, True)
        
        scores_keep = scores
        proposals_keep = proposals
        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1,1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)

            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # padding 0 at the end.
            num_proposal = proposals_single.size(0)
            output[i,:,0] = i
            output[i,:num_proposal,1:] = proposals_single

        return output
Example #20
def _proposal_layer_py(rpn_bbox_cls_prob, rpn_bbox_pred, im_dims, cfg_key, _feat_stride, anchor_scales):
    '''
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)
    # rpn_bbox_cls_prob shape : 1 , h , w , 2*9
    # rpn_bbox_pred shape : 1 , h , w , 4*9
    '''
    _anchors = generate_anchor.generate_anchors(scales=np.array(anchor_scales))  # _anchors: (9, 4)
    _num_anchors = _anchors.shape[0] #9
    rpn_bbox_cls_prob = np.transpose(rpn_bbox_cls_prob, [0, 3, 1, 2]) # rpn bbox _cls prob # 1, 18 , h , w
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2]) # 1, 36 , h , w

    # Only minibatch of 1 supported
    assert rpn_bbox_cls_prob.shape[0] == 1, \
        'Only single item batches are supported'
    if cfg_key:
        pre_nms_topN = cfg.TRAIN.RPN_PRE_NMS_TOP_N #12000
        post_nms_topN = cfg.TRAIN.RPN_POST_NMS_TOP_N # 2000
        nms_thresh = cfg.TRAIN.RPN_NMS_THRESH #0.1
        min_size = cfg.TRAIN.RPN_MIN_SIZE # 16

    else:  # cfg_key == 'TEST':
        pre_nms_topN = cfg.TEST.RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg.TEST.RPN_POST_NMS_TOP_N
        nms_thresh = cfg.TEST.RPN_NMS_THRESH
        min_size = cfg.TEST.RPN_MIN_SIZE

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs
    # 1. Generate proposals from bbox deltas and shifted anchors
    n, ch , height, width = rpn_bbox_cls_prob.shape

    ## rpn bbox _cls prob # 1, 18 , h , w
    scores = rpn_bbox_cls_prob.reshape([1,2, ch//2 *  height ,width])
    scores = scores.transpose([0,2,3,1])
    scores = scores.reshape([-1,2])
    scores = scores[:,1]
    scores =scores.reshape([-1,1])
    scores_ori = scores

    # Enumerate all shifts
    shift_x = np.arange(0, width) * _feat_stride
    shift_y = np.arange(0, height) * _feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    # Enumerate all shifted anchors:
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    A = _num_anchors
    K = shifts.shape[0]

    #anchors = _anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = np.array([])
    for i in range(len(_anchors)):
        if i == 0:
            anchors = np.add(shifts, _anchors[i])
        else:
            anchors = np.concatenate((anchors, np.add(shifts, _anchors[i])), axis=0)
    anchors = anchors.reshape((K * A, 4))

    ## BBOX TRANSPOSE (1,4*A,H,W --> A*H*W,4)
    shape = rpn_bbox_pred.shape # 1,4*A , H, W
    rpn_bbox_pred=rpn_bbox_pred.reshape([1, 4 , (shape[1]//4)*shape[2] , shape[3] ])
    rpn_bbox_pred=rpn_bbox_pred.transpose([0,2,3,1])
    rpn_bbox_pred = rpn_bbox_pred.reshape([-1,4])
    bbox_deltas=rpn_bbox_pred
    ## CLS TRANSPOSE ##

    ## BBOX TRANSPOSE Using Anchor
    proposals = bbox_transform_inv(anchors, bbox_deltas)
    proposals_ori = proposals
    proposals = clip_boxes(proposals, im_dims)  # shrink proposals that extend past the image bounds
    keep = _filter_boxes(proposals, min_size)  # min_size = 16; only boxes at least this large survive
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    #print 'scores : ', np.shape(scores)  # e.g. 421, 13 <-- this 13 keeps changing
    order = scores.ravel().argsort()[::-1]  # reverse the ascending sort so the largest score comes first
    if pre_nms_topN > 0:  # 12000
        order = order[:pre_nms_topN]

    #print np.sum([scores>0.7])
    scores = scores[order]
    proposals = proposals[order]
    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    #print np.shape(np.hstack((proposals, scores)))  # --> rows of [x_start, y_start, x_end, y_end, score]
    # proposals ndim and scores ndim must be the same
    """
    NMS
    keep =non_maximum_supression(dets =np.hstack((proposals, scores)) , thresh = 0.3)
    keep = nms(np.hstack((proposals, scores)), nms_thresh) # nms_thresh = 0.7 | hstack --> axis =1
    #keep = non_maximum_supression(proposals , nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    """
    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) # N , 5
    return blob , scores , proposals_ori , scores_ori
Example #21
def get_target_1st(all_anchors,
                   regr_layer,
                   img_data,
                   C,
                   roi_stride=10,
                   igthre=0.5,
                   posthre=0.7,
                   negthre=0.5):
    A = np.copy(all_anchors[:, :4])
    y_cls_batch, y_regr_batch = [], []
    for i in range(regr_layer.shape[0]):
        gta = np.copy(img_data[i]['bboxes'])
        num_bboxes = len(gta)
        ignoreareas = img_data[i]['ignoreareas']
        proposals = np.ones_like(all_anchors)
        bbox_deltas = regr_layer[i, :, :]
        bbox_deltas = bbox_deltas * np.array(
            C.classifier_regr_std).astype(dtype=np.float32)
        proposals[:, :4] = bbox_transform_inv(A, bbox_deltas)
        proposals = clip_boxes(proposals, [C.random_crop[0], C.random_crop[1]])
        if len(ignoreareas) > 0:
            ignore_overlap = box_op(
                np.ascontiguousarray(proposals[:, :4], dtype=np.float),
                np.ascontiguousarray(ignoreareas, dtype=np.float))
            ignore_sum = np.sum(ignore_overlap, axis=1)
            proposals[ignore_sum > igthre, -1] = 0
        keep = filter_negboxes(proposals, roi_stride)
        proposals[keep, -1] = 0
        valid_idxs = np.where(proposals[:, -1] == 1)[0]
        # initialise empty output objectives
        y_alf_overlap = np.zeros((all_anchors.shape[0], 1))
        y_alf_negindex = np.zeros((all_anchors.shape[0], 1))
        y_is_box_valid = np.zeros((all_anchors.shape[0], 1))
        y_alf_regr = np.zeros((all_anchors.shape[0], 4))

        valid_anchors = proposals[valid_idxs, :]
        valid_alf_overlap = np.zeros((valid_anchors.shape[0], 1))
        valid_is_box_valid = np.zeros((valid_anchors.shape[0], 1))
        valid_rpn_regr = np.zeros((valid_anchors.shape[0], 4))
        if num_bboxes > 0:
            valid_overlap = bbox_overlaps(
                np.ascontiguousarray(valid_anchors, dtype=np.float),
                np.ascontiguousarray(gta, dtype=np.float))
            # find every anchor close to which bbox
            argmax_overlaps = valid_overlap.argmax(axis=1)
            max_overlaps = valid_overlap[np.arange(len(valid_idxs)),
                                         argmax_overlaps]
            # find which anchor closest to every bbox
            gt_argmax_overlaps = valid_overlap.argmax(axis=0)
            gt_max_overlaps = valid_overlap[gt_argmax_overlaps,
                                            np.arange(num_bboxes)]
            gt_argmax_overlaps = np.where(valid_overlap == gt_max_overlaps)[0]
            valid_alf_overlap[gt_argmax_overlaps] = 1
            valid_alf_overlap[max_overlaps >= posthre] = 1
            for j in range(len(gta)):
                inds = valid_overlap[:, j].ravel().argsort()[-3:]
                valid_alf_overlap[inds] = 1
            # get positives labels
            fg_inds = np.where(valid_alf_overlap == 1)[0]
            valid_is_box_valid[fg_inds] = 1
            anchor_box = valid_anchors[fg_inds, :4]
            gt_box = gta[argmax_overlaps[fg_inds], :]

            # compute regression targets
            valid_rpn_regr[fg_inds, :] = compute_targets(anchor_box,
                                                         gt_box,
                                                         C.classifier_regr_std,
                                                         std=True)
            # get negatives labels
            bg_inds = np.where((max_overlaps < negthre)
                               & (valid_is_box_valid.reshape((-1)) == 0))[0]
            valid_is_box_valid[bg_inds] = 1
            # transform to the original overlap and validbox
            y_alf_overlap[valid_idxs, :] = valid_alf_overlap
            y_is_box_valid[valid_idxs, :] = valid_is_box_valid
            y_alf_regr[valid_idxs, :] = valid_rpn_regr
            y_alf_negindex = y_is_box_valid - y_alf_overlap
        y_alf_cls = np.expand_dims(np.concatenate(
            [y_alf_overlap, y_alf_negindex], axis=1),
                                   axis=0)
        y_alf_regr = np.expand_dims(np.concatenate([y_alf_overlap, y_alf_regr],
                                                   axis=1),
                                    axis=0)

        y_cls_batch.append(y_alf_cls)
        y_regr_batch.append(y_alf_regr)
    y_cls_batch = np.concatenate(np.array(y_cls_batch), axis=0)
    y_regr_batch = np.concatenate(np.array(y_regr_batch), axis=0)

    return [y_cls_batch, y_regr_batch]
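
bbox_overlaps here is the usual pairwise IoU between proposals and ground-truth boxes (in these codebases typically a Cython kernel); a plain NumPy sketch of what it computes:

import numpy as np

def bbox_overlaps(boxes, gt_boxes):
    # Pairwise IoU: boxes (N, 4) x gt_boxes (M, 4) -> overlaps (N, M).
    areas = (boxes[:, 2] - boxes[:, 0] + 1) * (boxes[:, 3] - boxes[:, 1] + 1)
    gt_areas = (gt_boxes[:, 2] - gt_boxes[:, 0] + 1) * (gt_boxes[:, 3] - gt_boxes[:, 1] + 1)
    overlaps = np.zeros((boxes.shape[0], gt_boxes.shape[0]))
    for j in range(gt_boxes.shape[0]):
        iw = np.minimum(boxes[:, 2], gt_boxes[j, 2]) - np.maximum(boxes[:, 0], gt_boxes[j, 0]) + 1
        ih = np.minimum(boxes[:, 3], gt_boxes[j, 3]) - np.maximum(boxes[:, 1], gt_boxes[j, 1]) + 1
        inter = np.maximum(iw, 0) * np.maximum(ih, 0)
        overlaps[:, j] = inter / (areas + gt_areas[j] - inter)
    return overlaps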
Example #22
def test():
    os.environ['CUDA_VISIBLE_DEVICES'] = cfg.GPU
    roidata = Roidata(is_train=False, with_keypoints=False)
    img_ph = tf.placeholder(
        tf.float32, shape=[cfg.batch_size, cfg.image_size, cfg.image_size, 1])

    logits = small_net(img_ph, 1.0, is_training=False)

    ckpt_file = 'samller_output_calibration/refine-31970'
    saver = tf.train.Saver()
    tfconfig = tf.ConfigProto()
    # tfconfig = tf.ConfigProto(allow_soft_placement=True)
    tfconfig.gpu_options.allow_growth = True
    tfconfig.gpu_options.per_process_gpu_memory_fraction = 1.0
    sess = tf.Session(config=tfconfig)
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, ckpt_file)

    result_file = open(
        'zebrish_yolo_143000_refine_smaller_with_gap_calibration.txt', 'w')
    test_timer = Timer()
    img, im_path, proposal, gt_boxes, score = roidata.get()
    # print(proposal, gt_boxes)
    feed_dict = {img_ph: img}
    # for i in range(100):
    #     logits_val = sess.run(logits, feed_dict=feed_dict)
    for i in range(len(roidata.data)):

        # if score < 0.5:
        img, im_path, proposal, gt_boxes, score = roidata.get()
        # print(proposal, gt_boxes)
        feed_dict = {img_ph: img}
        # cv2.imshow("origin", img[0])
        test_timer.tic()
        logits_val = sess.run(logits, feed_dict=feed_dict)
        test_timer.toc()
        logits_val = logits_val * np.array(cfg.BBOX_NORMALIZE_STDS)
        proposalnp = np.array([proposal], dtype=np.float32)
        pred_gt = bbox_transform_inv(proposalnp, logits_val)

        print('Average detecting time: {:.4f}s'.format(
            test_timer.average_time))
        origin_img = cv2.imread(im_path)
        # # print(im_path)
        im_index = im_path.split('/')[-1][:-4]
        # # print(im_index)
        print(pred_gt)
        pred_gt = clip_boxes(
            pred_gt,
            [origin_img.shape[0], origin_img.shape[1]])[0].astype(np.int32)

        result_file.write('{:s} {:.4f} {:d} {:d} {:d} {:d}\n'.format(
            im_index, score, pred_gt[0], pred_gt[1], pred_gt[2], pred_gt[3]))

        cv2.rectangle(origin_img, (proposal[0], proposal[1]),
                      (proposal[2], proposal[3]), (255, 0, 0))
        cv2.rectangle(origin_img, (gt_boxes[0], gt_boxes[1]),
                      (gt_boxes[2], gt_boxes[3]), (0, 255, 0))
        cv2.rectangle(origin_img, (pred_gt[0], pred_gt[1]),
                      (pred_gt[2], pred_gt[3]), (0, 0, 255))
        cv2.imshow("test", origin_img)
        cv2.waitKey(0)
Example #23
    def forward(self, input):
        # input[0]: (batch_size, channels, H, W) = (1, 24, 19, 20)
        # input[1]: (batch_size, channels, H, W) = (1, 12*4, 19, 20)
        # input[2]: (batch_size, H, W) = (1, 240, 320)
        # input[3]: "TEST" or "TRAIN"

        all_anchors = self.all_anchors.cuda()

        # class score (binary) for each feature-map pixel
        scores = input[0][:, self._num_anchors_type:, :, :]
        # bbox for each feature-map pixel, size (batch_size, 48, 19, 20)
        bbox_deltas = input[1]
        # image shape; for JHMDB it is [[240, 320]]  TODO1: change this to [240, 320]
        im_info = input[2]
        cfg_key = input[3]  # TRAIN or TEST
        im_info = np.array(im_info)

        pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N    # train: 12000, test: 6000
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # train: 2000,  test: 300
        nms_thresh = cfg[cfg_key].RPN_NMS_THRESH         # train: 0.7,   test: 0.7
        min_size = cfg[cfg_key].RPN_MIN_SIZE             # train: 8,     test: 16

        batch_size = bbox_deltas.size(0)  # mostly 1

        # since the anchors are obtained from dataset, we can just use it, change it to
        # (batch_size, 3600, 4) TODO: this is different from origin
        all_anchors = all_anchors.contiguous()
        all_anchors = all_anchors.view(1, self.all_num_anchors,
                                       4).expand(batch_size,
                                                 self.all_num_anchors, 4)

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:  change to (batch_size, 19, 20, 48)
        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

        # Same story for the scores:
        # batch_size, 19, 25, 12
        scores = scores.permute(0, 2, 3, 1).contiguous()
        '''
        x = torch.randn(5, 4)
        print(x.stride(), x.is_contiguous())
        print(x.t().stride(), x.t().is_contiguous())
        x.view(4, 5) # ok
        x.t().view(4, 5) # fails
        '''
        scores = scores.view(batch_size, -1)

        # Convert anchors into proposals via bbox transformations
        # so we get a big list of bbox
        ## slide anchors on each pixels on the feature map 19*20, get bounding boxes
        # achors, 630 * 4, means 630 anchors, with 4 coordinates
        #  bbox_deltas, batch_size, 19, 20, 48.  48 means 4 cooridnates * 12 anchors
        # all_anchors.shape = 1x3600x4, bbox_deltas.shape=1x3600x4
        proposals = bbox_transform_inv2(all_anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to image: TODO: this line is useless, since our input anchor is already fixed with
        # image size.
        ## remove the bboxes that outside of the image boundary
        # proposals.shape = [1, 3600, 4]), im_info = [[240, 320]]
        proposals = clip_boxes(proposals, im_info, batch_size)

        scores_keep = scores  #(batch_size, 12, 19, 25)
        proposals_keep = proposals

        # sort the CNN output scores for the 12 anchors, highest first
        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            # all the anchors for each feature-map pixel of one batch element, size (12, 19, 25)
            proposals_single = proposals_keep[i]
            # binary class score for each feature-map pixel, size (12, 19, 25)
            scores_single = scores_keep[i]

            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)

            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                             nms_thresh,
                             force_cpu=not cfg.USE_GPU_NMS)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # pad the remaining rows with zeros.
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output
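
bbox_transform_inv2 is called above but not defined in this snippet. As a reference, here is a minimal sketch of the batched delta decoding such a helper conventionally performs; the name and signature are taken from the call site, the body is an assumption based on the standard R-CNN box parameterization (batch_size is accepted only to match the call site):

import torch

def bbox_transform_inv2(boxes, deltas, batch_size):
    # hypothetical sketch: decode (dx, dy, dw, dh) regression deltas against
    # anchor boxes, both batched as (batch_size, N, 4)
    widths = boxes[:, :, 2] - boxes[:, :, 0] + 1.0
    heights = boxes[:, :, 3] - boxes[:, :, 1] + 1.0
    ctr_x = boxes[:, :, 0] + 0.5 * widths
    ctr_y = boxes[:, :, 1] + 0.5 * heights

    pred_ctr_x = deltas[:, :, 0] * widths + ctr_x
    pred_ctr_y = deltas[:, :, 1] * heights + ctr_y
    pred_w = torch.exp(deltas[:, :, 2]) * widths
    pred_h = torch.exp(deltas[:, :, 3]) * heights

    pred_boxes = deltas.clone()
    pred_boxes[:, :, 0] = pred_ctr_x - 0.5 * pred_w  # x1
    pred_boxes[:, :, 1] = pred_ctr_y - 0.5 * pred_h  # y1
    pred_boxes[:, :, 2] = pred_ctr_x + 0.5 * pred_w  # x2
    pred_boxes[:, :, 3] = pred_ctr_y + 0.5 * pred_h  # y2
    return pred_boxes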
Beispiel #24
0
    def detect(self, img, threshold=0.5, scales=[1.0], do_flip=False):
        #print('in_detect', threshold, scales, do_flip, do_nms)
        proposals_list = []
        scores_list = []
        landmarks_list = []
        strides_list = []
        timea = datetime.datetime.now()
        flips = [0]
        if do_flip:
            flips = [0, 1]

        imgs = [img]
        if isinstance(img, list):
            imgs = img
        for img in imgs:
            for im_scale in scales:
                for flip in flips:
                    if im_scale != 1.0:
                        im = cv2.resize(img,
                                        None,
                                        None,
                                        fx=im_scale,
                                        fy=im_scale,
                                        interpolation=cv2.INTER_LINEAR)
                    else:
                        im = img.copy()
                    if flip:
                        im = im[:, ::-1, :]
                    if self.nocrop:
                        if im.shape[0] % 32 == 0:
                            h = im.shape[0]
                        else:
                            h = (im.shape[0] // 32 + 1) * 32
                        if im.shape[1] % 32 == 0:
                            w = im.shape[1]
                        else:
                            w = (im.shape[1] // 32 + 1) * 32
                        _im = np.zeros((h, w, 3), dtype=np.float32)
                        _im[0:im.shape[0], 0:im.shape[1], :] = im
                        im = _im
                    else:
                        im = im.astype(np.float32)
                    if self.debug:
                        timeb = datetime.datetime.now()
                        diff = timeb - timea
                        print('X1 uses', diff.total_seconds(), 'seconds')
                    #self.model.bind(data_shapes=[('data', (1, 3, image_size[0], image_size[1]))], for_training=False)
                    #im_info = [im.shape[0], im.shape[1], im_scale]
                    im_info = [im.shape[0], im.shape[1]]
                    im_tensor = np.zeros((1, 3, im.shape[0], im.shape[1]))
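                    # flip channels BGR -> RGB and normalize each channel with the configured mean/std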
                    for i in range(3):
                        im_tensor[0, i, :, :] = (
                            im[:, :, 2 - i] / self.pixel_scale -
                            self.pixel_means[2 - i]) / self.pixel_stds[2 - i]
                    if self.debug:
                        timeb = datetime.datetime.now()
                        diff = timeb - timea
                        print('X2 uses', diff.total_seconds(), 'seconds')
                    data = nd.array(im_tensor)
                    db = mx.io.DataBatch(data=(data, ),
                                         provide_data=[('data', data.shape)])
                    if self.debug:
                        timeb = datetime.datetime.now()
                        diff = timeb - timea
                        print('X3 uses', diff.total_seconds(), 'seconds')
                    self.model.forward(db, is_train=False)
                    net_out = self.model.get_outputs()
                    #post_nms_topN = self._rpn_post_nms_top_n
                    #min_size_dict = self._rpn_min_size_fpn

                    sym_idx = 0

                    for _idx, s in enumerate(self._feat_stride_fpn):
                        #if len(scales)>1 and s==32 and im_scale==scales[-1]:
                        #  continue
                        _key = 'stride%s' % s
                        stride = int(s)
                        is_cascade = False
                        if self.cascade:
                            is_cascade = True
                        #if self.vote and stride==4 and len(scales)>2 and (im_scale==scales[0]):
                        #  continue
                        #print('getting', im_scale, stride, idx, len(net_out), data.shape, file=sys.stderr)
                        scores = net_out[sym_idx].asnumpy()
                        if self.debug:
                            timeb = datetime.datetime.now()
                            diff = timeb - timea
                            print('A uses', diff.total_seconds(), 'seconds')
                        #print(scores.shape)
                        #print('scores',stride, scores.shape, file=sys.stderr)
                        scores = scores[:, self._num_anchors[_key]:, :, :]

                        bbox_deltas = net_out[sym_idx + 1].asnumpy()

                        #if DEBUG:
                        #    print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
                        #    print 'scale: {}'.format(im_info[2])

                        #_height, _width = int(im_info[0] / stride), int(im_info[1] / stride)
                        height, width = bbox_deltas.shape[2], bbox_deltas.shape[3]

                        A = self._num_anchors[_key]
                        K = height * width
                        anchors_fpn = self._anchors_fpn[_key]
                        anchors = anchors_plane(height, width, stride, anchors_fpn)
                        #print((height, width), (_height, _width), anchors.shape, bbox_deltas.shape, scores.shape, file=sys.stderr)
                        anchors = anchors.reshape((K * A, 4))
                        #print('num_anchors', self._num_anchors['stride%s'%s], file=sys.stderr)
                        #print('HW', (height, width), file=sys.stderr)
                        #print('anchors_fpn', anchors_fpn.shape, file=sys.stderr)
                        #print('anchors', anchors.shape, file=sys.stderr)
                        #print('bbox_deltas', bbox_deltas.shape, file=sys.stderr)
                        #print('scores', scores.shape, file=sys.stderr)

                        #scores = self._clip_pad(scores, (height, width))
                        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

                        #print('pre', bbox_deltas.shape, height, width)
                        #bbox_deltas = self._clip_pad(bbox_deltas, (height, width))
                        #print('after', bbox_deltas.shape, height, width)
                        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1))
                        bbox_pred_len = bbox_deltas.shape[3] // A
                        #print(bbox_deltas.shape)
                        bbox_deltas = bbox_deltas.reshape((-1, bbox_pred_len))
                        bbox_deltas[:, 0::4] *= self.bbox_stds[0]
                        bbox_deltas[:, 1::4] *= self.bbox_stds[1]
                        bbox_deltas[:, 2::4] *= self.bbox_stds[2]
                        bbox_deltas[:, 3::4] *= self.bbox_stds[3]
                        proposals = self.bbox_pred(anchors, bbox_deltas)

                        #print(anchors.shape, bbox_deltas.shape, A, K, file=sys.stderr)
                        if is_cascade:
                            cascade_sym_num = 0
                            cls_cascade = False
                            bbox_cascade = False
                            __idx = [3, 4]
                            if not self.use_landmarks:
                                __idx = [2, 3]
                            for diff_idx in __idx:
                                if sym_idx + diff_idx >= len(net_out):
                                    break
                                body = net_out[sym_idx + diff_idx].asnumpy()
                                if body.shape[1] // A == 2:  #cls branch
                                    if cls_cascade or bbox_cascade:
                                        break
                                    else:
                                        cascade_scores = body[:, self._num_anchors[_key]:, :, :]
                                        cascade_scores = cascade_scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
                                        #scores = (scores+cascade_scores)/2.0
                                        scores = cascade_scores  #TODO?
                                        cascade_sym_num += 1
                                        cls_cascade = True
                                        #print('find cascade cls at stride', stride)
                                elif body.shape[1] // A == 4:  #bbox branch
                                    cascade_deltas = body.transpose((0, 2, 3, 1)).reshape((-1, bbox_pred_len))
                                    cascade_deltas[:, 0::4] *= self.bbox_stds[0]
                                    cascade_deltas[:, 1::4] *= self.bbox_stds[1]
                                    cascade_deltas[:, 2::4] *= self.bbox_stds[2]
                                    cascade_deltas[:, 3::4] *= self.bbox_stds[3]
                                    proposals = self.bbox_pred(proposals, cascade_deltas)
                                    cascade_sym_num += 1
                                    bbox_cascade = True
                                    #print('find cascade bbox at stride', stride)

                        proposals = clip_boxes(proposals, im_info[:2])

                        #if self.vote:
                        #  if im_scale>1.0:
                        #    keep = self._filter_boxes2(proposals, 160*im_scale, -1)
                        #  else:
                        #    keep = self._filter_boxes2(proposals, -1, 100*im_scale)
                        #  if stride==4:
                        #    keep = self._filter_boxes2(proposals, 12*im_scale, -1)
                        #    proposals = proposals[keep, :]
                        #    scores = scores[keep]

                        #keep = self._filter_boxes(proposals, min_size_dict['stride%s'%s] * im_info[2])
                        #proposals = proposals[keep, :]
                        #scores = scores[keep]
                        #print('333', proposals.shape)
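                        # optionally down-weight scores from the stride-4 level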
                        if stride == 4 and self.decay4 < 1.0:
                            scores *= self.decay4

                        scores_ravel = scores.ravel()
                        #print('__shapes', proposals.shape, scores_ravel.shape)
                        #print('max score', np.max(scores_ravel))
                        order = np.where(scores_ravel >= threshold)[0]
                        #_scores = scores_ravel[order]
                        #_order = _scores.argsort()[::-1]
                        #order = order[_order]
                        proposals = proposals[order, :]
                        scores = scores[order]
                        if flip:
                            oldx1 = proposals[:, 0].copy()
                            oldx2 = proposals[:, 2].copy()
                            proposals[:, 0] = im.shape[1] - oldx2 - 1
                            proposals[:, 2] = im.shape[1] - oldx1 - 1

                        proposals[:, 0:4] /= im_scale

                        proposals_list.append(proposals)
                        scores_list.append(scores)
                        if self.nms_threshold < 0.0:
                            _strides = np.full(scores.shape, stride, dtype=np.float32)
                            strides_list.append(_strides)

                        if not self.vote and self.use_landmarks:
                            landmark_deltas = net_out[sym_idx + 2].asnumpy()
                            #landmark_deltas = self._clip_pad(landmark_deltas, (height, width))
                            landmark_pred_len = landmark_deltas.shape[1] // A
                            landmark_deltas = landmark_deltas.transpose(
                                (0, 2, 3, 1)).reshape((-1, 5, landmark_pred_len // 5))
                            landmark_deltas *= self.landmark_std
                            #print(landmark_deltas.shape, landmark_deltas)
                            landmarks = self.landmark_pred(
                                anchors, landmark_deltas)
                            landmarks = landmarks[order, :]

                            if flip:
                                landmarks[:, :, 0] = im.shape[1] - landmarks[:, :, 0] - 1
                                # swap the left/right landmark points (eyes and mouth corners)
                                flip_order = [1, 0, 2, 4, 3]
                                flandmarks = landmarks.copy()
                                for idx, a in enumerate(flip_order):
                                    flandmarks[:, idx, :] = landmarks[:, a, :]
                                landmarks = flandmarks
                            landmarks[:, :, 0:2] /= im_scale
                            #landmarks /= im_scale
                            #landmarks = landmarks.reshape( (-1, landmark_pred_len) )
                            landmarks_list.append(landmarks)
                            #proposals = np.hstack((proposals, landmarks))
                        if self.use_landmarks:
                            sym_idx += 3
                        else:
                            sym_idx += 2
                        if is_cascade:
                            sym_idx += cascade_sym_num

        if self.debug:
            timeb = datetime.datetime.now()
            diff = timeb - timea
            print('B uses', diff.total_seconds(), 'seconds')
        proposals = np.vstack(proposals_list)
        landmarks = None
        if proposals.shape[0] == 0:
            if self.use_landmarks:
                landmarks = np.zeros((0, 5, 2))
            if self.nms_threshold < 0.0:
                return np.zeros((0, 6)), landmarks
            else:
                return np.zeros((0, 5)), landmarks
        scores = np.vstack(scores_list)
        #print('shapes', proposals.shape, scores.shape)
        scores_ravel = scores.ravel()
        order = scores_ravel.argsort()[::-1]
        #if config.TEST.SCORE_THRESH>0.0:
        #  _count = np.sum(scores_ravel>config.TEST.SCORE_THRESH)
        #  order = order[:_count]
        proposals = proposals[order, :]
        scores = scores[order]
        if self.nms_threshold < 0.0:
            strides = np.vstack(strides_list)
            strides = strides[order]
        if not self.vote and self.use_landmarks:
            landmarks = np.vstack(landmarks_list)
            landmarks = landmarks[order].astype(np.float32, copy=False)

        if self.nms_threshold > 0.0:
            pre_det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                                    copy=False)
            if not self.vote:
                keep = self.nms(pre_det)
                det = np.hstack((pre_det, proposals[:, 4:]))
                det = det[keep, :]
                if self.use_landmarks:
                    landmarks = landmarks[keep]
            else:
                det = np.hstack((pre_det, proposals[:, 4:]))
                det = self.bbox_vote(det)
        elif self.nms_threshold < 0.0:
            det = np.hstack(
                (proposals[:, 0:4], scores, strides)).astype(np.float32,
                                                             copy=False)
        else:
            det = np.hstack((proposals[:, 0:4], scores)).astype(np.float32,
                                                                copy=False)

        if self.debug:
            timeb = datetime.datetime.now()
            diff = timeb - timea
            print('C uses', diff.total_seconds(), 'seconds')
        return det, landmarks
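
anchors_plane is used above but not shown. A minimal NumPy sketch of what it is assumed to do, i.e. tile the per-cell base anchors across the feature map at the given stride, returning a (height, width, A, 4) array that the caller reshapes to (K*A, 4):

import numpy as np

def anchors_plane(height, width, stride, base_anchors):
    # hypothetical sketch: shift every base anchor by (col*stride, row*stride)
    # for each cell of the (height, width) feature map
    A = base_anchors.shape[0]
    all_anchors = np.zeros((height, width, A, 4), dtype=np.float32)
    for row in range(height):
        for col in range(width):
            shift = np.array([col * stride, row * stride,
                              col * stride, row * stride], dtype=np.float32)
            all_anchors[row, col, :, :] = base_anchors + shift
    return all_anchors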
Beispiel #25
0
def proposal_layer_py(rpn_bbox_cls_prob, rpn_bbox_pred, im_dims, mode,
                      feat_strides, anchor_scales):
    """
    
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all ( proposal , score) pairs by score from highest to lowest
    # take top pre_nums_ no N proposal before non-maximal suppresion
    # appy NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposlas ( -> ROIs, top, scores top)
    
    """

    anchors = generate_anchors.generate_anchors(base_size=16,
                                                ratios=[0.5, 1, 2],
                                                scales=anchor_scales)
    num_anchors = anchors.shape[0]
    rpn_bbox_cls_prob = np.transpose(rpn_bbox_cls_prob,
                                     [0, 3, 1, 2])  # [1, 9*2, height, width ]
    rpn_bbox_pred = np.transpose(rpn_bbox_pred,
                                 [0, 3, 1, 2])  # [1, 9*4, height, width ]

    if mode == 'train':
        pre_nms_topN = 12000
        post_nms_topN = 2000
        nms_thresh = 0.7
        min_size = 16
    else:
        pre_nms_topN = 6000
        post_nms_topN = 300
        nms_thresh = 0.7
        min_size = 16

    # one set of num_anchors channels holds the bg probabilities and the other
    # the fg probabilities (the order depends on the cls layer's layout).
    scores = rpn_bbox_cls_prob[:, :num_anchors, :, :]  # fg scores, [1, 9, height, width]
    bbox_deltas = rpn_bbox_pred  # [1, 9*4, height, width]

    # step1 : generate proposal from bbox deltas and shifted anchors
    height, width = scores.shape[-2:]
    shift_x = np.arange(0, width) * feat_strides
    shift_y = np.arange(0, height) * feat_strides
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack(
        (shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel()))
    shifts = shifts.transpose()  # [K, 4]

    A = num_anchors  # number of anchors per shift = 9
    K = shifts.shape[0]  # number of shifts
    anchors = anchors.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
    anchors = anchors.reshape((K * A, 4))  # [K*A, 4]

    # transpose and reshape predicted bbox transformations to get the same order as anchors
    # bbox_deltas is [1, 4*A, H, W ]
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape(
        (-1, 4))  # [ A*K, 4]
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))  # [ A*K, 1]

    # convert anchor into proposals via bbox transformations
    proposals = bbox_transform.bbox_transform_inv(anchors,
                                                  bbox_deltas)  # [K*A, 4]

    # step2: clip predicted boxes according to image size
    proposals = bbox_transform.clip_boxes(proposals, im_dims)

    # step3: remove predicted boxes with either height or width < threshold
    keep = filter_boxes(proposals, min_size)
    proposals = proposals[keep, :]

    scores = scores[keep]

    # step4: sort all (proposal, score) pairs by score from highest to lowest
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]

    # step5: take top pre_nms_topN
    proposals = proposals[order, :]
    scores = scores[order]

    # step6: apply nms ( e.g. threshold = 0.7 )
    keep = cpu_nms.cpu_nms(np.hstack((proposals, scores)), nms_thresh)

    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]

    # step7: take after_nms_topN
    proposals = proposals[keep, :]
    scores = scores[keep]
    print "proposals.shape after nms", proposals.shape
    print "scores.shape", scores.shape
    # step8: return the top proposal
    batch_inds = np.zeros((proposals.shape[0], 1),
                          dtype=np.float32)  # [ len(keep), 1]

    blob = np.hstack(
        (batch_inds,
         proposals.astype(np.float32,
                          copy=False)))  # proposal structure: [0,x1,y1,x2,y2]
    print "blob.shape", blob.shape
    return blob
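
Neither filter_boxes (step 3) nor cpu_nms (step 6) is defined in this snippet. Minimal sketches, assuming the conventional RPN semantics (a min-size test and greedy IoU-based non-maximum suppression):

import numpy as np

def filter_boxes(boxes, min_size):
    # keep boxes whose width and height are both >= min_size
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    return np.where((ws >= min_size) & (hs >= min_size))[0]

def cpu_nms(dets, thresh):
    # greedy NMS over dets = [x1, y1, x2, y2, score];
    # returns the indices of the kept boxes, highest score first
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the current best box against all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        # drop boxes overlapping the kept box by more than the threshold
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep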