def post_process(self, im, sim_ops, scale_factor=1):
        """
        Must be implemented by every network.
        Post-processes the results from the network; can be used to visualize data from hardware.
        """
        im = im[:, :, (2, 1, 0)]
        cls_score = sim_ops[0]
        cls_prob = sim_ops[1]
        bbox_pred = sim_ops[2]
        rois = sim_ops[3]
        boxes = rois[:, 1:5] / scale_factor
        scores = cls_prob
        box_deltas = bbox_pred
        pred_boxes = bbox_transform_inv(boxes, box_deltas, False)
        pred_boxes = self._clip_boxes(pred_boxes, im.shape)

        fig, ax = plt.subplots(figsize=(12, 12))
        ax.imshow(im, aspect='equal')
        CONF_THRESH = 0.6
        NMS_THRESH = 0.4
        for cls_ind, cls in enumerate(self.classes[1:]):
            cls_ind += 1  # because we skipped background
            cls_boxes = pred_boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, NMS_THRESH)
            dets = dets[keep, :]
            self._vis_detections(im, cls, dets, ax, thresh=CONF_THRESH)
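
For reference: in most of the repos these snippets come from, bbox_transform_inv is the py-faster-rcnn delta decoder. It takes boxes as (x1, y1, x2, y2) rows and per-class deltas (dx, dy, dw, dh), and returns absolute coordinates. A minimal NumPy sketch of that canonical version (some examples below pass an extra hardware flag, which this sketch omits):

import numpy as np

def bbox_transform_inv(boxes, deltas):
    # boxes: (N, 4) anchors/RoIs; deltas: (N, 4*K) with one (dx, dy, dw, dh) per class.
    if boxes.shape[0] == 0:
        return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)

    boxes = boxes.astype(deltas.dtype, copy=False)
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    dx, dy = deltas[:, 0::4], deltas[:, 1::4]
    dw, dh = deltas[:, 2::4], deltas[:, 3::4]

    # Shift the center linearly, scale the size exponentially.
    pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
    pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
    pred_w = np.exp(dw) * widths[:, np.newaxis]
    pred_h = np.exp(dh) * heights[:, np.newaxis]

    pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
    pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
    pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
    pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
    pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h
    return pred_boxes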
Example #2
def _unmap(label, target, tois):
    # Pick each RoI's 4 class-specific regression deltas, then decode against the tubes.
    n = label.size
    r_diff = np.zeros((n, 4))
    for i in range(n):
        curr_label = int(label[i] - 1)
        r_diff[i] = target[i, curr_label * 4:curr_label * 4 + 4]

    pred = bbox_transform_inv(tois, r_diff)
    return pred
Example #3
    def post_process(self, im, sim_ops, scale_factor=1):
        """
        Must be implemented by every network.
        Post-processes the results from the network; can be used to visualize data from hardware.
        self.post_process(im, [cls_score, cls_prob, bbox_pred, rois], scale_factor)
        """
        print("cls_score:\n")
        print(sim_ops[0])
        print("cls_prob:\n")
        print(sim_ops[1])
        print("bbox_pred:\n")
        print(sim_ops[2])
        print("rois:\n")
        print(sim_ops[3])
        print("scale_factor:\n")
        print(scale_factor)

        im = im[:, :, (2, 1, 0)]

        # Convert fixed-point network outputs back to float using each layer's fractional length.
        cls_score = sim_ops[0]
        cls_score = convert_to_float_py(cls_score, self._layer_map[77]['fl'])

        cls_prob = sim_ops[1]

        bbox_pred = sim_ops[2]
        bbox_pred = convert_to_float_py(bbox_pred, self._layer_map[78]['fl'])

        rois = sim_ops[3]
        boxes = rois[:, 1:5] / scale_factor

        # Debug: score detections with cls_prob (cls_score kept above for comparison).
        scores = cls_prob
        # scores = cls_score
        box_deltas = bbox_pred
        pred_boxes = bbox_transform_inv(boxes, box_deltas, False)
        pred_boxes = self._clip_boxes(pred_boxes, im.shape)

        fig, ax = plt.subplots(figsize=(12, 12))
        ax.imshow(im, aspect='equal')
        CONF_THRESH = 0.6
        NMS_THRESH = 0.4
        for cls_ind, cls in enumerate(self.classes[1:]):
            cls_ind += 1  # because we skipped background
            cls_boxes = pred_boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]

            print("DEBUG: pred_boxes shape: %s, cls_boxes shape: %s, scores shape: %s, cls_scores index: %d\n"
                  % (str(pred_boxes.shape), str(cls_boxes.shape), str(scores.shape), cls_ind))

            cls_scores = scores[:, cls_ind]
            print(cls_scores)
            dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = nms(dets, NMS_THRESH)
            dets = dets[keep, :]
            self._vis_detections(im, cls, dets, ax, thresh=CONF_THRESH)
        plt.show()
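
The nms used here (and the py_cpu_nms in Example #6) is usually the CPU reference implementation from the Fast R-CNN codebase: greedily keep the highest-scoring box and drop everything overlapping it beyond the threshold. A standard NumPy version for dets laid out as (x1, y1, x2, y2, score):

import numpy as np

def py_cpu_nms(dets, thresh):
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]  # highest score first

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # Intersection of the best box with all remaining boxes.
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        ovr = inter / (areas[i] + areas[order[1:]] - inter)
        # Keep only boxes whose IoU with the best box is below the threshold.
        order = order[np.where(ovr <= thresh)[0] + 1]
    return keep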
Example #4
def pred_bbox(anchors, diff):
    # Clamp the predicted deltas, decode them against the anchors, then
    # clip the resulting boxes to the feature-map extent (20 x 15 cells).
    for j in range(40):
        diff[j, 0] = max(min(diff[j, 0], 0.3), -0.3)
        diff[j, 1] = max(min(diff[j, 1], 0.3), -0.3)
        diff[j, 2] = max(min(diff[j, 2], 0.5), -0.5)
        diff[j, 3] = max(min(diff[j, 3], 0.5), -0.5)
    pred = bbox_transform_inv(anchors, diff)
    for j in range(40):
        pred[j, 0] = max(min(pred[j, 0], 19), 0)
        pred[j, 1] = max(min(pred[j, 1], 14), 0)
        pred[j, 2] = max(min(pred[j, 2], 19), 0)
        pred[j, 3] = max(min(pred[j, 3], 14), 0)

    return pred
Example #5
def proposal_layer(bbox_pred, iou_pred, cls_pred, anchors, ls):
    box_pred = bbox_transform_inv(
        np.ascontiguousarray(bbox_pred, dtype=np.float32),
        np.ascontiguousarray(anchors, dtype=np.float32), ls, ls) * cfg.INP_SIZE

    box_pred = np.reshape(box_pred, [-1, 4])

    iou_pred = np.reshape(iou_pred, [-1, 1])

    cls_pred = np.reshape(cls_pred, [-1, cfg.NUM_CLASSES])

    cls_inds = np.argmax(cls_pred, axis=1)
    cls_prob = cls_pred[np.arange(cls_pred.shape[0]), cls_inds][:, np.newaxis]

    scores = iou_pred * cls_prob

    # filter out boxes with scores <= coef thresh
    keep = np.where(scores >= cfg.COEF_THRESH)[0]
    # keep top n scores before apply nms
    keep = keep[np.argsort(-scores[keep, 0])[:cfg.PRE_NMS_TOP_N]]

    box_pred = box_pred[keep]
    cls_inds = cls_inds[keep]
    scores = scores[keep]

    # apply nms with top-n-score boxes
    keep = np.zeros(len(box_pred), dtype=np.int8)
    for i in range(cfg.NUM_CLASSES):
        inds = np.where(cls_inds == i)[0]
        if len(inds) == 0:
            continue

        keep_in_cls = nms_detection(np.hstack([box_pred[inds], scores[inds]]),
                                    cfg.NMS_THRESH)

        keep[inds[keep_in_cls]] = 1

    keep = np.where(keep > 0)

    box_pred = box_pred[keep]
    cls_inds = cls_inds[keep].astype(np.int8)
    scores = scores[keep][:, 0]

    # clip boxes inside image
    box_pred = clip_boxes(np.ascontiguousarray(box_pred, dtype=np.float32),
                          cfg.INP_SIZE, cfg.INP_SIZE)

    return box_pred, cls_inds, scores
Example #6
def main(args):
    image_path = args.img_path
    img = cv2.imread(image_path)
    # height/width/channel
    height, width, _ = img.shape
    # img resize
    img = cv2.resize(img, im_size, interpolation=cv2.INTER_CUBIC)

    imgs, rects = get_proposal(img)

    # get model
    input_tensor = Input(shape=im_size + (3, ))
    model = get_model(input_tensor, classes_num)

    features_model = get_features_model(model)
    if not os.path.exists(args.weights):
        raise Exception('model weights not exists, please check it')
    features_model.load_weights(args.weights, by_name=True)

    # NOTE: the original called predict_on_batch() with no argument; the proposal
    # crops from get_proposal are the natural input here.
    features = features_model.predict_on_batch(imgs)

    # load svm and ridge
    svm_fit = joblib.load('./svm/svm.pkl')
    bbox_fit = joblib.load('./svm/bbox_train.pkl')

    svm_pred = svm_fit.predict(features)
    bbox_pred = bbox_fit.predict(features)

    # Boolean mask of the proposals the SVM predicts as objects
    keep = svm_pred != 0

    # svm_pred = svm_pred[keep]
    rects = rects[keep]
    bbox_pred = bbox_pred[keep]

    # Box regression refinement
    pred_boxes = bbox_transform_inv(rects, bbox_pred)

    # Non-maximum suppression
    keep_ind = py_cpu_nms(pred_boxes, 0.5)
    #
    pred_boxes = pred_boxes[keep_ind, :]

    # pred_boxes[:, [2, 3]] = pred_boxes[:, [2, 3]] - pred_boxes[:, [0, 1]]
    pred_boxes[:, 2] = pred_boxes[:, 2] - pred_boxes[:, 0]
    pred_boxes[:, 3] = pred_boxes[:, 3] - pred_boxes[:, 1]
    # # show img
    show_rect(image_path, pred_boxes)
Example #7
def proposal_layer(rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key, _feat_stride, anchors, num_anchors):
  """A simplified version compared to fast/er RCNN
     For details please see the technical report
  """
  if type(cfg_key) == bytes:
      cfg_key = cfg_key.decode('utf-8')
  pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
  post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
  nms_thresh = cfg[cfg_key].RPN_NMS_THRESH

  # Get the scores and bounding boxes
  '''
  scores = tf.reshape(rpn_cls_prob, shape=(-1, 2))
  scores = scores[:, 1:]
  '''
  scores = rpn_cls_prob[:, :, :, num_anchors:]
  scores = scores.reshape((-1, 1))
  
  rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
  proposals = bbox_transform_inv(anchors, rpn_bbox_pred)
  proposals = clip_boxes(proposals, im_info[:2])

  # Pick the top region proposals
  order = scores.ravel().argsort()[::-1]
  if pre_nms_topN > 0:
    order = order[:pre_nms_topN]
  proposals = proposals[order, :]
  scores = scores[order]

  # Non-maximal suppression
  keep = nms(np.hstack((proposals, scores)), nms_thresh)

  # Pick the top region proposals after NMS
  if post_nms_topN > 0:
    keep = keep[:post_nms_topN]
  proposals = proposals[keep, :]
  scores = scores[keep]

  # Only support single image as input
  batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
  blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

  return blob, scores
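
clip_boxes, used here and in several later examples, is the py-faster-rcnn boundary clamp; a minimal NumPy sketch (boxes laid out as repeating x1, y1, x2, y2 groups, one per class):

import numpy as np

def clip_boxes(boxes, im_shape):
    # Clamp every coordinate into [0, W-1] x [0, H-1]; im_shape is (height, width).
    boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)  # x1
    boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)  # y1
    boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)  # x2
    boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)  # y2
    return boxes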
Example #8
def proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, im_info, _feat_stride,
                       anchors, num_anchors):
    """A layer that just selects the top region proposals
     without using non-maximal suppression,
     For details please see the technical report
  """
    rpn_top_n = cfg.TEST.RPN_TOP_N

    scores = rpn_cls_prob[:, :, :, num_anchors:]

    rpn_bbox_pred = rpn_bbox_pred.reshape((-1, 4))
    scores = scores.reshape((-1, 1))

    length = scores.shape[0]
    if length < rpn_top_n:
        # Random selection, maybe unnecessary and loses good proposals
        # But such case rarely happens
        top_inds = npr.choice(length, size=rpn_top_n, replace=True)
    else:
        top_inds = scores.argsort(0)[::-1]
        top_inds = top_inds[:rpn_top_n]
        top_inds = top_inds.reshape(rpn_top_n, )

    # Do the selection here
    anchors = anchors[top_inds, :]
    rpn_bbox_pred = rpn_bbox_pred[top_inds, :]
    scores = scores[top_inds]

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, rpn_bbox_pred)

    # Clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info[:2])

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob, scores
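
As a quick numeric check of the decode step these layers rely on, here is a hypothetical single-anchor call (values made up; the expected output assumes the canonical decoder sketched after Example #1):

import numpy as np

anchor = np.array([[0.0, 0.0, 15.0, 15.0]])       # 16x16 box centered at (7.5, 7.5)
delta = np.array([[0.0, 0.0, np.log(2.0), 0.0]])  # double the width, leave the rest

pred = bbox_transform_inv(anchor, delta)
print(pred)  # [[-8.5 -0.5 23.5 15.5]]: center preserved, decoded width 32, height 16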
Example #9
def bbox_reg_target(fastrcnn_reg_output, labels, rois, scores):
    """
    Get the regression-refined boxes.
    :param fastrcnn_reg_output:
    :param labels:
    :param rois:
    :return:
    """
    inds = np.where(labels > 0)[0]
    bbox_reg = np.zeros((len(inds), 5))
    for i, ind in enumerate(inds):
        cls = labels[ind]
        start = (int(cls) - 1) * 4
        end = start + 4
        bbox_reg[i, 0] = cls
        bbox_reg[i, 1:] = fastrcnn_reg_output[ind, start:end]
    rois = rois[inds, ...]
    # len(rois) == len(bbox_reg)
    pred_boxes = bbox_transform_inv(rois[:, 1:], bbox_reg[:, 1:])
    # (None, 6) x1, y1, x2, y2, score, cls
    final_pred_boxes = np.hstack(
        (pred_boxes, scores[inds, np.newaxis], labels[inds, np.newaxis]))
    return final_pred_boxes
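
A hypothetical call, only to pin down the shapes the corrected function expects (all values are made up; 21 classes is a PASCAL-VOC-style assumption):

import numpy as np

rois = np.array([[0, 10, 10, 50, 50],
                 [0, 20, 20, 80, 80]], dtype=np.float32)  # (batch_ind, x1, y1, x2, y2)
labels = np.array([2, 0])                                 # second RoI is background
scores = np.array([0.9, 0.1])
reg = np.zeros((2, 4 * 21), dtype=np.float32)             # 4 deltas per class

dets = bbox_reg_target(reg, labels, rois, scores)
print(dets.shape)  # (1, 6): x1, y1, x2, y2, score, cls for the one foreground RoI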
Example #10
def test_net(test_split, net, batchsize, use_kld=cfg.USE_KLD, use_reg=cfg.USE_REG, threshold=cfg.OVERLAP_THRESHOLD,
             topk=cfg.TOPK, vis=False):
    print('validate split: %s' % test_split)
    rpn_topn = cfg.RPN_TOPN
    # dp = get_data_provider(data_split=test_split, batchsize=batchsize)
    dp = DDPNDataProvider(data_split=test_split, batchsize=batchsize)
    num_query = dp.get_num_query()
    num_right = 0

    if cfg.NTHREADS > 1:
        try:
            import torch
            dataloader = torch.utils.data.DataLoader(dp,
                                                     batch_size=batchsize,
                                                     shuffle=False,
                                                     num_workers=int(cfg.NTHREADS))
        except:
            cfg.NTHREADS = 1
            dataloader = dp
    else:
        dataloader = dp
    count = 0
    for data in dataloader:
        if data is None:
            break
        data = map(np.array, data)
        my_complete_data = functools.partial(complete_data, batchsize=batchsize)
        gt_boxes, qvec, cvec, img_feat, bbox, img_shape, spt_feat, query_label, query_label_mask, \
        query_bbox_targets, query_bbox_inside_weights, query_bbox_outside_weights, valid_data, iid_list = map(
            my_complete_data, data)

        tp_qvec = qvec.copy()
        tp_cvec = cvec.copy()
        qvec = np.transpose(qvec, (1, 0))
        cvec = np.transpose(cvec, (1, 0))
        query_bbox_targets = query_bbox_targets.reshape(-1, 4)
        query_bbox_inside_weights = query_bbox_inside_weights.reshape(-1, 4)
        query_bbox_outside_weights = query_bbox_outside_weights.reshape(-1, 4)
        # net.blobs['queries'].reshape(*(qvec.shape))
        # net.blobs['query_cont'].reshape(*(cvec.shape))
        # net.blobs['img_feat'].reshape(*(img_feat.shape))
        # net.blobs['spt_feat'].reshape(*(spt_feat.shape))
        # net.blobs['query_label'].reshape(*query_label.shape)
        # net.blobs['query_label_mask'].reshape(*query_label_mask.shape)
        # net.blobs['query_bbox_targets'].reshape(*query_bbox_targets.shape)
        # net.blobs['query_bbox_inside_weights'].reshape(*query_bbox_inside_weights.shape)
        # net.blobs['query_bbox_outside_weights'].reshape(*query_bbox_outside_weights.shape)
        # forward_kwargs = {  'qvec': qvec.astype(np.float32, copy=False), \
        #                     'cvec': cvec.astype(np.float32, copy=False), \
        #                     'img_feat': img_feat.astype(np.float32, copy=False), \
        #                     'spt_feat': spt_feat.astype(np.float32, copy=False), \
        #                     'query_label': query_label.astype(np.float32, copy=False), \
        #                     'query_label_mask': query_label_mask.astype(np.float32, copy=False), \
        #                     'query_bbox_targets': query_bbox_targets.astype(np.float32, copy=False), \
        #                     'query_bbox_inside_weights': query_bbox_inside_weights.astype(np.float32, copy=False), \
        #                     'query_bbox_outside_weights': query_bbox_outside_weights.astype(np.float32, copy=False)}
        net.blobs['qvec'].data.reshape(*qvec.shape)
        net.blobs['qvec'].data[...] = qvec

        net.blobs['cvec'].data.reshape(*cvec.shape)
        net.blobs['cvec'].data[...] = cvec

        net.blobs['img_feat'].data.reshape(*img_feat.shape)
        net.blobs['img_feat'].data[...] = img_feat

        net.blobs['spt_feat'].data.reshape(*spt_feat.shape)
        net.blobs['spt_feat'].data[...] = spt_feat

        net.blobs['query_label'].data.reshape(*query_label.shape)
        net.blobs['query_label'].data[...] = query_label

        net.blobs['query_label_mask'].data.reshape(*query_label_mask.shape)
        net.blobs['query_label_mask'].data[...] = query_label_mask

        net.blobs['query_bbox_targets'].data.reshape(*query_bbox_targets.shape)
        net.blobs['query_bbox_targets'].data[...] = query_bbox_targets

        net.blobs['query_bbox_inside_weights'].data.reshape(*query_bbox_inside_weights.shape)
        net.blobs['query_bbox_inside_weights'].data[...] = query_bbox_inside_weights

        net.blobs['query_bbox_outside_weights'].data.reshape(*query_bbox_outside_weights.shape)
        net.blobs['query_bbox_outside_weights'].data[...] = query_bbox_outside_weights

        blobs_out = net.forward()
        # query_emb_tile = net.blobs['query_emb_tile'].data

        rois = bbox.copy()
        rois = rois.reshape(-1, 4)
        query_score_pred = net.blobs['query_score_pred'].data
        if use_reg:
            query_bbox_pred = net.blobs['query_bbox_pred'].data
            query_bbox_pred = bbox_transform_inv(rois, query_bbox_pred)
        else:
            query_bbox_pred = rois

        query_inds = np.argsort(-query_score_pred, axis=1)

        rois = rois.reshape(batchsize, rpn_topn, 4)
        query_bbox_pred = query_bbox_pred.reshape(batchsize, rpn_topn, 4)
        for i in range(batchsize):
            if valid_data[i] != 0:
                right_flag = False
                t_query_bbox_pred = clip_boxes(query_bbox_pred[i], img_shape[i])
                t_rois = clip_boxes(rois[i], img_shape[i])
                for j in range(topk):
                    query_ind = query_inds[i, j]

                    # overlaps = bbox_overlaps(
                    #     np.ascontiguousarray(query_bbox_pred[query_ind][np.newaxis], dtype=np.float),
                    #     np.ascontiguousarray(gt_boxes, dtype=np.float) )
                    iou = calc_iou(t_query_bbox_pred[query_ind], gt_boxes[i])
                    # print '%.2f percent:  %.2f'%((100 * float(i) / num_query), 100*iou)
                    if iou >= threshold:
                        num_right += 1
                        right_flag = True
                        break
                    # if overlaps[0].max() > threshold:
                    #     # json.dump([1], open(save_dir + '/right.json', 'w'))
                    #     print overlaps[0].max()
                    #     num_right += 1
                    #     break

                # debug pred
                if vis:
                    debug_dir = 'visual_pred_%s_%s' % (cfg.IMDB_NAME, test_split)
                    img_path = dp.get_img_path(int(iid_list[i]))
                    img = cv2.imread(img_path)
                    img.shape
                    debug_pred(debug_dir, count, tp_qvec[i], tp_cvec[i], img, gt_boxes[i], t_rois[query_ind],
                               t_query_bbox_pred[query_ind], iou)

            percent = 100 * float(count) / num_query
            sys.stdout.write('\r' + ('%.2f' % percent) + '%')
            sys.stdout.flush()
            count += 1
            if count >= num_query:
                break

    accuracy = num_right / float(num_query)
    print('accuracy: %f\n' % accuracy)
    return accuracy
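
calc_iou is called above but never defined in this listing; a standard single-pair IoU consistent with that call would be (a sketch, assuming both arguments are (x1, y1, x2, y2) boxes):

def calc_iou(box, gt_box):
    # Intersection, using the same +1 pixel convention as the rest of the code.
    ix1, iy1 = max(box[0], gt_box[0]), max(box[1], gt_box[1])
    ix2, iy2 = min(box[2], gt_box[2]), min(box[3], gt_box[3])
    iw, ih = max(ix2 - ix1 + 1, 0.0), max(iy2 - iy1 + 1, 0.0)
    inter = iw * ih
    union = ((box[2] - box[0] + 1) * (box[3] - box[1] + 1) +
             (gt_box[2] - gt_box[0] + 1) * (gt_box[3] - gt_box[1] + 1) - inter)
    return inter / union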
Example #11
    def forward(self, results, results2):
        self._net.blobs['data'].reshape(self._batch_size, 3, self._depth,
                                        self._height, self._width)
        self._net.blobs['tois'].reshape(self._batch_size * self.top * 8, 5)
        self._net.blobs['toi2'].reshape(self._batch_size * self.top * 8, 5)

        [clip, gt_bboxes, labels, all_pred, _,
         is_last] = self.dataset.next_val_video()
        labels = int(labels)
        n = int(clip.shape[0])

        rrrrr = []
        for i in xrange(n - self._depth + 1):
            batch_clip = clip[i:i + 1 * self._depth].transpose([3, 0, 1, 2])
            batch_clip = np.expand_dims(batch_clip, axis=0)

            pred = all_pred[i:i + 1 * self._depth]
            pred_anchors = np.reshape(pred, (-1, 4)) * 1.25
            curr_results1 = results[i]
            curr_results2 = results2[i]
            curr_results = (curr_results1 + curr_results2) * 0.5
            r1 = curr_results[:, :22]
            r2 = curr_results[:, 22:]
            curr_dets = {
                'boxes': np.empty((0, self._depth, 4)),
                'pred_label': np.empty((0)),
                'pred_scores': np.empty((0, 2)),
                'label_length': np.empty((0)),
            }
            tmp = r1.argmax(axis=1)
            for j in xrange(1, self.dataset._num_classes):
                ttmp = tmp[tmp == j]
                if ttmp.size > 0:
                    print('pred_label')
                    print(j)
                argsort_r = np.argsort(r1[:, j])[-self.top:]
                curr_scores = np.vstack((r1[argsort_r, j], r2[argsort_r,
                                                              j])).transpose()
                curr_boxes = pred_anchors[argsort_r]
                curr_boxes = np.repeat(curr_boxes, 8, axis=0)
                batch_tois = np.hstack((np.zeros(
                    (curr_boxes.shape[0], 1)), curr_boxes))
                curr_idx = np.arange(self._depth).reshape(1, self._depth)
                curr_idx = np.repeat(curr_idx, self.top, axis=0).reshape(-1, 1)
                batch_toi2 = np.hstack((curr_idx, curr_boxes))

                self._net.blobs['data'].data[...] = batch_clip.astype(
                    np.float32, copy=False)
                self._net.blobs['tois'].data[...] = batch_tois.astype(
                    np.float32, copy=False)
                self._net.blobs['toi2'].data[...] = batch_toi2.astype(
                    np.float32, copy=False)

                self._net.forward()

                diff = self._net.blobs['fc8-2'].data[...][:, (j - 1) * 4:j * 4]

                #print ('hhahaha')
                #print self._net.blobs['fc8-2'].data[...][:,:]
                #print self._net.blobs['fc8-2'].data[...][40,:]
                #        diff[:,0:2] = np.maximum(-0.3, np.minimum(0.3, diff[:,0:2]))
                #        diff[:,2:4] = np.maximum(-0.5, np.minimum(0.5, diff[:,2:4]))

                boxes = bbox_transform_inv(batch_tois[:, 1:5], diff).reshape(
                    (self.top, 8, 4)) * 16

                boxes[:, :,
                      0::2] = np.maximum(0,
                                         np.minimum(398.75, boxes[:, :, 0::2]))
                boxes[:, :,
                      1::2] = np.maximum(0,
                                         np.minimum(298.75, boxes[:, :, 1::2]))

                curr_dets['boxes'] = np.vstack((curr_dets['boxes'], boxes))
                curr_dets['pred_label'] = np.hstack(
                    (curr_dets['pred_label'], np.ones(self.top) * j))
                curr_dets['pred_scores'] = np.vstack(
                    (curr_dets['pred_scores'], curr_scores))
                curr_dets['label_length'] = np.hstack(
                    (curr_dets['label_length'], ttmp.size))

            rrrrr.append(curr_dets)

        r = {'dets': rrrrr, 'gt_bboxes': gt_bboxes, 'gt_label': labels}
        '''
      stack_overlaps = np.empty((self._depth, self.top, gt_bboxes.shape[0]))
      for j in xrange(self._depth):
        curr_gt_idx = np.where(gt_bboxes[0,:,0] == i * self._depth + j)[0]
        curr_gt = gt_bboxes[:, curr_gt_idx, 1 : 5].reshape(-1, 4)
        overlaps = bbox_overlaps(
          np.ascontiguousarray(boxes[:, j], dtype=np.float),
          np.ascontiguousarray(curr_gt, dtype=np.float))
        stack_overlaps[j] = overlaps

        # Find wrong detections.

      for j in xrange(stack_overlaps.shape[2]):
        argmax_overlaps = np.sum(stack_overlaps[:,:,j], axis=0).argmax()
        ov[i * self._depth : (i+1) * self._depth, j] = stack_overlaps[:, argmax_overlaps, j]
    '''
        return is_last, r
Example #12
def test_gallery(net, dataloader, output_dir, thresh=0.):
    """test gallery images"""

    with open('config.yml', 'r') as f:
        config = yaml.load(f)

    num_images = len(dataloader.dataset)
    all_boxes = []
    all_features = []
    end = time.time()
    time_cost = AverageMeter()
    net.eval()

    for i, data in enumerate(dataloader):
        with torch.no_grad():
            im, (orig_shape, im_info) = data
            im = im.to(device)
            im_info = im_info.numpy().squeeze(0)
            orig_shape = [x.item() for x in orig_shape]

            scores, bbox_pred, rois, features = net.forward(im, None, im_info)

        boxes = rois[:, 1:5] / im_info[2]
        scores = np.reshape(scores, [scores.shape[0], -1])
        bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])
        if config['test_bbox_reg']:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred
            pred_boxes = bbox_transform_inv(
                torch.from_numpy(boxes), torch.from_numpy(box_deltas)).numpy()
            pred_boxes = clip_boxes(pred_boxes, orig_shape)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        boxes = pred_boxes

        # skip j = 0, because it's the background class
        j = 1
        inds = np.where(scores[:, j] > thresh)[0]
        cls_scores = scores[inds, j]
        cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
        cls_dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32,
                                                           copy=False)
        keep = nms(torch.from_numpy(cls_dets),
                   config['test_nms']).numpy() if cls_dets.size > 0 else []
        cls_dets = cls_dets[keep, :]
        all_boxes.append(cls_dets)
        all_features.append(features[inds][keep])

        time_cost.update(time.time() - end)
        end = time.time()
        print('im_detect: {:d}/{:d} {:.3f}s'.format(i + 1, num_images,
                                                    time_cost.avg))

    det_file = os.path.join(output_dir, 'gboxes.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    feature_file = os.path.join(output_dir, 'gfeatures.pkl')
    with open(feature_file, 'wb') as f:
        pickle.dump(all_features, f, pickle.HIGHEST_PROTOCOL)

    return all_boxes, all_features
Example #13
    def forward(self, scores, bbox_delta, im_info, cfg_key):
        scores = scores[:, self._num_anchors:, :, :]

        pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH

        batch_size = bbox_delta.size(0)
        assert (batch_size == 1) # Only support batch size = 1

        # Get the full anchor
        feat_height, feat_width = scores.size(2), scores.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchor.reshape((1, A, 4)) + shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors_reshape = anchors.reshape((K * A, 4)).astype(np.float32, copy=False)

        # Convert the anchor into proposal
        bbox_delta = bbox_delta.permute(0, 2, 3, 1).contiguous()
        bbox_delta = bbox_delta.view(-1, 4)
        proposals = bbox_transform_inv(torch.from_numpy(anchors_reshape).type_as(bbox_delta), bbox_delta)
        proposals = clip_boxes(proposals, im_info)

        # choose the proposals
        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(1, -1)

        # pick the top region proposals
        scores, order = scores.view(-1).sort(descending=True)
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
            scores = scores[:pre_nms_topN].view(-1, 1)
        proposals = proposals[order.data, :]

        # scores_keep = scores
        # _, order = torch.sort(scores_keep, 1, True)
        # if pre_nms_topN > 0:
        #     order_single = order[0]
        #     scores_single = scores[0]
        #     order_single = order_single[:pre_nms_topN]
        # proposals = proposals[order_single, :]
        # scores = scores_single[order_single].view(-1, 1)


        # Non-maximal suppression
        keep = nms(torch.cat((proposals, scores), 1).data, nms_thresh)

        # pick the  top region proposals after nms
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        proposals = proposals[keep, :]
        scores = scores[keep, :]

        # TODO: batch_size > 1
        # padding batch ids at the first row
        output = scores.new(post_nms_topN, 5).zero_()
        num_proposal = proposals.size(0)
        output[:num_proposal, 1:] = proposals

        return output, anchors_reshape
Example #14
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'
        if self.phase==0:
            cfg_key = 'TRAIN'
        elif self.phase==1:
            cfg_key = 'TEST'
        else:
            cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'

        if cfg_key == 'TRAIN':
            nms_thresh = cfg[cfg_key].NMS_THRESH
            post_nms_topN = cfg[cfg_key].ANCHOR_N_POST_NMS
            pre_nms_topN = cfg[cfg_key].ANCHOR_N_PRE_NMS

        if cfg_key == 'TEST':
            pre_nms_topN =  cfg[cfg_key].N_DETS_PER_MODULE

        min_size = cfg[cfg_key].ANCHOR_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0].data[:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1].data
        im_info = bottom[2].data[0, :]

        if DEBUG:
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'scale: {}'.format(im_info[2])

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        if DEBUG:
            print 'score map size: {}'.format(scores.shape)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN

        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (if in training mode)
        # 7. take after_nms_topN
        # 8. return the top proposals (-> RoIs top)
        if self.phase == 0:
            # DO NMS ONLY IN TRAINING TIME
            # DURING TEST WE HAVE NMS OUTSIDE OF THIS FUNCTION 
            keep = nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]


        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        if proposals.shape[0] == 0:
            blob = np.array([[0,0,0,16,16]],dtype=np.float32)
        else:
            batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
            blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

        # [Optional] output scores blob
        if len(top) > 1:
            top[1].reshape(*(scores.shape))
            top[1].data[...] = scores
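
Step 3 of the algorithm comment calls _filter_boxes, which this listing never defines; in py-faster-rcnn it is essentially:

import numpy as np

def _filter_boxes(boxes, min_size):
    # Keep only boxes whose width and height are both at least min_size.
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
    return keep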
Example #15
def pose_target_layer(rois, bbox_prob, bbox_pred, gt_boxes, poses,
                      is_training):

    rois = rois.detach().cpu().numpy()
    bbox_prob = bbox_prob.detach().cpu().numpy()
    bbox_pred = bbox_pred.detach().cpu().numpy()
    gt_boxes = gt_boxes.detach().cpu().numpy()
    num_classes = bbox_prob.shape[1]

    # process boxes
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        stds = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes))
        means = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS),
                        (num_classes))
        bbox_pred *= stds
        bbox_pred += means

    boxes = rois[:, 2:6].copy()
    pred_boxes = bbox_transform_inv(boxes, bbox_pred)

    # assign boxes
    for i in range(rois.shape[0]):
        cls = int(rois[i, 1])
        rois[i, 2:6] = pred_boxes[i, cls * 4:cls * 4 + 4]
        rois[i, 6] = bbox_prob[i, cls]

    # convert boxes to (batch_ids, x1, y1, x2, y2, cls)
    roi_blob = rois[:, (0, 2, 3, 4, 5, 1)]
    gt_box_blob = np.zeros((0, 6), dtype=np.float32)
    pose_blob = np.zeros((0, 9), dtype=np.float32)
    for i in range(gt_boxes.shape[0]):
        for j in range(gt_boxes.shape[1]):
            if gt_boxes[i, j, -1] > 0:
                gt_box = np.zeros((1, 6), dtype=np.float32)
                gt_box[0, 0] = i
                gt_box[0, 1:5] = gt_boxes[i, j, :4]
                gt_box[0, 5] = gt_boxes[i, j, 4]
                gt_box_blob = np.concatenate((gt_box_blob, gt_box), axis=0)
                poses[i, j, 0] = i
                pose_blob = np.concatenate(
                    (pose_blob, poses[i, j, :].cpu().reshape(1, 9)), axis=0)

    if gt_box_blob.shape[0] == 0:
        num = rois.shape[0]
        poses_target = np.zeros((num, 4 * num_classes), dtype=np.float32)
        poses_weight = np.zeros((num, 4 * num_classes), dtype=np.float32)
    else:
        # overlaps: (rois x gt_boxes)
        overlaps = bbox_overlaps(
            np.ascontiguousarray(roi_blob[:, :5], dtype=np.float),
            np.ascontiguousarray(gt_box_blob[:, :5], dtype=np.float))

        gt_assignment = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)
        labels = gt_box_blob[gt_assignment, 5]
        quaternions = pose_blob[gt_assignment, 2:6]

        # Select foreground RoIs as those with >= FG_THRESH overlap
        bg_inds = np.where(max_overlaps < cfg.TRAIN.FG_THRESH_POSE)[0]
        labels[bg_inds] = 0

        bg_inds = np.where(roi_blob[:, -1] != labels)[0]
        labels[bg_inds] = 0

        # in training, only use the positive boxes for pose regression
        if is_training:
            fg_inds = np.where(labels > 0)[0]
            if len(fg_inds) > 0:
                rois = rois[fg_inds, :]
                quaternions = quaternions[fg_inds, :]
                labels = labels[fg_inds]

        # pose regression targets and weights
        poses_target, poses_weight = _compute_pose_targets(
            quaternions, labels, num_classes)

    return torch.from_numpy(rois).cuda(), torch.from_numpy(
        poses_target).cuda(), torch.from_numpy(poses_weight).cuda()
Example #16
def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, fl_cls_prob, fl_bbox_pred, feat_stride=[16,], anchor_scales = [8, 16, 32], base_size = 10, ratios =[0.333, 0.5, 0.667, 1.0, 1.5, 2.0, 3.0], pre_nms_topN = 2000, max_nms_topN = 400, isHardware=False, num_stddev=2.0):
        """
        Parameters
        ----------
        rpn_cls_prob_reshape: (1 , H , W , Ax2) outputs of RPN, prob of bg or fg
                                                 NOTICE: the old version is ordered by (1, H, W, 2, A) !!!!
        rpn_bbox_pred: (1 , H , W , Ax4), rgs boxes output of RPN
        im_info: a list of [image_height, image_width, scale_ratios]
        cfg_key: 'TRAIN' or 'TEST'
        _feat_stride: the downsampling ratio of feature map to the original input image
        anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
        ----------
        Returns
        ----------
        rpn_rois : (1 x H x W x A, 5) e.g. [0, x1, y1, x2, y2]
        """
        _anchors = generate_anchors(base_size, ratios, anchor_scales)
        _num_anchors = _anchors.shape[0]
        im_info = im_info[0]

        assert rpn_cls_prob_reshape.shape[0] == 1, \
                'Only single item batches are supported'

        # Convert fixed-point ints to floats for internal calculations!
        rpn_cls_prob_reshape = convert_to_float_py(rpn_cls_prob_reshape, fl_cls_prob)
        rpn_bbox_pred = convert_to_float_py(rpn_bbox_pred, fl_bbox_pred)

        post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
        nms_thresh        = cfg[cfg_key].RPN_NMS_THRESH
        min_size          = cfg[cfg_key].RPN_MIN_SIZE

        height, width = rpn_cls_prob_reshape.shape[1:3]

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        # (1, H, W, A)
        scores = np.reshape(np.reshape(rpn_cls_prob_reshape, [1, height, width, _num_anchors, 2])[:,:,:,:,1],
                                                [1, height, width, _num_anchors])

        # TODO: NOTICE: the old version is ordered by (1, H, W, 2, A) !!!!
        # TODO: if you use the old trained model, VGGnet_fast_rcnn_iter_70000.ckpt, uncomment this line
        scores = rpn_cls_prob_reshape[:,:,:,_num_anchors:]

        bbox_deltas = rpn_bbox_pred
        #im_info = bottom[2].data[0, :]

        if DEBUG:
                print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
                print 'scale: {}'.format(im_info[2])
                print 'min_size: {}'.format(min_size)
                print 'max_nms_topN: {}'.format(max_nms_topN)
                print 'post_nms_topN: {}'.format(post_nms_topN)

        # 1. Generate proposals from bbox deltas and shifted anchors
        if DEBUG:
                print 'score map size: {}'.format(scores.shape)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * feat_stride
        shift_y = np.arange(0, height) * feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = _num_anchors
        K = shifts.shape[0]
        anchors = _anchors.reshape((1, A, 4)) + \
                          shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.reshape((-1, 4)) #(HxWxA, 4)

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, isHardware)
        proposals = proposals.astype(bbox_deltas.dtype)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        #KM:  Move filtering into NMS (after estimating parameters
        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        #keep = _filter_boxes(proposals, min_size * im_info[2])
        #proposals = proposals[keep, :]
        #
        #print '[Ref Model Log] Num total Proposals before NMS : ' + str(proposals.shape)
        #scores = scores[keep]

        # # remove irregular boxes, too fat too tall
        # keep = _filter_irregular_boxes(proposals)
        # proposals = proposals[keep, :]
        # scores = scores[keep]

        # Hardware modeling             
        if (isHardware): 
        #if (0): 
                #proposals1 = np.copy(proposals)
                #scores1 = np.copy(scores)
                #KM:  Proposal inputs to NMS need to be in same order as HW or final results will be different!
                proposals1 = np.zeros(proposals.shape)
                scores1 = np.zeros(scores.shape)
                idy = 0
                for k in range(0,A):
                        for j in range(0,width):
                                for i in range(0,height):
                                        idx = (i*width*A)+(j*A)+k
                                        scores1[idy] = scores[idx]
                                        proposals1[idy] = proposals[idx]
                                        print_msg(str(k) + '.' + str(j) + '.' + str(i) + ' Proposal ' + str(idy) + ' -> [' + str(int(8*scores1[idy])) + '] ' + str((16*proposals1[idy,:]).astype(int)),1)
                                        idy = idy+1
                prop, score = nms_hw(proposals1, scores1, num_stddev, nms_thresh, min_size, im_info[2], max_nms_topN, post_nms_topN)
                batch_inds = np.zeros((prop.shape[0], 1), dtype=np.float32)
                blob = np.hstack((batch_inds, prop.astype(np.float32, copy=False)))                             
        else:
                order = scores.ravel().argsort()[::-1]
                if pre_nms_topN > 0:
                        order = order[:pre_nms_topN]
                proposals = proposals[order, :]
                scores = scores[order]
                keep = nms(np.hstack((proposals, scores)), nms_thresh)
                if post_nms_topN > 0:
                        keep = keep[:post_nms_topN]
                proposals = proposals[keep, :]
                scores = scores[keep]
                print 'Number of proposals : ' + str(len(keep))
                batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
                blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
        return blob
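
The anchor-shift enumeration that this example inlines (as do Examples #13, #14, #19 and #20) can be factored into a small helper. A sketch; the name shift_anchors is ours:

import numpy as np

def shift_anchors(base_anchors, height, width, feat_stride):
    # Tile the A base anchors over every feature-map cell: (K*A, 4) with K = H*W.
    shift_x = np.arange(0, width) * feat_stride
    shift_y = np.arange(0, height) * feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                        shift_x.ravel(), shift_y.ravel())).transpose()
    A = base_anchors.shape[0]
    K = shifts.shape[0]
    anchors = (base_anchors.reshape((1, A, 4)) +
               shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
    return anchors.reshape((K * A, 4))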
Example #17
def demo_search(net, im_dir, images, use_cuda, thresh=.75):
    with open('config.yml', 'r') as f:
        config = yaml.load(f)

    q_name = 's15166.jpg'
    q_roi = [29, 5, 164, 439]  # x1, y1, h, w
    x1, y1, h, w = q_roi

    q_path = os.path.join(im_dir, q_name)
    q_im, q_scale, _ = pre_process_image(q_path)
    q_roi = np.array(q_roi) * q_scale
    q_info = np.array([q_im.shape[1], q_im.shape[2], q_scale],
                      dtype=np.float32)

    q_im = q_im.transpose([0, 3, 1, 2])
    q_roi = np.hstack(([[0]], q_roi.reshape(1, 4)))

    with torch.no_grad():
        if use_cuda:
            q_im = torch.from_numpy(q_im).cuda()
            q_roi = torch.from_numpy(q_roi).float().cuda()
        else:
            q_im = torch.from_numpy(q_im)
            q_roi = torch.from_numpy(q_roi).float()

        q_feat = net.forward(q_im, q_roi, q_info, 'query')[0]

    # Show query
    fig, ax = plt.subplots(figsize=(16, 9))
    ax.imshow(plt.imread(q_path))
    plt.axis('off')
    ax.add_patch(
        plt.Rectangle((x1, y1),
                      h,
                      w,
                      fill=False,
                      edgecolor='#F92672',
                      linewidth=3.5))
    ax.add_patch(
        plt.Rectangle((x1, y1),
                      h,
                      w,
                      fill=False,
                      edgecolor='white',
                      linewidth=1))
    ax.text(x1 + 5,
            y1 - 15,
            '{}'.format('Query'),
            bbox=dict(facecolor='#F92672', linewidth=0),
            fontsize=20,
            color='white')
    plt.tight_layout()
    fig.savefig(os.path.join(im_dir, 'query.jpg'))
    plt.show()
    plt.close(fig)

    # Get gallery images
    images.remove(q_name)
    for im_name in images:
        im_path = os.path.join(im_dir, im_name)
        im, im_scale, orig_shape = pre_process_image(im_path, copy=True)
        im_info = np.array([im.shape[1], im.shape[2], im_scale],
                           dtype=np.float32)

        im = im.transpose([0, 3, 1, 2])

        if use_cuda:
            im = torch.from_numpy(im).cuda()
        else:
            im = torch.from_numpy(im)

        scores, bbox_pred, rois, features = net.forward(im, None, im_info)

        boxes = rois[:, 1:5] / im_info[2]
        scores = np.reshape(scores, [scores.shape[0], -1])
        bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])
        if config['test_bbox_reg']:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred
            pred_boxes = bbox_transform_inv(
                torch.from_numpy(boxes), torch.from_numpy(box_deltas)).numpy()
            pred_boxes = clip_boxes(pred_boxes, orig_shape)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        boxes = pred_boxes

        # skip j = 0, because it's the background class
        j = 1
        inds = np.where(scores[:, j] > thresh)[0]
        cls_scores = scores[inds, j]
        cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
        cls_dets = np.hstack(
            (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32,
                                                           copy=False)
        keep = nms(torch.from_numpy(cls_dets),
                   config['test_nms']).numpy() if cls_dets.size > 0 else []
        cls_dets = cls_dets[keep, :]
        features = features[inds][keep]

        if cls_dets is None:
            print('There are no detections in image {}'.format(im_name))
            continue

        similarities = features.dot(q_feat)

        fig, ax = plt.subplots(figsize=(16, 9))
        ax.imshow(plt.imread(im_path))
        plt.axis('off')

        # Set different colors for different ids
        similarities_list = similarities.tolist()
        max_sim = max(similarities_list)
        similarities_list.remove(max_sim)
        colors = {value: '#66D9EF' for value in similarities_list}
        colors[max_sim] = '#4CAF50'

        for box, sim in zip(cls_dets, similarities):
            x1, y1, x2, y2, _ = box
            ax.add_patch(
                plt.Rectangle((x1, y1),
                              x2 - x1,
                              y2 - y1,
                              fill=False,
                              edgecolor=colors[sim],
                              linewidth=3.5))
            ax.add_patch(
                plt.Rectangle((x1, y1),
                              x2 - x1,
                              y2 - y1,
                              fill=False,
                              edgecolor='white',
                              linewidth=1))
            ax.text(x1 + 5,
                    y1 - 15,
                    '{:.2f}'.format(sim),
                    bbox=dict(facecolor=colors[sim], linewidth=0),
                    fontsize=20,
                    color='white')
        plt.tight_layout()
        fig.savefig(os.path.join(im_dir, 'result_' + im_name))
        plt.show()
        plt.close(fig)
Example #18
def demo_detection(net, im_dir, images, use_cuda, thresh=.75):
    with open('config.yml', 'r') as f:
        config = yaml.load(f)

    with torch.no_grad():
        for im_name in images:
            im_path = os.path.join(im_dir, im_name)
            im, im_scale, orig_shape = pre_process_image(im_path, copy=True)
            im_info = np.array([im.shape[1], im.shape[2], im_scale],
                               dtype=np.float32)

            im = im.transpose([0, 3, 1, 2])

            if use_cuda:
                im = torch.from_numpy(im).cuda()
            else:
                im = torch.from_numpy(im)

            scores, bbox_pred, rois, _ = net.forward(im, None, im_info)

            boxes = rois[:, 1:5] / im_info[2]
            scores = np.reshape(scores, [scores.shape[0], -1])
            bbox_pred = np.reshape(bbox_pred, [bbox_pred.shape[0], -1])
            if config['test_bbox_reg']:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred
                pred_boxes = bbox_transform_inv(
                    torch.from_numpy(boxes),
                    torch.from_numpy(box_deltas)).numpy()
                pred_boxes = clip_boxes(pred_boxes, orig_shape)
            else:
                # Simply repeat the boxes, once for each class
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            boxes = pred_boxes

            # skip j = 0, because it's the background class
            j = 1
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = boxes[inds, j * 4:(j + 1) * 4]
            cls_dets = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32,
                                                               copy=False)
            keep = nms(
                torch.from_numpy(cls_dets),
                config['test_nms']).numpy() if cls_dets.size > 0 else []
            cls_dets = cls_dets[keep, :]

            if cls_dets is None:
                print('There are no detections in image {}'.format(im_name))
                continue

            fig, ax = plt.subplots(figsize=(16, 9))
            ax.imshow(plt.imread(im_path))
            plt.axis('off')
            for box in cls_dets:
                x1, y1, x2, y2, score = box
                ax.add_patch(
                    plt.Rectangle((x1, y1),
                                  x2 - x1,
                                  y2 - y1,
                                  fill=False,
                                  edgecolor='#66D9EF',
                                  linewidth=3.5))
                ax.add_patch(
                    plt.Rectangle((x1, y1),
                                  x2 - x1,
                                  y2 - y1,
                                  fill=False,
                                  edgecolor='white',
                                  linewidth=1))
                ax.text(x1 + 5,
                        y1 - 15,
                        '{:.2f}'.format(score),
                        bbox=dict(facecolor='#66D9EF', linewidth=0),
                        fontsize=20,
                        color='white')
            plt.tight_layout()
            plt.show()
            plt.close(fig)
Example #19
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'
        if self.phase == 0:
            cfg_key = 'TRAIN'
        elif self.phase == 1:
            cfg_key = 'TEST'
        else:
            cfg_key = str(self.phase)  # either 'TRAIN' or 'TEST'

        if cfg_key == 'TRAIN':
            nms_thresh = cfg[cfg_key].NMS_THRESH
            post_nms_topN = cfg[cfg_key].ANCHOR_N_POST_NMS
            pre_nms_topN = cfg[cfg_key].ANCHOR_N_PRE_NMS

        if cfg_key == 'TEST':
            pre_nms_topN = cfg[cfg_key].N_DETS_PER_MODULE
            score_thresh = cfg[cfg_key].SCORE_THRESH

        min_size = cfg[cfg_key].ANCHOR_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[-3].data  # For multi-class
        bbox_deltas = bottom[-2].data
        im_info = bottom[-1].data[0, :]

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride[0]
        shift_y = np.arange(0, height) * self._feat_stride[0]
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                            shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        num_classes = scores.shape[1] / (A * self._num_feats)
        anchors = self._anchors.reshape((1, A, 4)) + \
            shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))
        self.anchors = anchors

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape(
            (-1, num_classes, A * self._num_feats)).transpose(
                (0, 2, 1)).reshape((-1, num_classes))

        # Convert anchors into proposals via bbox transformations
        new_anchors = np.concatenate([anchors[:, np.newaxis, :]] *
                                     self._num_feats,
                                     axis=1).reshape((-1, 4))
        proposals = bbox_transform_inv(new_anchors, bbox_deltas)
        for i in range(self._num_refine):
            # Do this because a combination of bbox_transform_inv and _compute_targets
            # will cause a larger 3rd and 4th entry of coordinates
            # We do not do this at the last regression, just to follow the original code
            proposals[:, 2:4] -= 1
            refine_delta = bottom[i].data
            refine_delta = refine_delta.transpose((0, 2, 3, 1)).reshape(
                (-1, 4))
            proposals = bbox_transform_inv(proposals, refine_delta)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        if self._subsampled:
            anchor_map = np.zeros((height, width, A))
            for i in xrange(A):
                stride = self._feat_stride[i / len(self._shifts)**
                                           2] // self._feat_stride[0]
                anchor_map[::stride, ::stride, i] = 1
            anchor_map = anchor_map.reshape((K * A))
            subsampled_inds = np.where(anchor_map)[0]
            proposals = proposals[subsampled_inds, :]
            scores = scores[subsampled_inds, :]

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep, :]

        # # 4. sort all (proposal, score) pairs by score from highest to lowest
        # # 5. take top pre_nms_topN
        #
        max_score = np.max(scores[:, 1:], axis=1).ravel()
        order = max_score.argsort()[::-1]
        try:
            thresh_idx = np.where(max_score[order] >= score_thresh)[0].max()
        except ValueError:
            # nothing scored above score_thresh; keep only the single best box
            thresh_idx = 0
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        order = order[:thresh_idx + 1]
        proposals = proposals[order, :]
        scores = scores[order, :]

        # 6. apply nms (if in training mode)
        # 7. take after_nms_topN
        # 8. return the top proposals (-> RoIs top)
        if self.phase == 0:
            # apply NMS only at training time;
            # at test time NMS happens outside of this function
            keep = nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]

        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        if proposals.shape[0] == 0:
            blob = np.array([[0, 0, 0, 16, 16]], dtype=np.float32)
        else:
            batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
            blob = np.hstack(
                (batch_inds, proposals.astype(np.float32, copy=False)))

        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

        # [Optional] output scores blob
        if len(top) > 1:
            top[1].reshape(*(scores.shape))
            top[1].data[...] = scores
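
A note on the anchor enumeration above: adding the (1, A, 4) base anchors to the (K, 1, 4) grid shifts broadcasts to (K, A, 4), i.e. every base anchor replicated at every feature-map cell. A minimal self-contained NumPy sketch of just that step, assuming a toy 2x2 feature map, stride 16, and two illustrative square base anchors (not the values generate_anchors would produce):

import numpy as np

feat_stride = 16
height, width = 2, 2                          # toy 2x2 feature map
base_anchors = np.array([[-8., -8., 8., 8.],  # two made-up square anchors
                         [-16., -16., 16., 16.]])
A = base_anchors.shape[0]

shift_x = np.arange(0, width) * feat_stride
shift_y = np.arange(0, height) * feat_stride
shift_x, shift_y = np.meshgrid(shift_x, shift_y)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()
K = shifts.shape[0]                           # K = height * width cells

# (1, A, 4) + (K, 1, 4) -> (K, A, 4): every anchor at every cell
anchors = base_anchors.reshape((1, A, 4)) + \
    shifts.reshape((1, K, 4)).transpose((1, 0, 2))
anchors = anchors.reshape((K * A, 4))         # rows ordered by (cell, anchor)
assert anchors.shape == (K * A, 4)
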
Exemple #20
    def forward(self, input):

        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs
        scores = input[0][:, self._num_anchors:, :, :]
        bbox_deltas = input[1]
        im_info = input[2]

        pre_nms_topN = self.cf.rpn_pre_nms_top_n
        post_nms_topN = self.cf.rpn_post_nms_top_n
        nms_thresh = self.cf.rpn_nms_thresh

        batch_size = bbox_deltas.size(0)

        feat_height, feat_width = scores.size(2), scores.size(3)
        shift_x = np.arange(0, feat_width) * self._feat_stride
        shift_y = np.arange(0, feat_height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = torch.from_numpy(
            np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                       shift_y.ravel())).transpose())
        shifts = shifts.contiguous().type_as(scores).float()

        A = self._num_anchors
        K = shifts.size(0)

        self._anchors = self._anchors.type_as(scores)
        anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
        anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:

        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
        bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

        # Same story for the scores:
        scores = scores.permute(0, 2, 3, 1).contiguous()
        scores = scores.view(batch_size, -1)

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info, batch_size)

        scores_keep = scores
        proposals_keep = proposals
        _, order = torch.sort(scores_keep, 1, True)

        output = scores.new(batch_size, post_nms_topN, 5).zero_()
        for i in range(batch_size):
            # # 3. remove predicted boxes with either height or width < threshold
            # # (NOTE: convert min_size to input image scale stored in im_info[2])
            proposals_single = proposals_keep[i]
            scores_single = scores_keep[i]

            # # 4. sort all (proposal, score) pairs by score from highest to lowest
            # # 5. take top pre_nms_topN (e.g. 6000)
            order_single = order[i]

            if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                order_single = order_single[:pre_nms_topN]

            proposals_single = proposals_single[order_single, :]
            scores_single = scores_single[order_single].view(-1, 1)

            # 6. apply nms (e.g. threshold = 0.7)
            # 7. take after_nms_topN (e.g. 300)
            # 8. return the top proposals (-> RoIs top)

            keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1),
                             nms_thresh)
            keep_idx_i = keep_idx_i.long().view(-1)

            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]

            # pad the remaining rows with zeros
            num_proposal = proposals_single.size(0)
            output[i, :, 0] = i
            output[i, :num_proposal, 1:] = proposals_single

        return output
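
Every snippet here funnels through bbox_transform_inv to turn (dx, dy, dw, dh) deltas into corner boxes. The exact signature varies between these codebases, but the core decoding is the standard Faster R-CNN one; a minimal NumPy sketch of that decoding, as an assumption about what the imported helper computes:

import numpy as np

def decode_boxes(boxes, deltas):
    """boxes: (N, 4) float (x1, y1, x2, y2); deltas: (N, 4) (dx, dy, dw, dh)."""
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    pred_ctr_x = deltas[:, 0] * widths + ctr_x    # shift the center
    pred_ctr_y = deltas[:, 1] * heights + ctr_y
    pred_w = np.exp(deltas[:, 2]) * widths        # scale width/height
    pred_h = np.exp(deltas[:, 3]) * heights

    pred = np.zeros(boxes.shape, dtype=np.float32)
    pred[:, 0] = pred_ctr_x - 0.5 * pred_w
    pred[:, 1] = pred_ctr_y - 0.5 * pred_h
    pred[:, 2] = pred_ctr_x + 0.5 * pred_w
    pred[:, 3] = pred_ctr_y + 0.5 * pred_h
    return pred
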
Exemple #21
def test_model(imdb, valroidb, model_test, output_dir):
    # --------------------data gen-------------------
    data_test = DataGen(imdb.num_classes, shuffle=False)
    data_test_gen = data_test.generator(valroidb)
    # --------------------data gen-------------------

    #verbose = False

    #class_name = ('__background__',  # always index 0
    #             'aeroplane', 'bicycle', 'bird', 'boat',
    #             'bottle', 'bus', 'car', 'cat', 'chair',
    #             'cow', 'diningtable', 'dog', 'horse',
    #             'motorbike', 'person', 'pottedplant',
    #             'sheep', 'sofa', 'train', 'tvmonitor')


    # --------------------start testing-------------------
    all_boxes = [[[] for _ in range(len(imdb.image_index))]
                 for _ in range(imdb.num_classes)]
    #output_dir = '../test_save/'
    epoch_length = len(imdb.image_index)
    thresh = 0.
    max_per_image = 100
    progbar = generic_utils.Progbar(epoch_length)
    for i_batch in xrange(epoch_length):

        input_image, im_info, gt_boxes = next(data_test_gen)
        
        rois_test, _, cls_prob_test, bbox_pred_test \
        = model_test.predict_on_batch([input_image, im_info])

        im_scale = im_info[0, 2]

        stds = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (imdb.num_classes))
        means = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (imdb.num_classes))
        bbox_pred_test *= stds
        bbox_pred_test += means

        boxes = rois_test[:, 1:5]/im_scale
        scores = np.reshape(cls_prob_test, [cls_prob_test.shape[0], -1])
        bbox_pred = np.reshape(bbox_pred_test, [bbox_pred_test.shape[0], -1])
        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred
            pred_boxes = bbox_transform_inv(boxes, box_deltas)
            image_shape = np.floor(im_info[0, 0:2]/im_scale)
            pred_boxes = _clip_boxes(pred_boxes, image_shape)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))


        # skip j = 0, because it's the background class
        for j in range(1, imdb.num_classes):
            inds = np.where(scores[:, j] > thresh)[0]
            cls_scores = scores[inds, j]
            cls_boxes = pred_boxes[inds, j*4:(j+1)*4]
            cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
                         .astype(np.float32, copy=False)
            keep = nms(cls_dets, cfg.TEST.NMS)
            cls_dets = cls_dets[keep, :]
            all_boxes[j][i_batch] = cls_dets


        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i_batch][:, -1]
                                     for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i_batch][:, -1] >= image_thresh)[0]
                    all_boxes[j][i_batch] = all_boxes[j][i_batch][keep, :]

        progbar.update(i_batch)
    # --------------------start testing-------------------

    det_file = os.path.join(output_dir, 'detections.pkl')
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)
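
test_model above (and the proposal layers before it) calls an imported nms that is not shown; in these codebases it is usually a Cython or GPU kernel. A minimal pure-NumPy sketch of the same greedy IoU-based algorithm, under the usual (x1, y1, x2, y2, score) row layout:

import numpy as np

def greedy_nms(dets, thresh):
    """dets: (N, 5) rows of (x1, y1, x2, y2, score); returns kept indices."""
    x1, y1, x2, y2 = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3]
    scores = dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]          # highest score first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        # IoU of the current best box with all remaining boxes
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= thresh]    # drop heavily-overlapping boxes
    return keep
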
    def forward(self, bottom, top):
        # Algorithm:
        #
        # for each (H, W) location i
        #   generate A anchor boxes centered on cell i
        #   apply predicted bbox deltas at cell i to each of the A anchors
        # clip predicted boxes to image
        # remove predicted boxes with either height or width < threshold
        # sort all (proposal, score) pairs by score from highest to lowest
        # take top pre_nms_topN proposals before NMS
        # apply NMS with threshold 0.7 to remaining proposals
        # take after_nms_topN proposals after NMS
        # return the top proposals (-> RoIs top, scores top)

        assert bottom[0].data.shape[0] == 1, \
            'Only single item batches are supported'
        if self.phase == 0:
            cfg_key = 'TRAIN'
        elif self.phase == 1:
            cfg_key = 'TEST'
        else:
            cfg_key = str(self.phase)  # already 'TRAIN' or 'TEST'

        if cfg_key == 'TRAIN':
            nms_thresh = cfg[cfg_key].NMS_THRESH
            post_nms_topN = cfg[cfg_key].ANCHOR_N_POST_NMS
            pre_nms_topN = cfg[cfg_key].ANCHOR_N_PRE_NMS

        if cfg_key == 'TEST':
            pre_nms_topN =  cfg[cfg_key].N_DETS_PER_MODULE

        min_size = cfg[cfg_key].ANCHOR_MIN_SIZE

        # the first set of _num_anchors channels are bg probs
        # the second set are the fg probs, which we want
        scores = bottom[0].data[:, self._num_anchors:, :, :]
        bbox_deltas = bottom[1].data
        im_info = bottom[2].data[0, :]

        if DEBUG:
            print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
            print 'scale: {}'.format(im_info[2])

        # 1. Generate proposals from bbox deltas and shifted anchors
        height, width = scores.shape[-2:]

        if DEBUG:
            print 'score map size: {}'.format(scores.shape)

        # Enumerate all shifts
        shift_x = np.arange(0, width) * self._feat_stride
        shift_y = np.arange(0, height) * self._feat_stride
        shift_x, shift_y = np.meshgrid(shift_x, shift_y)
        shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                            shift_x.ravel(), shift_y.ravel())).transpose()

        # Enumerate all shifted anchors:
        #
        # add A anchors (1, A, 4) to
        # cell K shifts (K, 1, 4) to get
        # shift anchors (K, A, 4)
        # reshape to (K*A, 4) shifted anchors
        A = self._num_anchors
        K = shifts.shape[0]
        anchors = self._anchors.reshape((1, A, 4)) + \
                  shifts.reshape((1, K, 4)).transpose((1, 0, 2))
        anchors = anchors.reshape((K * A, 4))

        # Transpose and reshape predicted bbox transformations to get them
        # into the same order as the anchors:
        #
        # bbox deltas will be (1, 4 * A, H, W) format
        # transpose to (1, H, W, 4 * A)
        # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
        # in slowest to fastest order
        bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))

        # Same story for the scores:
        #
        # scores are (1, A, H, W) format
        # transpose to (1, H, W, A)
        # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
        scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))

        # Convert anchors into proposals via bbox transformations
        proposals = bbox_transform_inv(anchors, bbox_deltas)

        # 2. clip predicted boxes to image
        proposals = clip_boxes(proposals, im_info[:2])

        # 3. remove predicted boxes with either height or width < threshold
        # (NOTE: convert min_size to input image scale stored in im_info[2])
        keep = _filter_boxes(proposals, min_size * im_info[2])
        proposals = proposals[keep, :]
        scores = scores[keep]

        # 4. sort all (proposal, score) pairs by score from highest to lowest
        # 5. take top pre_nms_topN

        order = scores.ravel().argsort()[::-1]
        if pre_nms_topN > 0:
            order = order[:pre_nms_topN]
        proposals = proposals[order, :]
        scores = scores[order]

        # 6. apply nms (if in training mode)
        # 7. take after_nms_topN
        # 8. return the top proposals (-> RoIs top)
        if self.phase == 0:
            # apply NMS only at training time;
            # at test time NMS happens outside of this function
            keep = nms(np.hstack((proposals, scores)), nms_thresh)
            if post_nms_topN > 0:
                keep = keep[:post_nms_topN]
            proposals = proposals[keep, :]
            scores = scores[keep]


        # Output rois blob
        # Our RPN implementation only supports a single input image, so all
        # batch inds are 0
        if proposals.shape[0] == 0:
            blob = np.array([[0, 0, 0, 16, 16]], dtype=np.float32)
        else:
            batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
            blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))

        top[0].reshape(*(blob.shape))
        top[0].data[...] = blob

        # [Optional] output scores blob
        if len(top) > 1:
            top[1].reshape(*(scores.shape))
            top[1].data[...] = scores
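
All of these layers clip decoded proposals to the image with an imported clip_boxes. A minimal sketch of the usual definition, assuming im_shape is (height, width) and boxes are (x1, y1, x2, y2):

import numpy as np

def clip_boxes_to_image(boxes, im_shape):
    """Clamp (N, 4) boxes of (x1, y1, x2, y2) into [0, W-1] x [0, H-1]."""
    boxes[:, 0] = np.clip(boxes[:, 0], 0, im_shape[1] - 1)  # x1
    boxes[:, 1] = np.clip(boxes[:, 1], 0, im_shape[0] - 1)  # y1
    boxes[:, 2] = np.clip(boxes[:, 2], 0, im_shape[1] - 1)  # x2
    boxes[:, 3] = np.clip(boxes[:, 3], 0, im_shape[0] - 1)  # y2
    return boxes
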
def compute_targets(feed_data, anchors, ls):
    bbox_pred, iou_pred, gt_boxes, gt_cls = feed_data

    # filter ignored groundtruth boxes
    gt_inds = np.where(gt_cls >= 0)[0]
    gt_boxes = gt_boxes[gt_inds]
    gt_cls = gt_cls[gt_inds]

    # transform bbox and rescale to inp_size
    box_pred = bbox_transform_inv(
        np.ascontiguousarray(bbox_pred, dtype=np.float32),
        np.ascontiguousarray(anchors, dtype=np.float32), ls, ls) * cfg.INP_SIZE

    hw, num_anchors, _ = box_pred.shape

    cls_target = np.zeros((hw, num_anchors, cfg.NUM_CLASSES), dtype=np.float32)
    cls_mask = np.zeros((hw, num_anchors, 1), dtype=np.float32)
    iou_target = np.zeros((hw, num_anchors, 1), dtype=np.float32)
    iou_mask = np.zeros((hw, num_anchors, 1), dtype=np.float32)
    bbox_target = np.zeros((hw, num_anchors, 4), dtype=np.float32)
    bbox_mask = np.zeros((hw, num_anchors, 1), dtype=np.float32)

    # compute IoU overlaps between predicted and ground-truth boxes
    box_pred = np.reshape(box_pred, [-1, 4])

    box_ious = box_overlaps(np.ascontiguousarray(box_pred, dtype=np.float32),
                            np.ascontiguousarray(gt_boxes, dtype=np.float32))

    box_ious = np.reshape(box_ious, [hw, num_anchors, -1])

    # boxes whose best IoU with any ground truth is below the threshold are negatives
    neg_box_inds = np.where(np.max(box_ious, axis=2) < cfg.IOU_THRESH)
    iou_mask[neg_box_inds] = cfg.NO_OBJECT_SCALE * (0 - iou_pred[neg_box_inds])

    # locate groundtruth cells, compute bbox target
    feat_stride = cfg.INP_SIZE / ls

    cx = (gt_boxes[:, 0] + gt_boxes[:, 2]) * 0.5 / feat_stride
    cy = (gt_boxes[:, 1] + gt_boxes[:, 3]) * 0.5 / feat_stride
    cell_inds = np.floor(cx) * ls + np.floor(cy)
    cell_inds = cell_inds.astype(np.int)

    box_target = np.empty(gt_boxes.shape, dtype=np.float32)
    box_target[:, 0] = cx - np.floor(cx)
    box_target[:, 1] = cy - np.floor(cy)
    box_target[:, 2] = (gt_boxes[:, 2] - gt_boxes[:, 0]) / feat_stride
    box_target[:, 3] = (gt_boxes[:, 3] - gt_boxes[:, 1]) / feat_stride

    # select the best anchor for each ground-truth box
    gt_boxes /= feat_stride  # rescale to anchors' scale

    anchor_ious = anchor_overlaps(
        np.ascontiguousarray(anchors, dtype=np.float32),
        np.ascontiguousarray(gt_boxes, dtype=np.float32))

    anchor_inds = np.argmax(anchor_ious, axis=0)

    # compute targets, masks
    for i, cell_i in enumerate(cell_inds):
        if cell_i >= hw or cell_i < 0:
            continue

        a = anchor_inds[i]

        iou_mask[cell_i, a, :] = cfg.OBJECT_SCALE * \
            (1 - iou_pred[cell_i, a, :])
        iou_target[cell_i, a, :] = box_ious[cell_i, a, i]

        bbox_mask[cell_i, a, :] = cfg.BBOX_SCALE
        box_target[i, 2:4] /= anchors[a]
        bbox_target[cell_i, a, :] = box_target[i]

        cls_mask[cell_i, a, :] = cfg.CLS_SCALE
        cls_target[cell_i, a, gt_cls[i]] = 1

    return bbox_target, bbox_mask, iou_target, iou_mask, cls_target, cls_mask
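
compute_targets drives its positive/negative assignment from box_overlaps and anchor_overlaps, which are imported (typically Cython) helpers. A vectorized NumPy sketch of pairwise IoU with the same intent, using continuous coordinates (the Cython versions in some of these repos add the +1 pixel convention):

import numpy as np

def pairwise_iou(boxes, gt_boxes):
    """boxes: (N, 4), gt_boxes: (M, 4), both (x1, y1, x2, y2); returns (N, M) IoU."""
    lt = np.maximum(boxes[:, None, :2], gt_boxes[None, :, :2])  # intersection top-left
    rb = np.minimum(boxes[:, None, 2:], gt_boxes[None, :, 2:])  # intersection bottom-right
    wh = np.clip(rb - lt, 0, None)
    inter = wh[..., 0] * wh[..., 1]
    area_a = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    area_b = (gt_boxes[:, 2] - gt_boxes[:, 0]) * (gt_boxes[:, 3] - gt_boxes[:, 1])
    return inter / (area_a[:, None] + area_b[None, :] - inter + 1e-12)
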
        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(image, info, gt_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, 1:5]
        box_deltas = bbox_pred.data
        if cfg.TRAIN.CLASS_AGNOSTIC:
            box_deltas = box_deltas.view(-1, 4) * bbox_normalize_stds + bbox_normalize_means
            box_deltas = box_deltas.view(-1, 4)
        else:
            box_deltas = box_deltas.view(-1, 4) * bbox_normalize_stds + bbox_normalize_means
            box_deltas = box_deltas.view(-1, 4 * len(imdb.classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas)
        pred_boxes = clip_boxes(pred_boxes, info)
        pred_boxes /= im_scales[0]

        im2show = np.copy(im)
        for j in range(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if cfg.TRAIN.CLASS_AGNOSTIC:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j*4:(j+1)*4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
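
Both this fragment and test_model above first undo the training-time whitening of the regression targets (bbox_pred * stds + means) before decoding. A small sketch of that step with the common py-faster-rcnn default statistics, which are an assumption here; the real values come from cfg:

import numpy as np

num_classes = 21                          # e.g. PASCAL VOC: 20 classes + background
stds = np.array([0.1, 0.1, 0.2, 0.2])     # assumed BBOX_NORMALIZE_STDS defaults
means = np.array([0.0, 0.0, 0.0, 0.0])    # assumed BBOX_NORMALIZE_MEANS defaults

box_deltas = np.random.randn(300, 4 * num_classes).astype(np.float32)
# tile the per-coordinate statistics across all classes, then un-normalize
box_deltas = box_deltas * np.tile(stds, num_classes) + np.tile(means, num_classes)
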
Exemple #25
def _proposal_layer(rpn_bbox_cls, rpn_bbox_pred, im_size, feat_stride,
                    eval_mode):
    """

    :param rpn_bbox_cls: (None, H, W, 2 * k)
    :param rpn_bbox_pred: (None, H, W, 4 * k)
    :param im_size: (800, 600)
    :param feat_stride: 16
    :return:
    """
    rpn_bbox_cls_prob = rpn_softmax(rpn_bbox_cls)
    anchor = Anchors(feat_stride=feat_stride)
    # all_anchors (A * H * W, 4)
    anchors, A = anchor.get_anchors()
    num_anchors = A
    # (1,  2 * k, H, W)
    rpn_bbox_cls_prob = np.transpose(rpn_bbox_cls_prob, [0, 3, 1, 2])
    # (1,  4 * k, H, W)
    rpn_bbox_pred = np.transpose(rpn_bbox_pred, [0, 3, 1, 2])

    assert rpn_bbox_cls_prob.shape[0] == 1, 'Only batch size 1 is supported'

    if not eval_mode:
        # training mode
        pre_nms_topN = cfg.train_rpn_pre_nms_top_n
        post_nms_topN = cfg.train_rpn_post_nms_top_n
        nms_thresh = cfg.train_rpn_nms_thresh
        min_size = cfg.train_rpn_min_size
    else:
        # evaluation mode
        pre_nms_topN = cfg.test_rpn_pre_nms_top_n
        post_nms_topN = cfg.test_rpn_post_nms_top_n
        nms_thresh = cfg.test_rpn_nms_thresh
        min_size = cfg.test_rpn_min_size
    # of the predicted cls channels, the first 9 are background and the last 9 are foreground
    scores = rpn_bbox_cls_prob[:, num_anchors:, :, :]
    bbox_deltas = rpn_bbox_pred
    # (1, 4 * A, H, W) -> (1, H, W, 4 * A) -> (H * W * A, 4)
    bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
    # Same story for the scores:
    #
    # scores are (1, A, H, W) format
    # transpose to (1, H, W, A)
    # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
    scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
    # decode the real boxes from the anchors and the predicted deltas: (dx, dy, dw, dh) -> (cx, cy, w, h)
    proposals = bbox_transform_inv(anchors, bbox_deltas)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_size)

    # 3. remove predicted boxes with either height or width < threshold
    keep = _filter_boxes(proposals, min_size)
    proposals = proposals[keep, :]
    scores = scores[keep]

    # 4. sort all (proposal, score) pairs by score from highest to lowest
    # 5. take top pre_nms_topN (e.g. 6000)
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]

    # 6. apply nms (e.g. threshold = 0.7)
    # 7. take after_nms_topN (e.g. 300)
    # 8. return the top proposals (-> RoIs top)
    keep = nms(np.hstack((proposals, scores)), nms_thresh)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    # scores = scores[keep]

    # Output rois blob
    # Our RPN implementation only supports a single input image, so all
    # batch inds are 0
    batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
    blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
    return blob
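
The _filter_boxes helper used by each proposal layer is imported rather than shown. A minimal sketch of the usual py-faster-rcnn definition, given here as an assumption:

import numpy as np

def filter_small_boxes(boxes, min_size):
    """Return indices of boxes whose width and height are both >= min_size."""
    ws = boxes[:, 2] - boxes[:, 0] + 1
    hs = boxes[:, 3] - boxes[:, 1] + 1
    return np.where((ws >= min_size) & (hs >= min_size))[0]
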
Exemple #26
    def forward(self, results):
        self._net.blobs['data'].reshape(self._batch_size, 3, self._depth,
                                        self._height, self._width)
        self._net.blobs['tois'].reshape(self._batch_size * self.top * 8, 5)
        self._net.blobs['toi2'].reshape(self._batch_size * self.top * 8, 5)

        [clip, labels, gt_bboxes,
         is_last] = self.dataset.next_val_video(random=False)
        labels = int(labels)
        n = int(np.floor(clip.shape[0] / 8.0))

        rrrrr = []
        for i in xrange(n):
            batch_clip = clip[i * self._depth:(i + 1) * self._depth].transpose(
                [3, 0, 1, 2])
            batch_clip = np.expand_dims(batch_clip, axis=0)

            curr_results = results[i]
            r1 = curr_results[:, :11]
            r2 = curr_results[:, 11:]
            curr_dets = {
                'boxes': np.empty((0, self._depth, 4)),
                'pred_label': np.empty((0)),
                'pred_scores': np.empty((0, 2)),
            }
            tmp = r1.argmax(axis=1)
            for j in xrange(1, self.dataset.num_classes):
                tmp = tmp[tmp == j]
                if tmp.size == 0 and j != labels:
                    continue
                argsort_r = np.argsort(r1[:, j])[-self.top:]
                curr_scores = np.vstack((r1[argsort_r, j], r2[argsort_r,
                                                              j])).transpose()
                curr_boxes = self.anchors[argsort_r]
                curr_boxes = np.repeat(curr_boxes, 8, axis=0)
                batch_tois = np.hstack((np.zeros(
                    (curr_boxes.shape[0], 1)), curr_boxes))
                curr_idx = np.arange(self._depth).reshape(1, self._depth)
                curr_idx = np.repeat(curr_idx, self.top, axis=0).reshape(-1, 1)
                batch_toi2 = np.hstack((curr_idx, curr_boxes))

                self._net.blobs['data'].data[...] = batch_clip.astype(
                    np.float32, copy=False)
                self._net.blobs['tois'].data[...] = batch_tois.astype(
                    np.float32, copy=False)
                self._net.blobs['toi2'].data[...] = batch_toi2.astype(
                    np.float32, copy=False)

                self._net.forward()

                diff = self._net.blobs['fc8-2'].data[...][:, (j - 1) * 4:j * 4]
                boxes = bbox_transform_inv(batch_tois[:, 1:5], diff).reshape(
                    (self.top, 8, 4)) * 16
                curr_dets['boxes'] = np.vstack((curr_dets['boxes'], boxes))
                curr_dets['pred_label'] = np.hstack(
                    (curr_dets['pred_label'], np.ones(self.top) * j))
                curr_dets['pred_scores'] = np.vstack(
                    (curr_dets['pred_scores'], curr_scores))

            rrrrr.append(curr_dets)

        r = {'dets': rrrrr, 'gt_bboxes': gt_bboxes, 'gt_label': labels}
        '''
      stack_overlaps = np.empty((self._depth, self.top, gt_bboxes.shape[0]))
      for j in xrange(self._depth):
        curr_gt_idx = np.where(gt_bboxes[0,:,0] == i * self._depth + j)[0]
        curr_gt = gt_bboxes[:, curr_gt_idx, 1 : 5].reshape(-1, 4)
        overlaps = bbox_overlaps(
          np.ascontiguousarray(boxes[:, j], dtype=np.float),
          np.ascontiguousarray(curr_gt, dtype=np.float))
        stack_overlaps[j] = overlaps

        # Find wrong detections.

      for j in xrange(stack_overlaps.shape[2]):
        argmax_overlaps = np.sum(stack_overlaps[:,:,j], axis=0).argmax()
        ov[i * self._depth : (i+1) * self._depth, j] = stack_overlaps[:, argmax_overlaps, j]
    '''
        return is_last, r