Esempio n. 1
0
    def boxes_to_masks(self, img_path, boxes, labels):
        """
        Predict instance masks for the given boxes and labels on one image.

        The image is run through the network once; the cached ``net_conv``
        features are then passed, together with the rescaled boxes and the
        labels, to the mask head.

        Arguments:
        - img_path: path of the image file (read with cv2.imread)
        - boxes   : ndarray [[xyxy]] (n, 4) in original image
        - labels  : ndarray (n, )
        Return:
        - masks   : (n, ih, iw) uint8 [0,1]
        - rles    : list of rle instance, one per mask
        Raises:
        - IOError : if the image cannot be read from img_path
        """
        im = cv2.imread(img_path)
        if im is None:
            # cv2.imread reports failure by returning None instead of raising
            raise IOError('Cannot read image: {}'.format(img_path))

        blobs, im_scales = self._get_blobs(im)
        im_blob = blobs['data']  # (1, iH, iW, 3)
        blobs['im_info'] = np.array(
            [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
            dtype=np.float32)

        # forward pass; fills self.net._predictions
        self.net.test_image(blobs['data'], blobs['im_info'])
        net_conv = self.net._predictions['net_conv']

        # the mask head works in the scaled image frame, so rescale the boxes
        mask_prob = self.net._predict_masks_from_boxes_and_labels(
            net_conv, boxes * im_scales[0], labels)
        mask_prob = mask_prob.data.cpu().numpy()

        # paste masks back at original resolution, (N, ih, iw) uint8 [0-255]
        masks = recover_masks(mask_prob, boxes, im.shape[0], im.shape[1])
        masks = (masks > 122.).astype(np.uint8)  # (N, ih, iw) uint8 [0,1]

        # encode to rles (the encoder is given Fortran-ordered arrays)
        rles = [COCOmask.encode(np.asfortranarray(m)) for m in masks]

        return masks, rles
Esempio n. 2
0
    def forward_image(self, img_path, nms_thresh=.3, conf_thresh=.65):
        """
        Detect objects in an image, then compute their masks and features.

        If no detection passes conf_thresh, the threshold is lowered in steps
        of 0.1 until at least one detection is found.

        Arguments:
        - img_path   : path to image
        - nms_thresh : nms threshold
        - conf_thresh: confidence threshold [0,1]
        Return "data", a dict of
        - det_ids: list of det_ids, order consistent with dets and masks
        - dets   : [{det_id, box, category_name, category_id, score}], box is [xywh] and category_id is coco_cat_id
        - masks  : ndarray (n, im_h, im_w) uint8 [0,1]
        - Feats  :
          - pool5     : Variable cuda (n, 1024, 7, 7)
          - fc7       : Variable cuda (n, 2048, 7, 7)
          - lfeats    : Variable cuda (n, 5)
          - dif_lfeats: Variable cuda (n, 5*topK)
          - cxt_fc7   : Variable cuda (n, topK, 2048)
          - cxt_lfeats: Variable cuda (n, topK, 5)
        - cxt_det_ids : list of [surrounding_det_ids] for each det_id
        Raises:
        - RuntimeError: if nothing is detected even with a negative threshold
        """
        # read image
        im = imread(img_path)

        # 1st step: detect objects
        scores, boxes = self.mrcn.predict(img_path)

        # get head feats, i.e., net_conv, cached by the prediction above
        # Variable cuda (1, 1024, h, w)
        net_conv = self.mrcn.net._predictions['net_conv']
        im_info = self.mrcn.net._im_info  # [[H, W, im_scale]]

        # get cls_to_dets, class_name -> [xyxys] which is (n, 5)
        cls_to_dets, num_dets = self.cls_to_detections(scores, boxes,
                                                       nms_thresh, conf_thresh)
        # make sure num_dets > 0 by progressively relaxing the threshold
        thresh = conf_thresh
        while num_dets == 0:
            if thresh < 0:
                # a negative threshold was already tried; relaxing further
                # cannot help, so stop instead of looping forever
                raise RuntimeError(
                    'No detections found in {}'.format(img_path))
            thresh -= 0.1
            cls_to_dets, num_dets = self.cls_to_detections(
                scores, boxes, nms_thresh, thresh)

        # add to dets; det_id is the running index over all categories
        dets = []
        for category_name, detections in cls_to_dets.items():
            # detections: list of (n, 5), [xyxyc]
            for detection in detections:
                x1, y1, x2, y2, sc = detection
                dets.append({
                    'det_id': len(dets),
                    'box': [x1, y1, x2 - x1 + 1, y2 - y1 + 1],
                    'category_name': category_name,
                    'category_id':
                    self.imdb._class_to_coco_cat_id[category_name],
                    'score': sc
                })
        Dets = {det['det_id']: det for det in dets}
        det_ids = [det['det_id'] for det in dets]

        # 2nd step: get masks
        boxes = xywh_to_xyxy(np.array([det['box'] for det in dets
                                       ]))  # xyxy (n, 4) ndarray
        labels = np.array(
            [self.imdb._class_to_ind[det['category_name']] for det in dets])
        # boxes are multiplied by im_scale before going to the mask head
        mask_prob = self.mrcn.net._predict_masks_from_boxes_and_labels(
            net_conv, boxes * im_info[0][2], labels)
        mask_prob = mask_prob.data.cpu().numpy()
        # (N, ih, iw) uint8 [0-255]
        masks = recover_masks(mask_prob, boxes, im.shape[0], im.shape[1])
        masks = (masks > 122.).astype(np.uint8)  # (N, ih, iw) uint8 [0,1]

        # 3rd step: compute features
        pool5, fc7 = self.mrcn.box_to_spatial_fc7(
            net_conv, im_info, boxes)  # (n, 1024, 7, 7), (n, 2048, 7, 7)
        lfeats = self.compute_lfeats(det_ids, Dets, im)
        dif_lfeats = self.compute_dif_lfeats(det_ids, Dets)
        cxt_fc7, cxt_lfeats, cxt_det_ids = self.fetch_cxt_feats(
            det_ids, Dets, fc7, self.model_opt)

        # move to Variable cuda
        lfeats = Variable(torch.from_numpy(lfeats).cuda())
        dif_lfeats = Variable(torch.from_numpy(dif_lfeats).cuda())
        cxt_lfeats = Variable(torch.from_numpy(cxt_lfeats).cuda())

        # return
        return {
            'det_ids': det_ids,
            'dets': dets,
            'masks': masks,
            'cxt_det_ids': cxt_det_ids,
            'Feats': {
                'pool5': pool5,
                'fc7': fc7,
                'lfeats': lfeats,
                'dif_lfeats': dif_lfeats,
                'cxt_fc7': cxt_fc7,
                'cxt_lfeats': cxt_lfeats
            }
        }
Esempio n. 3
0
def test_net(net, imdb, weights_filename, max_per_image=100, thresh=0.):
  """Test a Fast R-CNN network on an image database.

  For each image: run detection, apply per-class score thresholding and NMS,
  keep at most max_per_image detections over all classes, then run the mask
  branch on the kept boxes and encode the masks as RLEs. The boxes are pickled
  to 'detections.pkl' in the output directory and boxes plus RLEs are handed
  to imdb.evaluate_detections.

  Arguments:
  - net             : network passed to im_detect and used for mask prediction
  - imdb            : image database (image_index, num_classes, image_path_at,
                      evaluate_detections are used)
  - weights_filename: passed to get_output_dir to pick the output directory
  - max_per_image   : cap on detections per image over all classes; <= 0
                      disables the cap
  - thresh          : per-class score threshold (strictly greater is kept)
  Raises:
  - IOError: if an image cannot be read
  """
  np.random.seed(cfg.RNG_SEED)
  num_images = len(imdb.image_index)
  num_classes = imdb.num_classes
  # all detections are collected into:
  #  all_boxes[cls][image] = N x 5 array of detections in
  #  (x1, y1, x2, y2, score)
  all_boxes = [[[] for _ in range(num_images)]
               for _ in range(num_classes)]
  #  all_rles[cls][image] = [rle] array of N rles
  all_rles = [[[] for _ in range(num_images)]
              for _ in range(num_classes)]

  output_dir = get_output_dir(imdb, weights_filename)
  # timers
  _t = {'im_detect' : Timer(), 'misc' : Timer()}

  for i in range(num_images):
    image_path = imdb.image_path_at(i)
    im = cv2.imread(image_path)
    if im is None:
      # cv2.imread reports failure by returning None instead of raising
      raise IOError('Cannot read image: {}'.format(image_path))

    _t['im_detect'].tic()
    scores, boxes, net_conv, im_scale = im_detect(net, im) # (n, 81), (n, 81*4), (n, 1024, H, W), float
    _t['im_detect'].toc()

    _t['misc'].tic()

    # skip j = 0, because it's the background class
    for j in range(1, num_classes):
      inds = np.where(scores[:, j] > thresh)[0]
      cls_scores = scores[inds, j]
      cls_boxes = boxes[inds, j*4:(j+1)*4]
      cls_dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])) \
        .astype(np.float32, copy=False)
      keep = nms(torch.from_numpy(cls_dets), cfg.TEST.NMS).numpy() if cls_dets.size > 0 else []
      all_boxes[j][i] = cls_dets[keep, :]

    # Limit to max_per_image detections *over all classes*
    if max_per_image > 0:
      image_scores = np.hstack([all_boxes[j][i][:, -1]
                                for j in range(1, num_classes)])
      if len(image_scores) > max_per_image:
        image_thresh = np.sort(image_scores)[-max_per_image]
        for j in range(1, num_classes):
          keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
          all_boxes[j][i] = all_boxes[j][i][keep, :]

    # run mask branch on all_boxes[:][i]
    accumulated_boxes  = []
    accumulated_labels = []
    for j in range(1, num_classes):
      num_cls_dets = all_boxes[j][i].shape[0]
      if num_cls_dets > 0:
        accumulated_boxes.append(all_boxes[j][i][:, :4])
        accumulated_labels.extend([j] * num_cls_dets)

    if accumulated_boxes:
      accumulated_boxes = np.vstack(accumulated_boxes)   # accumulate max_per_image boxes [xyxy]
      accumulated_labels = np.array(accumulated_labels, dtype=np.uint8) # n category labels
      mask_prob = net._predict_masks_from_boxes_and_labels(net_conv,
                              accumulated_boxes * im_scale,  # scaled boxes [xyxy]
                              accumulated_labels) # (n, 14, 14)
      mask_prob = mask_prob.data.cpu().numpy() # convert to numpy
      masks = recover_masks(mask_prob, accumulated_boxes, im.shape[0], im.shape[1]) # (n, ih, iw) uint8 [0-255]
      masks = (masks > 122.).astype(np.uint8)  # (n, ih, iw) uint8 [0,1] original size
      rles = [COCOmask.encode(np.asfortranarray(m)) for m in masks]
    else:
      # no detection survived for this image: np.vstack would fail on an
      # empty list, and there is nothing for the mask branch to do
      rles = []

    # add to all_rles: rles are ordered by class, same as the accumulation
    ri = 0
    for j in range(1, num_classes):
      ri_next = ri + all_boxes[j][i].shape[0]
      all_rles[j][i] = rles[ri:ri_next]
      assert len(all_rles[j][i]) == all_boxes[j][i].shape[0]
      ri = ri_next

    _t['misc'].toc()

    print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s' \
        .format(i + 1, num_images, _t['im_detect'].average_time(),
            _t['misc'].average_time()))

  det_file = os.path.join(output_dir, 'detections.pkl')
  with open(det_file, 'wb') as f:
    pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

  print('Evaluating detections')
  imdb.evaluate_detections(all_boxes, all_rles, output_dir)
Esempio n. 4
0
def eval_split(loader, model, crit, split, opt, max_per_image=100, thresh=0.):
  """Evaluate box and mask prediction on a split, one sentence at a time.

  For every sentence of every batch from loader.getTestBatch(split), the
  highest-scoring non-background (roi, class) pair returned by im_detect is
  taken as the predicted box. Its IoU with the ground-truth box is checked
  against 0.5, and a mask is predicted for it and compared with the
  ground-truth mask.

  Arguments:
  - loader        : data loader, must provide getTestBatch(split)
  - model         : network; switched to eval mode
  - crit          : unused
  - split         : split name handed to the loader
  - opt           : dict-like options; 'verbose' (default True) and
                    'num_sents' (default -1, meaning no limit) are read
  - max_per_image : unused
  - thresh        : unused
  Return a tuple of
  - box accuracy      : fraction of sentences with box IoU >= 0.5 (float,
                        0.0 if nothing was evaluated)
  - eval_seg_iou_list : the mask IoU thresholds [.5, .6, .7, .8, .9]
  - seg_correct       : int32 ndarray, #sentences reaching each threshold
  - seg_total         : number of sentences with a mask evaluation
  - cum_I, cum_U      : accumulated first and second outputs of
                        computeIoU_seg (presumably intersection and union)
  - num_sent          : number of sentences processed
  """
  verbose = opt.get('verbose', True)
  num_sents = opt.get('num_sents', -1)

  # set mode
  model.eval()

  # initialize
  loss_evals = 0
  acc = 0
  num_sent = 0
  finish_flag = False

  cum_I, cum_U = 0, 0
  eval_seg_iou_list = [.5, .6, .7, .8, .9]
  seg_correct = np.zeros(len(eval_seg_iou_list), dtype=np.int32)
  seg_total = 0

  while True:
    data = loader.getTestBatch(split)

    gt_boxes = data['gt_boxes'] # scaled
    gt_masks = data['gt_masks']
    labels = data['labels']
    bounds = data['bounds']

    # per-image inputs shared by all sentences; per-sentence entries are
    # overwritten inside the loop below
    blobs = {
      'data': data['data'],
      'im_info': data['im_info'],
      'file_name': data['file_name'],
      'bounds': bounds,
    }

    for i in range(labels.shape[0]):
      blobs['gt_boxes'] = gt_boxes[i:i+1, :]
      blobs['gt_masks'] = gt_masks[i:i+1, :, :]
      label = labels[i:i+1, :]
      # number of non-zero tokens; only that many leading columns are kept
      max_len = (label != 0).sum().data[0]
      blobs['labels'] = label[:, :max_len] # (1, max_len)
      blobs['sent_id'] = i

      scores, boxes, net_conv, im_scale = im_detect(model, blobs) # (n, 81), (n, 81*4), (n, 1024, H, W), float

      # best (roi, class) over foreground classes only; searching the full
      # score matrix could select the background column on an exact tie
      fg_scores = scores[:, 1:]
      pred_roi, fg_class = np.unravel_index(np.argmax(fg_scores), fg_scores.shape)
      pred_class = fg_class + 1  # undo the shift from dropping column 0
      pred_box = boxes[pred_roi, pred_class*4:(pred_class+1)*4]

      # gt boxes are in the scaled frame; bring them back to original size
      gt_box = blobs['gt_boxes'][0, :4] / im_scale

      iou = computeIoU_box(pred_box, gt_box) # both original size
      if iou >= 0.5:
        acc += 1
      loss_evals += 1

      # run mask branch on the single predicted box
      accumulated_boxes = np.array([pred_box])
      accumulated_labels = np.array([pred_class])
      mask_prob = model._predict_masks_from_boxes_and_labels(net_conv,
                              accumulated_boxes * im_scale,  # scaled boxes [xyxy]
                              accumulated_labels) # (n, 14, 14)
      mask_prob = mask_prob.data.cpu().numpy() # convert to numpy

      # original image size recovered from the scaled size in im_info
      orig_h = int(round(blobs['im_info'][0][0]/im_scale))
      orig_w = int(round(blobs['im_info'][0][1]/im_scale))
      pred_mask = recover_masks(mask_prob, accumulated_boxes, orig_h, orig_w) # (1, ih, iw) uint8 [0-255]
      pred_mask = np.squeeze((pred_mask > 122.).astype(np.uint8), axis=0)  # (ih, iw) uint8 [0,1] original size

      gt_mask = imresize(np.squeeze(blobs['gt_masks'], axis=0), size=pred_mask.shape, interp='nearest')

      # compute iou
      I, U = computeIoU_seg(pred_mask, gt_mask)
      cum_I += I
      cum_U += U
      # an empty union counts as a miss at every threshold
      seg_iou = I*1.0/U if U > 0 else 0.0
      for n_eval_iou, eval_seg_iou in enumerate(eval_seg_iou_list):
        seg_correct[n_eval_iou] += (seg_iou >= eval_seg_iou)
      seg_total += 1

      num_sent += 1

      # if used up
      if num_sents > 0 and loss_evals >= num_sents:
        finish_flag = True
        break

      torch.cuda.empty_cache()

    # print
    ix0 = bounds['it_pos_now']
    ix1 = bounds['it_max']
    if verbose:
      det_acc = acc*100.0/loss_evals if loss_evals > 0 else 0.0
      seg_acc = seg_correct[0]*100.0/seg_total if seg_total > 0 else 0.0
      seg_iou_all = cum_I*100.0/cum_U if cum_U > 0 else 0.0
      print('evaluating [%s] ... image[%d/%d]\'s sents, det acc=%.2f%%, seg acc=%.2f%%, seg IoU=%.2f%%' % \
            (split, ix0, ix1, det_acc, seg_acc, seg_iou_all))

    # if we wrapped around the split
    if finish_flag or bounds['wrapped']:
      break

  # force float division so the accuracy is not truncated under Python 2
  box_acc = acc*1.0/loss_evals if loss_evals > 0 else 0.0
  return box_acc, eval_seg_iou_list, seg_correct, seg_total, cum_I, cum_U, num_sent