def get_bounding_boxes(rois, cls_prob, bbox_pred, im_info, allBoundingBoxes, index):
    """Decode Faster R-CNN outputs and register detections for one image.

    Applies (optional) bounding-box regression, rescales boxes back to the
    original image, then per foreground class: thresholds scores, runs NMS,
    and adds up to 10 detections with score > 0.3 to ``allBoundingBoxes``
    (keyed by image ``index``).

    Returns the (mutated) ``allBoundingBoxes`` collection.
    """
    global nusc_classes
    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]
    thresh = 0.05  # minimum class score to consider a detection at all
    if cfg.TRAIN.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            box_deltas = box_deltas.view(1, -1, 4)
        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))
    # im_info[0][2] is the image resize scale; undo it to get original coords.
    pred_boxes /= im_info[0][2].item()
    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    bounding_boxes = []
    for j in range(1, len(nusc_classes)):  # class 0 is background
        inds = torch.nonzero(scores[:, j] > thresh).view(-1)
        # if there is det
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = pred_boxes[inds, :]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], 0.3)
            cls_dets = cls_dets[keep.view(-1).long()]
            dets = cls_dets.cpu().numpy()
            # Keep at most the 10 highest-scoring detections per class.
            # NOTE: reuse `dets` here; the original re-copied cls_dets to
            # CPU/numpy on every loop iteration for no benefit.
            for i in range(min(10, dets.shape[0])):
                bbox = [int(np.round(x)) for x in dets[i, :4]] + [j]
                score = dets[i, -1]
                if score > 0.3:
                    bounding_boxes += [bbox]
                    bb = BoundingBox(index, j, bbox[0], bbox[1], bbox[2], bbox[3],
                                     CoordinatesType.Absolute, None,
                                     BBType.Detected, score, format=BBFormat.XYWH)
                    allBoundingBoxes.addBoundingBox(bb)
    return allBoundingBoxes
def generate_pseudo_label(output_dir, sp_dir, q_im_path, model, num_shot):
    """Detect objects in the query image conditioned on support shots.

    Runs the detector once per support shot; with multiple shots the
    per-shot detections are concatenated, sorted by score, and merged
    with NMS. With a single shot the raw detections are returned as-is.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    query_im = np.asarray(Image.open(q_im_path))[:, :, :3]

    if num_shot <= 1:
        # Single support shot: no cross-shot merging needed.
        support_im = np.asarray(Image.open(os.path.join(sp_dir, 'shot_1.jpg')))[:, :, :3]
        return run_detection(support_im, query_im, model)

    # Multiple shots: collect detections from every shot, then fuse.
    per_shot = []
    for shot_idx in range(1, num_shot + 1):
        support_im = np.asarray(
            Image.open(os.path.join(sp_dir, f'shot_{shot_idx}.jpg')))[:, :, :3]
        per_shot.append(run_detection(support_im, query_im, model))
    merged = torch.cat(per_shot, 0)

    # Sort by score (column 4), highest first, then suppress duplicates.
    _, order = torch.sort(merged[:, 4], 0, True)
    merged = merged[order]
    keep = nms(merged[:, :4], merged[:, 4], cfg.TEST.NMS)
    return merged[keep.view(-1).long()]
def postprocess_dets(scores, bboxes, rois, im_info, pooled_features):
    """ Postprocess detections to get meaningful results.
    Inputs:
    - scores: tensor, (N, num_classes + 1)
    - bboxes: tensor, (1, N, 4 * (num_classes + 1))
    - rois: tensor, (1, N, 5)
    - im_info: tensor, (1, 3)
    Outputs:
    - tensor (Ndets, 6), like (xmin, ymin, xmax, ymax, score, class)
    - tensor (Ndets, D), pooled features for the kept detections

    If no class clears the score threshold, empty (0, 6) / (0, D) tensors
    are returned instead of crashing in torch.cat.
    """
    num_classes = scores.shape[1]  # including bg
    use_cuda = USE_CUDA and torch.cuda.is_available()

    # Apply bounding-box regression deltas (un-normalize with fixed stds).
    std = torch.FloatTensor((0.1, 0.1, 0.2, 0.2))
    std = std.cuda() if use_cuda else std
    bboxes = bboxes.view(-1, 4) * std
    bboxes = bboxes.view(1, -1, 4 * num_classes)
    bboxes = bbox_transform_inv(rois[:, :, 1:5], bboxes, 1)
    bboxes = clip_boxes(bboxes, im_info, 1)
    bboxes /= im_info[0][-1]  # undo image resize scale
    bboxes = bboxes[0]  # (N, 4 * (num_classes + 1))

    # Class-wise NMS over foreground classes.
    detections = []
    detection_features = []
    for cid in range(1, num_classes):
        inds = torch.nonzero(scores[:, cid] > 0.05).view(-1)
        if inds.numel() > 0:
            cls_scores = scores[:, cid][inds]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = bboxes[inds][:, cid * 4:(cid + 1) * 4]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], 0.3)
            cls_dets = cls_dets[keep.view(-1).long()]  # (keep, 5)
            # Class ids in the output are 0-based (background removed).
            class_ids = torch.ones(len(cls_dets), 1) * (cid - 1)
            cls_dets = torch.cat((
                cls_dets,
                class_ids.cuda() if use_cuda else class_ids), dim=1)
            detections.append(cls_dets)
            # Filter pooled features with the same inds -> order -> keep chain
            # used for the detections so rows stay aligned.
            detection_features.append(pooled_features[inds][order][keep])

    if not detections:
        # No detection above threshold: return empty tensors with the right
        # trailing dimensions (torch.cat would raise on an empty list).
        return scores.new_zeros((0, 6)), pooled_features[:0]
    return torch.cat(detections, dim=0), torch.cat(detection_features, dim=0)
def nms_for_results(result_json, nms_threshold, output_json):
    """Apply per-image NMS to a COCO-style result file and write the survivors.

    Reads a list of {"image_id", "bbox" (xywh), "score", ...} dicts, groups
    them by image, suppresses overlapping boxes, and dumps the kept items.
    """
    # Use a context manager so the input file handle is closed (the original
    # json.load(open(...)) leaked it).
    with open(result_json, "r") as f:
        all_boxes = json.load(f)
    print("Before NMS:", len(all_boxes))
    # reformat: group detections by image id
    all_data = {}
    for item in all_boxes:
        imgid = item["image_id"]
        if imgid not in all_data:
            all_data[imgid] = []
        all_data[imgid].append(item)
    after_nms = []
    for imgid in all_data.keys():
        # NOTE: this assignment was commented out in the original, leaving
        # `all_items` unbound — restored so the loop actually works.
        all_items = all_data[imgid]
        all_items.sort(key=lambda x: x["score"], reverse=True)
        pred_boxes = list(map(lambda x: xywh2xyxy(x["bbox"]), all_items))
        cls_scores = list(map(lambda x: x["score"], all_items))
        pred_boxes = Variable(torch.Tensor(pred_boxes))
        cls_scores = Variable(torch.Tensor(cls_scores))
        keep = nms(pred_boxes, cls_scores, nms_threshold)
        keep = keep.view(-1).long().cpu()
        keep_items = list(map(lambda x: all_items[x], keep))
        after_nms.extend(keep_items)
    print("After NMS:", len(after_nms))
    with open(output_json, "w") as f:
        json.dump(after_nms, f)
def __call__(self, ori_img):
    """Run Faster R-CNN detection on a single numpy image.

    Returns (im2show, pred_boxes, scores, last_cls_dets) where im2show has
    per-class detections above 0.5 drawn on it.
    """
    thresh = 0.5
    assert isinstance(ori_img, np.ndarray), "input must be a numpy array!"
    # Grayscale -> fake RGB by stacking the single channel.
    if len(ori_img.shape) == 2:
        ori_img = ori_img[:, :, np.newaxis]
        ori_img = np.concatenate((ori_img, ori_img, ori_img), axis=2)

    blobs, im_scales = _get_image_blob(ori_img)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    im_info_np = np.array(
        [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
        dtype=np.float32)
    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)  # NHWC -> NCHW
    im_info_pt = torch.from_numpy(im_info_np)

    # initilize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    # ship to cuda
    if self.device == "cuda":
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
    # make variable (legacy API; torch.no_grad below is what disables grads)
    im_data = Variable(im_data, volatile=True)
    im_info = Variable(im_info, volatile=True)
    num_boxes = Variable(num_boxes, volatile=True)
    gt_boxes = Variable(gt_boxes, volatile=True)
    with torch.no_grad():
        im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.resize_(1, 1, 5).zero_()
        num_boxes.resize_(1).zero_()

    # infer
    rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = self.net(im_data, im_info, gt_boxes, num_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]
    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            if self.device == "cuda":
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                    + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            else:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                    + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
            box_deltas = box_deltas.view(1, -1, 4 * len(self.pascal_class))
        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))
    pred_boxes /= im_scales[0]  # back to original image coordinates
    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    im2show = np.copy(ori_img)
    # Initialize so the return below cannot hit an UnboundLocalError when
    # no class produces a detection above `thresh` (original bug). Also
    # fixed py2 `xrange` -> `range`.
    cls_dets = scores.new_zeros((0, 5))
    for j in range(1, len(self.pascal_class)):
        inds = torch.nonzero(scores[:, j] > thresh).view(-1)
        # if there is det
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            im2show = vis_detections(im2show, self.pascal_class[j],
                                     cls_dets.cpu().numpy(), 0.5)
    # NOTE: cls_dets holds only the LAST detected class (pre-existing
    # contract, preserved).
    return im2show, pred_boxes, scores, cls_dets.cpu().numpy()
def stomata_count(fasterRCNN, image, cuda, pascal_classes):
    """Detect and count stomata in one image with a Faster R-CNN model.

    Returns (num_stomata, label_stomata): the count reported by
    vis_detections at confidence 0.9 and a copy of the image with the
    detections drawn on it.
    """
    if cuda:
        cfg.USE_GPU_NMS = True
    im_in = image
    # Grayscale -> fake RGB by stacking the single channel.
    if len(im_in.shape) == 2:
        im_in = im_in[:, :, np.newaxis]
        im_in = np.concatenate((im_in, im_in, im_in), axis=2)
    blobs, im_scales = _get_image_blob(im_in)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                          dtype=np.float32)
    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)  # NHWC -> NCHW
    im_info_pt = torch.from_numpy(im_info_np)

    # initilize the tensor holder here.
    im_data = torch.FloatTensor()
    im_info = torch.FloatTensor()
    num_boxes = torch.LongTensor()
    gt_boxes = torch.FloatTensor()
    # ship to cuda
    if cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
    with torch.no_grad():
        im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.resize_(1, 1, 5).zero_()
        num_boxes.resize_(1).zero_()

    rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]
    class_agnostic = False
    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            if class_agnostic:
                if cuda:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4)
            else:
                if cuda:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))
        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))
    pred_boxes /= im_scales[0]  # back to original image coordinates
    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    num_stomata = 0
    label_stomata = np.copy(image)
    for j in range(1, len(pascal_classes)):  # fixed py2 xrange -> range
        # BUGFIX: was `> int(0.5)`, i.e. > 0 — which kept essentially every
        # proposal. The intended score cutoff is 0.5.
        inds = torch.nonzero(scores[:, j] > 0.5).view(-1)
        # if there is det
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            dets = cls_dets.cpu().numpy()
            label_stomata, num_stomata = vis_detections(label_stomata,
                                                        pascal_classes[j],
                                                        dets, 0.9)
    return num_stomata, label_stomata
def val(epoch, fasterRCNN, cfg):
    """Evaluate `fasterRCNN` on the validation imdb and return its mAP.

    BUGFIX: the original referenced an undefined name `imdb`
    (`imdb.classes`, `imdb.num_classes`) — only `imdb_val` exists in this
    scope, so every evaluation crashed with a NameError. All occurrences
    now use `imdb_val`.

    NOTE(review): `args`, `im_data`, `im_info`, `gt_boxes` and `num_boxes`
    are read from module-level globals — confirm they are initialized by
    the training script before val() is called.
    """
    print('=== start val in epoch {} ==='.format(epoch))
    # [val set] — disable flipping while building the val roidb
    cfg.TRAIN.USE_FLIPPED = False
    cfg.USE_GPU_NMS = args.cuda
    imdb_val, roidb_val, ratio_list_val, ratio_index_val = combined_roidb(
        args.imdbval_name, False)
    imdb_val.competition_mode(on=True)
    val_size = len(roidb_val)
    print('{:d} val roidb entries'.format(len(roidb_val)))
    cfg.TRAIN.USE_FLIPPED = True  # change again for training

    # [val dataset]
    dataset_val = roibatchLoader(roidb_val, ratio_list_val, ratio_index_val, 1,
                                 imdb_val.num_classes, training=False,
                                 normalize_as_imagenet=True)
    dataloader_val = torch.utils.data.DataLoader(dataset_val,
                                                 batch_size=1,
                                                 shuffle=False,
                                                 num_workers=0)

    output_dir = get_output_dir(imdb_val, 'val_in_training')
    data_iter_val = iter(dataloader_val)
    num_images = len(imdb_val.image_index)
    thresh = 0.0
    max_per_image = 100
    # all_boxes[class][image] = (K, 5) array of detections
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb_val.num_classes)]
    fasterRCNN.eval()
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):
        data = next(data_iter_val)
        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])
        det_tic = time.time()
        rois, cls_prob, bbox_pred = fasterRCNN(im_data, im_info, gt_boxes,
                                               num_boxes)
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb_val.classes))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))
        pred_boxes /= data[1][0][2].item()  # undo the image resize scale
        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        for j in range(1, imdb_val.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array
        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb_val.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb_val.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        misc_toc = time.time()
        nms_time = misc_toc - misc_tic
        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r'
                         .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()
    print('Evaluating detections')
    mAP = imdb_val.evaluate_detections(all_boxes, output_dir, result_file=None)
    del dataset_val, dataloader_val
    return mAP
def main(cv2_img, fasterRCNN, all_boxes, query, _query_im):
    """Run a query-conditioned detection pass on one image and show results.

    Resizes the input, runs the query-based Faster R-CNN, NMS-filters the
    detections, stores them in `all_boxes`, and displays the annotated image
    next to the query image with matplotlib.

    NOTE(review): `thresh` is read below but never defined in this function —
    it must come from a module-level global; confirm it exists.
    """
    index = 0
    # data[1] is intentionally left 0 — only slots 0, 2, 3, 4 are used below.
    data = [0, 0, 0, 0, 0]
    im = cv2_img
    im = cv2.resize(im, dsize=(640, 480), interpolation=cv2.INTER_LINEAR)
    _im = np.copy(im)  # keep an unnormalized copy for visualization
    # make im_data: preprocess and reorder to NCHW
    im, im_scale = prep_im_for_blob(im, target_size=600)
    im = torch.tensor(im)
    im = torch.unsqueeze(im, 0)
    im = im.transpose(1, 3)
    im_data = im.transpose(2, 3)
    im_data = data[0] = im_data.cuda()
    # Hard-coded im_info (h, w, scale) — presumably matches the 640x480 ->
    # target_size=600 resize; verify against prep_im_for_blob's output.
    im_info = data[2] = torch.tensor([[600, 899, 1.4052]])
    gt_boxes = data[3] = torch.rand(1, 4, 5)  # don't care
    catgory = data[4] = torch.tensor([1])

    rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, _, RCNN_loss_bbox, \
        rois_label, weight = fasterRCNN(im_data, query, im_info, gt_boxes,
                                        catgory)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]
    # NOTE(review): if cfg.TEST.BBOX_REG is False, pred_boxes is never
    # assigned and the code below raises — confirm BBOX_REG is always on.
    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            box_deltas = box_deltas.view(1, -1, 4)
        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    pred_boxes /= data[2][0][2].item()  # undo the image resize scale
    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    im2show = np.copy(_im)
    inds = torch.nonzero(scores > thresh).view(-1)
    # if there is det
    if inds.numel() > 0:
        cls_scores = scores[inds]
        _, order = torch.sort(cls_scores, 0, True)
        cls_boxes = pred_boxes[inds, :]
        cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
        cls_dets = cls_dets[order]
        keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
        cls_dets = cls_dets[keep.view(-1).long()]
        # catgory is a 1-element tensor; list indexing relies on
        # Tensor.__index__ for single-element tensors.
        all_boxes[data[4]][index] = cls_dets.cpu().numpy()
        im2show = vis_detections(im2show, 'shot', cls_dets.cpu().numpy(), 0.8)
    # Show the annotated image side by side with the query image.
    _im2show = np.concatenate((im2show, _query_im), axis=1)
    plt.imshow(_im2show)
    plt.show()
def forward(self, input, use_gt_boxes=False):
    """Propose relation (subject, object) ROI pairs scored against an NLP query.

    Builds all ordered ROI index pairs (minus the diagonal), encodes each
    pair's features with a two-step LSTM, scores pairs by cosine similarity
    to `nlp_features`, then (unless `use_gt_boxes`) applies NMS over the
    concatenated subject/object boxes.

    Args:
        input: tuple of
            - rois: (B, N, 5) boxes as [batch_ind, x1, y1, x2, y2]
            - im_info: per-image (height, width, scale) info
            - roi_feat: (B, N, D) per-ROI features
            - nlp_features: per-image language feature vector
            - cfg_key: 'TRAIN' or 'TEST', selects RELPN_* config values
        use_gt_boxes: if True, skip NMS and just take the top pairs.

    Returns:
        output: (B, topN, 9) rows of [b, subj x1 y1 x2 y2, obj x1 y1 x2 y2],
            zero-padded past the number of kept pairs
        output_proposals: (B, topN, 2) kept (subject_idx, object_idx) pairs
        output_score: (B, topN, 1) pair scores
    """
    rois = input[0]
    im_info = input[1]
    roi_feat = input[2]
    nlp_features = input[3]
    cfg_key = input[4]

    assert roi_feat.dim() == 3, "roi_feat must be B x N x D shape"
    B = roi_feat.size(0)
    N = roi_feat.size(1)
    D = roi_feat.size(2)
    if cfg.TRAIN.RELPN_WITH_BBOX_INFO:
        # Append each ROI's raw (x1, y1, x2, y2) to its feature vector.
        rois_nm = rois.new(rois.size(0), rois.size(1), 4)
        xx = im_info[:, 1]
        yy = im_info[:, 0]
        rois_nm[:, :, :2] = rois[:, :, 1:3]  # / xx[:, None] (normalization disabled)
        rois_nm[:, :, 2:] = rois[:, :, 3:5]  # / yy[:, None] (normalization disabled)
        roi_feat4prop = torch.cat((roi_feat, Variable(rois_nm)), 2)
        D += 4
    else:
        roi_feat4prop = roi_feat
    roi_feat4prop = self.sub_feat(
        roi_feat4prop)  # feat dim reduction to 256

    batch_size = rois.size(0)
    pre_nms_topN = cfg[cfg_key].RELPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[
        cfg_key].RELPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RELPN_NMS_THRESH
    min_size = cfg[cfg_key].RELPN_MIN_SIZE
    if DEBUG:
        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
        print('scale: {}'.format(im_info[2]))

    # Build the full N x N grid of (subject_idx, object_idx) pairs.
    map_x = np.arange(0, rois.size(1))
    map_y = np.arange(0, rois.size(1))
    map_x_g, map_y_g = np.meshgrid(map_x, map_y)
    map_yx = torch.from_numpy(
        np.vstack((map_y_g.ravel(),
                   map_x_g.ravel())).transpose()).cuda()
    proposals = map_yx.expand(batch_size,
                              rois.size(1) * rois.size(1),
                              2)  # B x (N * N) x 2
    # filter diagonal entries (an ROI paired with itself)
    keep = self._filter_diag(proposals)
    proposals = proposals.contiguous().view(
        -1, 2)[keep.nonzero().squeeze(), :].contiguous().view(
            batch_size, -1, 2).contiguous()

    all_box_pairs_fet = []  # bs x pairs_Num x 151
    all_box_pairs_roi = []  # bs x pairs_Num x 8
    all_box_pairs_score = []
    # Keep ALL pairs in the padded output; the author noted that removing
    # this override hurt training results.
    post_nms_topN = proposals.size(
        1
    )
    output = rois.new(batch_size, post_nms_topN, 9).zero_()
    output_score = rois.new(batch_size, post_nms_topN, 1).zero_()
    output_proposals = proposals.new(batch_size, post_nms_topN, 2).zero_()
    nlp_features_repeated = nlp_features.unsqueeze(1).repeat(
        1, proposals.size(1), 1)
    for b in range(batch_size):
        # Gather per-pair subject/object features for this image.
        proposals_subject_fet_i = roi_feat4prop[b][proposals[
            b, :, 0], :]
        proposals_object_fet_i = roi_feat4prop[b][proposals[
            b, :, 1], :]
        # Encode each (subject, object) pair as a length-2 sequence with
        # the LSTM; inputs are pairNum x 2 x 256.
        box_pairs_fet = (torch.cat(
            (proposals_subject_fet_i, proposals_object_fet_i),
            0)).view(2, -1, self.sub_feat_size)
        outputs, state = self.lstm_encoder(box_pairs_fet, None)
        # outputs is (sequence, minibatch, features); take the last step.
        select_features = self.lstm_out(outputs[-1, :, :])
        # Score each pair by similarity to the language feature.
        scores_i = F.cosine_similarity(select_features,
                                       nlp_features_repeated[b],
                                       dim=1,
                                       eps=1e-6)
        proposals_i = proposals[b]
        _, order_i = torch.sort(scores_i, descending=True)  # high to low
        # Take top pre_nms_topN pairs before NMS.
        if pre_nms_topN > 0 and pre_nms_topN < scores_i.numel():
            order_single = order_i[:pre_nms_topN]
        else:
            order_single = order_i
        proposals_single = proposals_i[order_single, :]
        scores_single = scores_i[order_single].view(-1, 1)
        if not use_gt_boxes:
            # NMS over the 8-dim concatenated subject+object boxes.
            proposals_subject = rois[b][proposals_single[:, 0], :][:, 1:5]
            proposals_object = rois[b][proposals_single[:, 1], :][:, 1:5]
            rel_rois_final = torch.cat(
                (proposals_subject, proposals_object), 1)
            keep_idx_i = nms(rel_rois_final, scores_single.squeeze(1),
                             1).long().view(-1)
            keep_idx_i = keep_idx_i.long().view(-1)
            if post_nms_topN > 0:
                keep_idx_i = keep_idx_i[:post_nms_topN]
            proposals_single = proposals_single[keep_idx_i, :]
            scores_single = scores_single[keep_idx_i, :]
        else:
            # Ground-truth boxes: just truncate to the top pairs.
            proposals_single = proposals_single[:post_nms_topN, :]
            scores_single = scores_single[:post_nms_topN, :]
        # padding 0 at the end.
        num_proposal = proposals_single.size(0)
        output[b, :num_proposal, 0] = b
        output[b, :num_proposal, 1:5] = rois[b][proposals_single[:, 0], :][:, 1:5]
        output[b, :num_proposal, 5:] = rois[b][proposals_single[:, 1], :][:, 1:5]
        output_score[b, :num_proposal, 0] = scores_single.squeeze()
        output_proposals[b, :num_proposal, :] = proposals_single
    return output, output_proposals, output_score
def get_detections_from_im(fasterRCNN, classes, im_file, args, conf_thresh=0.2):
    """obtain the image_info for each image,
    im_file: the path of the image

    return: dict of {'image_h', 'image_w', 'num_boxes', 'text', ...}

    Fixed: py2 `xrange` -> `range` (the rest of the function already uses
    Python-3-only torch APIs such as torch.where/torch.argsort).
    """
    # initilize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    # ship to cuda
    if args.cuda > 0:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
    # make variable
    with torch.no_grad():
        im_data = Variable(im_data)
        im_info = Variable(im_info)
        num_boxes = Variable(num_boxes)
        gt_boxes = Variable(gt_boxes)
    if args.cuda > 0:
        cfg.CUDA = True
    if args.cuda > 0:
        fasterRCNN.cuda()
    fasterRCNN.eval()

    # load image; rgb -> bgr for the Caffe-style pretrained backbone
    im_in = np.array(imread(im_file))
    if len(im_in.shape) == 2:
        im_in = im_in[:, :, np.newaxis]
        im_in = np.concatenate((im_in, im_in, im_in), axis=2)
    im = im_in[:, :, ::-1]
    vis = True
    blobs, im_scales = _get_image_blob(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                          dtype=np.float32)
    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)  # NHWC -> NCHW
    im_info_pt = torch.from_numpy(im_info_np)
    with torch.no_grad():
        im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.resize_(1, 1, 5).zero_()
        num_boxes.resize_(1).zero_()

    det_tic = time.time()
    # the region features[box_num * 2048] are required.
    rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label, pooled_feat = fasterRCNN(im_data, im_info, gt_boxes,
                                             num_boxes, pool_feat=True)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]
    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            if args.class_agnostic:
                if args.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4)
            else:
                if args.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(classes))
        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))
    pred_boxes /= im_scales[0]  # back to original image coordinates
    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    det_toc = time.time()
    detect_time = det_toc - det_tic
    misc_tic = time.time()

    # max_conf[k] = best class score of proposal k among classes where it
    # survived NMS; used below to pick which proposals to keep overall.
    max_conf = torch.zeros((pred_boxes.shape[0]))
    if args.cuda > 0:
        max_conf = max_conf.cuda()
    if vis:
        im2show = np.copy(im)
    for j in range(1, len(classes)):
        inds = torch.nonzero(scores[:, j] > conf_thresh).view(-1)
        # if there is det
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if args.class_agnostic:
                cls_boxes = pred_boxes[inds, :]
            else:
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            index = inds[order[keep]]
            max_conf[index] = torch.where(scores[index, j] > max_conf[index],
                                          scores[index, j], max_conf[index])
            if vis:
                im2show = vis_detections(im2show, classes[j],
                                         cls_dets.cpu().numpy(), 0.5)

    if args.cuda > 0:
        keep_boxes = torch.where(max_conf >= conf_thresh, max_conf,
                                 torch.tensor(0.0).cuda())
    else:
        keep_boxes = torch.where(max_conf >= conf_thresh, max_conf,
                                 torch.tensor(0.0))
    keep_boxes = torch.squeeze(torch.nonzero(keep_boxes), dim=-1)
    # Clamp the number of kept proposals into [MIN_BOXES, MAX_BOXES].
    if len(keep_boxes) < MIN_BOXES:
        keep_boxes = torch.argsort(max_conf, descending=True)[:MIN_BOXES]
    elif len(keep_boxes) > MAX_BOXES:
        keep_boxes = torch.argsort(max_conf, descending=True)[:MAX_BOXES]

    objects = torch.argmax(scores[keep_boxes][:, 1:], dim=1)
    box_dets = np.zeros((len(keep_boxes), 4))
    boxes = pred_boxes[keep_boxes]
    name_list = []
    box_caption_feature = np.zeros((len(keep_boxes), 300))
    box_caption_mask = np.ones(len(keep_boxes))
    for i in range(len(keep_boxes)):
        kind = objects[i] + 1  # shift past background class
        bbox = boxes[i, kind * 4:(kind + 1) * 4]
        tmp_dets = np.array(bbox.cpu())
        # Mask out near-degenerate boxes (area <= 10 px).
        if (tmp_dets[2] - tmp_dets[0]) * (tmp_dets[3] - tmp_dets[1]) <= 10:
            box_caption_mask[i] = 0
        class_name = classes[1:][objects[i]]
        box_dets[i] = tmp_dets
        name_list.append(class_name)
        # Word vector of the class name, used as the caption feature.
        doc = nlp1(class_name)
        token_vector = nlp2(doc[0].text).vector
        box_caption_feature[i, :] = token_vector
    return {
        'image_h': np.size(im, 0),
        'image_w': np.size(im, 1),
        'num_boxes': len(keep_boxes),
        #'boxes': box_dets,  # region shape 4 * 36, 4 is the xy positions
        #'features': (pooled_feat[keep_boxes].cpu()).detach().numpy(),
        'text': name_list,
        #'text_feature': box_caption_feature,
        # 'text_mask': box_caption_mask
    }
def run_model(support_im_paths, query_path, cnt_shot, output_path_folder):
    """Few-shot detection demo: detect query-image objects from support shots.

    Loads the support images, preprocesses query + supports, loads a
    pretrained multi-shot model, runs detection, zeroes out implausibly
    large boxes, and writes the visualization to
    `output_path_folder/result<cnt_shot>.jpg`.

    NOTE(review): the model checkpoint is re-loaded on every call — hoist
    get_model() out if this is called in a loop.
    """
    CWD = os.getcwd()
    print(support_im_paths)
    n_shot = len(support_im_paths)
    # Fixed seed so the support-shot ordering is reproducible; with
    # k == len(paths), sample() is just a deterministic shuffle.
    random.seed(0)
    im_path_list = random.sample(support_im_paths, k=n_shot)
    im_list = []
    for i, im_path in enumerate(im_path_list):
        im = Image.open(im_path)
        im_list.append(np.asarray(im))
    support_data = support_im_preprocess(im_list, cfg, 320, n_shot)

    # query
    im = np.asarray(Image.open(query_path))
    im2show = im.copy()
    query_data, im_info, gt_boxes, num_boxes = query_im_preprocess(im, cfg)

    # prepare data: copy host tensors into the (possibly CUDA) holders
    data = [query_data, im_info, gt_boxes, num_boxes, support_data]
    im_data, im_info, num_boxes, gt_boxes, support_ims = prepare_variable()
    with torch.no_grad():
        im_data.resize_(data[0].size()).copy_(data[0])
        im_info.resize_(data[1].size()).copy_(data[1])
        gt_boxes.resize_(data[2].size()).copy_(data[2])
        num_boxes.resize_(data[3].size()).copy_(data[3])
        support_ims.resize_(data[4].size()).copy_(data[4])

    # model: configure anchors and load the pretrained checkpoint
    cfg_from_list(
        ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'])
    model_dir = os.path.join(CWD, 'models')
    load_path = os.path.join(model_dir,
                             'faster_rcnn_{}_{}_{}.pth'.format(1, 11, 34467))
    model = get_model('multi', load_path, n_shot)

    start_time = time.time()
    rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = model(im_data, im_info, gt_boxes, num_boxes, support_ims,
                           gt_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]
    box_deltas = bbox_pred.data
    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
        # Optionally normalize targets by a precomputed mean and stdev
        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
        box_deltas = box_deltas.view(1, -1, 4)
    pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
    pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    # re-scale boxes to the origin img scale
    pred_boxes /= data[1][0][2].item()
    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    # Single foreground class (index 1): threshold, sort, NMS.
    thresh = 0.05
    inds = torch.nonzero(scores[:, 1] > thresh).view(-1)
    cls_scores = scores[:, 1][inds]
    _, order = torch.sort(cls_scores, 0, True)
    cls_boxes = pred_boxes[inds, :]
    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
    cls_dets = cls_dets[order]
    keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
    cls_dets = cls_dets[keep.view(-1).long()]
    # Zero the score of any box wider/taller than half the image —
    # heuristic to drop spurious whole-image detections.
    for i in range(cls_dets.shape[0]):
        w = cls_dets[i, 2] - cls_dets[i, 0]
        h = cls_dets[i, 3] - cls_dets[i, 1]
        if w > 0.5 * im2show.shape[1] or h > 0.5 * im2show.shape[0]:
            cls_dets[i, 4] = 0
    end_time = time.time()
    im2show = vis_detections(im2show, ' ', cls_dets.cpu().numpy(), 0.5)
    output_path = os.path.join(output_path_folder,
                               'result' + str(cnt_shot) + '.jpg')
    # RGB -> BGR for cv2.imwrite
    cv2.imwrite(output_path, im2show[:, :, ::-1])
    print(cls_dets)
    print(end_time - start_time)
def forward(self, input):
    """RPN proposal layer: turn per-anchor scores and deltas into RoIs.

    For each (H, W) location i, the 9 anchor boxes centred on cell i are
    refined by the predicted bbox deltas (H = feat_h = h/16, W = feat_w = w/16).

    @param input: tuple (rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key) with
        shapes ((batch,18,H,W), (batch,36,H,W), (batch,2), 'train/test')
    @return: rois (batch, post_nms_topN, 5); each row is
        [batch_ind, x1, y1, x2, y2], zero-padded past the kept proposals.
    """
    # take the positive (object) scores
    scores = input[0][:, self._num_anchors:, :, :]  # (batch, 9, H, W)
    bbox_deltas = input[1]  # (batch, 36, H, W)
    im_info = input[2]  # (batch, 2)
    cfg_key = input[3]  # 'train/test'

    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N  # 6000 for train
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # 300 for test
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH  # 0.7
    min_size = cfg[cfg_key].RPN_MIN_SIZE  # 16 (read but unused: step 3 below is skipped)

    batch_size = bbox_deltas.size(0)  # batch

    # compute the shift value for H*W cells
    feat_height, feat_width = scores.size(2), scores.size(3)  # H, W
    shift_x = np.arange(0, feat_width) * self._feat_stride
    shift_y = np.arange(0, feat_height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = torch.from_numpy(
        np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                   shift_y.ravel())).transpose())
    shifts = shifts.contiguous().type_as(scores).float()  # (H*W, 4)

    # copy and shift the 9 anchors for H*W cells
    # copy the H*W*9 anchors for batch images
    A = self._num_anchors  # 9
    K = shifts.size(0)  # H * W
    self._anchors = self._anchors.type_as(scores)
    anchors = self._anchors.view(1, A, 4) + shifts.view(
        K, 1, 4)  # (H*W, 9, 4) anchors for 1 image
    anchors = anchors.view(1, K * A, 4).expand(
        batch_size, K * A, 4)  # (batch, H*W*9, 4) anchors for batch images

    # make bbox_deltas the same order with the anchors:
    bbox_deltas = bbox_deltas.permute(
        0, 2, 3, 1).contiguous()  # (batch, 36, H, W) --> (batch, H, W, 36)
    bbox_deltas = bbox_deltas.view(
        batch_size, -1, 4)  # (batch, H, W, 36) --> (batch, H*W*9, 4)

    # Same story for the scores:
    scores = scores.permute(
        0, 2, 3, 1).contiguous()  # (batch, 9, H, W) --> (batch, H, W, 9)
    scores = scores.view(batch_size, -1)  # (batch, H, W, 9) --> (batch, H*W*9)

    # Finetune [x1, y1, x2, y2] of anchors according to the predicted bbox_delta
    proposals = bbox_transform_inv(anchors, bbox_deltas,
                                   batch_size)  # (batch, H*W*9, 4)
    # 2. clip predicted boxes to the image, make sure [x1, y1, x2, y2] are within the image [h, w]
    proposals = clip_boxes(proposals, im_info, batch_size)  # (batch, H*W*9, 4)

    scores_keep = scores
    proposals_keep = proposals

    # 3. remove predicted bboxes whose height or width < threshold
    # (NOTE: convert min_size to input image scale stored in im_info[2])
    # 4. sort all (proposal, score) pairs by score from highest to lowest
    _, order = torch.sort(scores_keep, 1, True)  # high score to low score

    # initialise the proposals by zero tensor
    output = scores.new(batch_size, post_nms_topN, 5).zero_()
    # for each image
    for i in range(batch_size):
        proposals_single = proposals_keep[i]
        scores_single = scores_keep[i]
        order_single = order[i]

        # 5. take top pre_nms_topN proposals before NMS (e.g. 6000)
        # NOTE(review): the guard compares against the numel of the whole
        # batch tensor, not one image's score count -- confirm this matches
        # the intended upstream behaviour.
        if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
            order_single = order_single[:pre_nms_topN]

        proposals_single = proposals_single[order_single, :]
        scores_single = scores_single[order_single].view(-1, 1)

        # 6. apply NMS (e.g. threshold = 0.7)
        keep_idx_i = nms(proposals_single, scores_single.squeeze(1), nms_thresh)
        keep_idx_i = keep_idx_i.long().view(-1)

        # 7. take after_nms_topN proposals after NMS (e.g. 300 for test, 2000 for train)
        if post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:post_nms_topN]
        # 8. return the top proposals (-> RoIs top)
        proposals_single = proposals_single[keep_idx_i, :]
        scores_single = scores_single[keep_idx_i, :]

        # 9. padding 0 at the end.
        num_proposal = proposals_single.size(0)
        output[i, :, 0] = i
        output[i, :num_proposal, 1:] = proposals_single

    return output  # (batch, post_nms_topN, 5), each row is [batch_ind, x1, y1, x2, y2]
def test_net(model=None, image=None, params=None, bg=None, cls=None):
    """Run bubble detection on one pre-processed image and extract contours.

    model: Faster R-CNN network.
    image: original RGB image array (H, W, 3).
    params: tuple (blob, scale, label) -- network input blob (NHWC), its
        scale factor sequence, and the class-label list (index 0 = background).
    bg, cls: forwarded unchanged to bubble_utils.get_cnt_bubble.

    Returns (demo, image, bubbles, dets_bubbles); bubbles/dets_bubbles stay
    empty lists when nothing passes the score threshold.
    """
    blob, scale, label = params
    with torch.no_grad():
        # pre-processing data for passing net
        im_data = Variable(torch.FloatTensor(1))
        im_info = Variable(torch.FloatTensor(1))
        num_boxes = Variable(torch.LongTensor(1))
        gt_boxes = Variable(torch.FloatTensor(1))

        im_info_np = np.array([[blob.shape[1], blob.shape[2], scale[0]]],
                              dtype=np.float32)
        im_data_pt = torch.from_numpy(blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)  # NHWC -> NCHW
        im_info_pt = torch.from_numpy(im_info_np)
        with torch.no_grad():
            # resize holder tensors to the blob shapes and copy the data in
            im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
            im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
            gt_boxes.resize_(1, 1, 5).zero_()
            num_boxes.resize_(1).zero_()

        rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = model(im_data, im_info, gt_boxes, num_boxes)

        # predict
        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        if opt.TEST_BBOX_REG:
            box_deltas = bbox_pred.data
            if opt.TRAIN_BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Undo the train-time normalisation of the regression targets.
                if opt.cuda:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(opt.TRAIN_BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(opt.TRAIN_BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(opt.TRAIN_BBOX_NORMALIZE_STDS) \
                        + torch.FloatTensor(opt.TRAIN_BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(label))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        # NOTE(review): if opt.TEST_BBOX_REG is false, pred_boxes is never
        # assigned and the next line raises NameError -- confirm the flag is
        # always true in this pipeline.
        pred_boxes /= scale[0]  # back to the original image scale

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()

        image = np.copy(image[:, :, ::-1])  # RGB -> BGR copy for OpenCV-style ops
        demo = image.copy()
        bubbles = []
        dets_bubbles = []
        for j in range(1, len(label)):  # skip background class 0
            inds = torch.nonzero(scores[:, j] > opt.THRESH).view(-1)
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                # class-specific box columns [4j, 4j+4)
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order], opt.TEST_NMS)
                cls_dets = cls_dets[keep.view(-1).long()].cpu().numpy()

                # post-processing : get contours of speech bubble
                demo, image, bubbles, dets_bubbles = bubble_utils.get_cnt_bubble(
                    image, image.copy(), label[j], cls_dets, cls, bg=bg)
        return demo, image, bubbles, dets_bubbles
def predict1():
    """Flask endpoint body: run Faster R-CNN over uploaded images (or a webcam)
    and return the processed detections as JSON.

    Reads images from <basedir>/upload/ (or frames from the webcam when
    args.webcam_num >= 0), runs the module-level `fasterRCNN`, builds a
    per-file prediction dict, and returns flask.jsonify(process(data)).
    """
    data = {"success": False}
    im_info1 = {}

    # initilize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda > 0:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    # NOTE(review): `volatile=True` is ignored (with a warning) on
    # PyTorch >= 0.4; inference-mode is provided by the torch.no_grad()
    # block below instead.
    im_data = Variable(im_data, volatile=True)
    im_info = Variable(im_info, volatile=True)
    num_boxes = Variable(num_boxes, volatile=True)
    gt_boxes = Variable(gt_boxes, volatile=True)

    if args.cuda > 0:
        cfg.CUDA = True
    if args.cuda > 0:
        fasterRCNN.cuda()
    fasterRCNN.eval()

    start = time.time()
    max_per_image = 100
    thresh = 0.05
    vis = True
    file_dir = os.path.join(basedir, 'upload/')
    print('file_dir', file_dir)
    webcam_num = args.webcam_num
    # Set up webcam or get image directories
    if webcam_num >= 0:
        cap = cv2.VideoCapture(webcam_num)
        num_images = 0
    else:
        imglist = os.listdir(file_dir)
        num_images = len(imglist)
    print('Loaded Photo: {} images.'.format(num_images))

    while (num_images >= 0):
        total_tic = time.time()
        if webcam_num == -1:
            num_images -= 1

        # Get image from the webcam
        if webcam_num >= 0:
            if not cap.isOpened():
                raise RuntimeError("Webcam could not open. Please check connection.")
            ret, frame = cap.read()
            im_in = np.array(frame)
        # Load the demo image
        else:
            im_file = os.path.join(file_dir, imglist[num_images])
            print("im_fileeeeeee", im_file)
            # im = cv2.imread(im_file)
            im_in = np.array(imread(im_file))
        if len(im_in.shape) == 2:
            # grayscale -> replicate to 3 channels
            im_in = im_in[:, :, np.newaxis]
            im_in = np.concatenate((im_in, im_in, im_in), axis=2)
        # rgb -> bgr
        im = im_in[:, :, ::-1]

        blobs, im_scales = _get_image_blob(im)
        assert len(im_scales) == 1, "Only single-image batch implemented"
        im_blob = blobs
        im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                              dtype=np.float32)

        im_data_pt = torch.from_numpy(im_blob)
        im_data_pt = im_data_pt.permute(0, 3, 1, 2)  # NHWC -> NCHW
        im_info_pt = torch.from_numpy(im_info_np)

        with torch.no_grad():
            im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
            im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
            gt_boxes.resize_(1, 1, 5).zero_()
            num_boxes.resize_(1).zero_()

        # pdb.set_trace()
        det_tic = time.time()
        rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    if args.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    if args.cuda > 0:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                    box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= im_scales[0]  # back to the original image scale

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()

        jindex = []
        info = {}
        info['predictions'] = list()
        filename = os.path.split(im_file)
        print("filename", filename[1])
        info['filename'] = filename[1]
        image1 = Image.open(im_file);
        print('image1.size', image1.size);
        info['width'] = image1.size[0]
        info['height'] = image1.size[1]

        if vis:
            im2show = np.copy(im)
        for j in range(1, len(pascal_classes)):  # skip background class 0
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                # keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
                keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                print('j', j)
                cls_dets.cpu().numpy()
                jindex.append(j)
                if vis:
                    im2show = vis_detections(im2show, j, cls_dets.cpu().numpy(), 0.5)
                    pred = vis_results(j, cls_dets.cpu().numpy(), 0.5)
                    print('pred', pred)
                    if (pred != []):
                        info['predictions'].append(pred)
        # print("cls_dets.cpu().numpy()", cls_dets.cpu().numpy())
        # print('cls_dets', cls_dets)
        # box_re = cls_dets.cpu().numpy()
        # print('box_re', box_re)
        # # Loop over the results and add them to the list of returned predictions
        # info = {}
        # filename = os.path.split(im_file)
        # print("filename", filename[1])
        # info['filename'] = filename[1]
        # image1 = Image.open(im_file);
        # print('image1.size', image1.size);
        # info['width'] = image1.size[0]
        # info['height'] = image1.size[1]
        # info['predictions'] = list()
        # j = 0
        # for box in box_re:
        #     r = {"BoxList": [str(i) for i in np.rint(box[:4]).astype(int)]}
        #     r["BoxList"].append(jindex[j])
        #     j = j + 1
        #     info['predictions'].append(r)
        # # Indicate that the request was a success.
        # s = {}
        data["success"] = True
        # s = {im_file: info}
        im_info1[filename[1]] = info
        data['im_info'] = im_info1
        print(data)
        new_data = process(data)

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        if webcam_num == -1:
            sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \
                .format(num_images + 1, len(imglist), detect_time, nms_time))
            sys.stdout.flush()

        if vis and webcam_num == -1:
            # cv2.imshow('test', im2show)
            # cv2.waitKey(0)
            result_path = os.path.join(file_dir,
                                       imglist[num_images][:-4] + "_det.jpg")
            # cv2.imwrite(result_path, im2show)
        else:
            im2showRGB = cv2.cvtColor(im2show, cv2.COLOR_BGR2RGB)
            cv2.imshow("frame", im2showRGB)
            total_toc = time.time()
            total_time = total_toc - total_tic
            frame_rate = 1 / total_time
            print('Frame rate:', frame_rate)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    if webcam_num >= 0:
        cap.release()
        cv2.destroyAllWindows()

    return flask.jsonify(new_data)
def detect(self, dataset, foldername, filename, ch, vis, bbox_log):
    """Run the detector on one image file; optionally visualise and log boxes.

    dataset: dataset name used to build the output folder name.
    foldername, filename: location of the input image.
    ch: channel/camera id written into the log line and the folder name.
    vis: when true, write an annotated copy of the image to the output folder.
    bbox_log: when true, append boxes with confidence > 0.5 to the
        externally managed file handle `fo` as CSV lines.
    """
    image_num = os.path.splitext(filename)[0]
    output_folder = 'output/' + dataset + "_ch" + str(ch)
    if not os.path.exists(output_folder):
        os.mkdir(output_folder)

    total_tic = time.time()
    # im = cv2.imread(im_file)
    im_file = foldername + "/" + filename
    im_in = np.array(imread(im_file))
    if len(im_in.shape) == 2:
        # grayscale -> replicate to 3 channels
        im_in = im_in[:, :, np.newaxis]
        im_in = np.concatenate((im_in, im_in, im_in), axis=2)
    # rgb -> bgr
    im = im_in[:, :, ::-1]

    blobs, im_scales = _get_image_blob(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                          dtype=np.float32)

    im_data_pt = torch.from_numpy(im_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)  # NHWC -> NCHW
    im_info_pt = torch.from_numpy(im_info_np)

    with torch.no_grad():
        # Copy the blobs into the pre-allocated holder tensors on self.
        self.im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
        self.im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
        self.gt_boxes.resize_(1, 1, 5).zero_()
        self.num_boxes.resize_(1).zero_()

    # pdb.set_trace()
    det_tic = time.time()
    rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, rois_label = self.fasterRCNN(
            self.im_data, self.im_info, self.gt_boxes, self.num_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            if self.myargs.class_agnostic:
                if self.myargs.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4)
            else:
                if self.myargs.cuda > 0:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS) \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                box_deltas = box_deltas.view(1, -1, 4 * len(self.pascal_classes))

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, self.im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    pred_boxes /= im_scales[0]  # back to the original image scale

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    det_toc = time.time()
    detect_time = det_toc - det_tic
    misc_tic = time.time()
    if vis:
        im2show = np.copy(im)
    # NOTE(review): `xrange` implies Python 2 or a compat import -- confirm.
    for j in xrange(1, len(self.pascal_classes)):
        inds = torch.nonzero(scores[:, j] > self.thresh).view(-1)
        # if there is det
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if self.myargs.class_agnostic:
                cls_boxes = pred_boxes[inds, :]
            else:
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
            cls_dets = cls_dets[order]
            # keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS)
            keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]

            if bbox_log:
                bbox_list = cls_dets.cpu().numpy()
                for bb in bbox_list:
                    start_x = int(bb[0])
                    start_y = int(bb[1])
                    end_x = int(bb[2])
                    end_y = int(bb[3])
                    confidence = bb[4]
                    if confidence > 0.5:
                        # `fo` is a log-file handle opened elsewhere -- TODO confirm.
                        fo.write(
                            str(ch) + "," + image_num + "," + str(start_x) + "," +
                            str(start_y) + "," + str(end_x) + "," + str(end_y) +
                            "," + str(confidence) + "\n"
                        )

            if vis:
                im2show = vis_detections(im2show, self.pascal_classes[j],
                                         cls_dets.cpu().numpy(), 0.5)

    misc_toc = time.time()
    nms_time = misc_toc - misc_tic
    # sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \
    #     .format(num_images + 1, len(imglist), detect_time, nms_time))
    # sys.stdout.flush()
    if vis:
        result_path = os.path.join(output_folder, str(image_num) + ".jpg")
        cv2.imwrite(result_path, im2show)
def eval_one_dataloader(save_dir_test_out, dataloader_t, fasterRCNN, device, imdb,
                        target_num=0, class_agnostic=False, thresh=0.0,
                        max_per_image=100, return_ap_class=False):
    """Run detection over one dataloader and evaluate mAP on its imdb.

    save_dir_test_out: prefix for the output directory name.
    dataloader_t: dataloader yielding (im_data, im_info, gt_boxes, num_boxes, ...).
    fasterRCNN: model; the forward signature is chosen by isinstance checks
        against the htcn / saito / plain variants.
    device: torch device the batch tensors are moved to.
    target_num: forwarded to the htcn-style forward only.
    class_agnostic: single shared box regression vs. per-class boxes.
    thresh: score threshold applied before NMS.
    max_per_image: cap on detections kept per image across all classes.
    return_ap_class: when true, also return the per-class AP list.

    Returns mAP, or (mAP, ap_per_class). Also pickles all_boxes to
    detections.pkl in the output directory.
    """
    save_name = save_dir_test_out + '_test_in_'
    num_images = len(imdb.image_index)
    # all_boxes[class][image] -> (N, 5) array of [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)

    data_iter = iter(dataloader_t)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()  #fasterRCNN.training = False
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):
        data = next(data_iter)
        im_data = data[0].to(device)
        im_info = data[1].to(device)
        gt_boxes = data[2].to(device)
        num_boxes = data[3].to(device)

        with torch.no_grad():
            # Dispatch on the model variant: each returns a different tuple arity.
            if isinstance(fasterRCNN, frcnn_htcn) or isinstance(fasterRCNN, frcnn_htcn_m):
                det_tic = time.time()
                rois, cls_prob, bbox_pred, \
                    rpn_loss_cls, rpn_loss_box, \
                    RCNN_loss_cls, RCNN_loss_bbox, \
                    rois_label, _, _, _, _ = fasterRCNN(im_data, im_info, gt_boxes,
                                                        num_boxes, target_num=target_num)
            elif isinstance(fasterRCNN, frcnn_saito):
                det_tic = time.time()
                rois, cls_prob, bbox_pred, \
                    rpn_loss_cls, rpn_loss_box, \
                    RCNN_loss_cls, RCNN_loss_bbox, \
                    rois_label, _, _ = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
            else:
                det_tic = time.time()
                rois, cls_prob, bbox_pred, \
                    rpn_loss_cls, rpn_loss_box, \
                    RCNN_loss_cls, RCNN_loss_bbox, \
                    rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]
        # d_pred = d_pred.data
        # path = data[4]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= data[1][0][2].item()  # undo the input image scaling

        scores = scores.squeeze()  # [1, 300, 2] -> [300, 2]
        pred_boxes = pred_boxes.squeeze()  # [1, 300, 8] -> [300, 8]
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        for j in range(1, imdb.num_classes):  # skip background class 0
            inds = torch.nonzero(scores[:, j] > thresh, as_tuple=False).view(-1)  # [300]
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]  # [300]
                _, order = torch.sort(cls_scores, 0, True)
                if class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]  # [300, 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)  # [300, 5]
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                # keep = nms(cls_dets, cfg.TEST.NMS)
                keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)  # [N, 1]
                cls_dets = cls_dets[keep.view(-1).long()]  # [N, 5]
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in range(1, imdb.num_classes)])  # [M,]
            if len(image_scores) > max_per_image:
                # Keep only detections scoring at least the max_per_image-th score.
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        # sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \
        #     .format(i + 1, num_images, detect_time, nms_time))
        # sys.stdout.flush()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    # NOTE(review): `map` shadows the builtin -- consider renaming to `mAP`.
    map, ap_per_class = imdb.evaluate_detections(all_boxes, output_dir)
    #fasterRCNN.training =
    # Free the large per-image tensors before returning.
    del scores
    del boxes
    del all_boxes
    del pred_boxes
    del rois
    del cls_prob
    del bbox_pred
    del rpn_loss_cls
    del rpn_loss_box
    del RCNN_loss_cls
    del RCNN_loss_bbox
    del rois_label
    if return_ap_class:
        return map, ap_per_class
    return map
_, order = torch.sort(cls_scores, 0, True) if args.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) cls_dets = cls_dets[order] if args.soft_nms: np_dets = cls_dets.cpu().numpy().astype(np.float32) keep = soft_nms(np_dets, cfg.TEST.SOFT_NMS_METHOD ) # np_dets will be changed in soft_nms keep = torch.from_numpy(keep).type_as(cls_dets).int() cls_dets = torch.from_numpy(np_dets).type_as(cls_dets) else: keep = nms(cls_dets, cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] if vis: im2show = vis_detections(im2show, imdb.classes[j], cls_dets.cpu().numpy(), 0.3) all_boxes[j][i] = cls_dets.cpu().numpy() else: all_boxes[j][i] = empty_array # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack( [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes):
def forward(self, input):
    """Turn RPN score/delta maps into a fixed-size batch of box proposals.

    input: tuple (rpn_cls_prob, rpn_bbox_pred, im_info, cfg_key).
    Returns a (batch, post_nms_topN, 5) tensor whose rows are
    [batch_idx, x1, y1, x2, y2], zero-padded past the kept proposals.
    """
    # Keep only the foreground (object) half of the class scores.
    fg_scores = input[0][:, self._num_anchors:, :, :]
    deltas = input[1]
    im_info = input[2]
    cfg_key = input[3]

    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    batch_size = deltas.size(0)

    # Build one (x, y, x, y) shift per feature-map cell.
    feat_height, feat_width = fg_scores.size(2), fg_scores.size(3)
    xs = np.arange(0, feat_width) * self._feat_stride
    ys = np.arange(0, feat_height) * self._feat_stride
    xs, ys = np.meshgrid(xs, ys)
    shifts = torch.from_numpy(
        np.vstack((xs.ravel(), ys.ravel(), xs.ravel(), ys.ravel())).transpose())
    shifts = shifts.contiguous().type_as(fg_scores).float()

    A = self._num_anchors
    K = shifts.size(0)

    # Broadcast the base anchors across every cell, then across the batch.
    self._anchors = self._anchors.type_as(fg_scores)
    anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

    # Reorder the predicted deltas and scores to line up with the anchors.
    deltas = deltas.permute(0, 2, 3, 1).contiguous()
    deltas = deltas.view(batch_size, -1, 4)
    fg_scores = fg_scores.permute(0, 2, 3, 1).contiguous()
    fg_scores = fg_scores.view(batch_size, -1)

    # Apply the deltas to the anchors and clamp to the image boundary.
    proposals = bbox_transform_inv(anchors, deltas, batch_size)
    proposals = clip_boxes(proposals, im_info, batch_size)

    _, order = torch.sort(fg_scores, 1, True)

    # Output rows past the kept proposals remain zero.
    output = fg_scores.new(batch_size, post_nms_topN, 5).zero_()
    for i in range(batch_size):
        boxes_i = proposals[i]
        scores_i = fg_scores[i]
        order_i = order[i]

        # Take the top pre_nms_topN scoring boxes before NMS.
        if pre_nms_topN > 0 and pre_nms_topN < fg_scores.numel():
            order_i = order_i[:pre_nms_topN]

        boxes_i = boxes_i[order_i, :]
        scores_i = scores_i[order_i].view(-1, 1)

        # NMS, then cap at post_nms_topN survivors.
        keep = nms(boxes_i, scores_i.squeeze(1), nms_thresh)
        keep = keep.long().view(-1)
        if post_nms_topN > 0:
            keep = keep[:post_nms_topN]
        boxes_i = boxes_i[keep, :]
        scores_i = scores_i[keep, :]

        num_kept = boxes_i.size(0)
        output[i, :, 0] = i
        output[i, :num_kept, 1:] = boxes_i

    return output
def test_net(fasterRCNN, image, img_blob, img_scales, items, labels, i):
    """Detect bubbles/cuts in one page image and extract the text regions.

    fasterRCNN: detection network.
    image: original RGB image array (H, W, 3).
    img_blob: pre-processed network input blob (NHWC); img_scales: its scales.
    items: tuple of holder tensors (im_data, im_info, num_boxes, gt_boxes).
    labels: class-label list (index 0 = background); i: page index forwarded
        to sbd_utils.divideCutFromImage.

    Returns (alpha_image, vis_img, cuts, bubbles, texts).
    """
    im_data, im_info, num_boxes, gt_boxes = items

    im_info_np = np.array(
        [[img_blob.shape[1], img_blob.shape[2], img_scales[0]]],
        dtype=np.float32)
    im_data_pt = torch.from_numpy(img_blob)
    im_data_pt = im_data_pt.permute(0, 3, 1, 2)  # NHWC -> NCHW
    im_info_pt = torch.from_numpy(im_info_np)

    with torch.no_grad():
        # Copy the blobs into the pre-allocated holder tensors.
        im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.resize_(1, 1, 5).zero_()
        num_boxes.resize_(1).zero_()

    rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]
    if config.TEST_BBOX_REG:
        box_deltas = bbox_pred.data
        if config.TRAIN_BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Undo the train-time normalisation of the regression targets.
            if config.cuda:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(config.TRAIN_BBOX_NORMALIZE_STDS).cuda() \
                    + torch.FloatTensor(config.TRAIN_BBOX_NORMALIZE_MEANS).cuda()
            else:
                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(config.TRAIN_BBOX_NORMALIZE_STDS) \
                    + torch.FloatTensor(config.TRAIN_BBOX_NORMALIZE_MEANS)
            box_deltas = box_deltas.view(1, -1, 4 * len(labels))
        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    # NOTE(review): if config.TEST_BBOX_REG is false, pred_boxes is never
    # assigned and the next line raises NameError -- confirm the flag is set.
    pred_boxes /= img_scales[0]  # back to the original image scale

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    copy_img = np.copy(image[:, :, ::-1])  # RGB -> BGR copy
    bubbles = []
    for j in range(1, len(labels)):  # skip background class 0
        inds = torch.nonzero(scores[:, j] > config.THRESH).view(-1)
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], config.TEST_NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            # NOTE(review): vis_img and boxes are only assigned inside this
            # branch; if no class passes the threshold, the text.detection
            # call below would hit an unbound `boxes` -- confirm upstream
            # guarantees at least one detection.
            copy_img, vis_img, bubbles, boxes = sbd_utils.divideBubbleFromImage(
                copy_img, image[:, :, ::-1], labels[j], cls_dets.cpu().numpy(),
                config.CLASS_THRESH, bg=config.BACKGROUND)

    copy_img, vis_img, cuts = sbd_utils.divideCutFromImage(
        copy_img, image[:, :, ::-1], i, bg=config.BACKGROUND)
    alpha_image = sbd_utils.addImageToAlphaChannel(copy_img, copy_img,
                                                   FLAG='conversion')
    vis_img, texts = text.detection(vis_img, bubbles, boxes)
    return alpha_image, vis_img, cuts, bubbles, texts
def test(args, model=None):
    """Evaluate a one-shot detector over the validation imdb and return mAP.

    Either constructs a network from args.net and loads args.weights, or
    evaluates the `model` passed in. Runs one forward pass per test scale
    (cfg.TEST.SCALES) and optional query augmentation, merges the
    multi-scale detections, applies NMS, and scores with
    imdb_vu.evaluate_detections.

    Args:
        args: parsed CLI options (imdbval_name, net, weights, cuda, vis,
            class_agnostic, optionally class_image_augmentation).
        model: an already-built network to evaluate; when falsy a new one
            is built and a checkpoint is loaded.

    Returns:
        The mAP value returned by imdb_vu.evaluate_detections.
    """
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    # Load dataset
    imdb_vu, roidb_vu, ratio_list_vu, ratio_index_vu, query_vu = combined_roidb(
        args.imdbval_name, False)
    imdb_vu.competition_mode(on=True)
    dataset_vu = roibatchLoader(roidb_vu,
                                ratio_list_vu,
                                ratio_index_vu,
                                query_vu,
                                1,
                                imdb_vu._classes,
                                training=False)

    # initialize the network here.
    if not model:
        if args.net == 'vgg16':
            fasterRCNN = vgg16(imdb_vu.classes,
                               pretrained=False,
                               class_agnostic=args.class_agnostic)
        elif args.net == 'res101':
            fasterRCNN = resnet(imdb_vu.classes,
                                101,
                                pretrained=False,
                                class_agnostic=args.class_agnostic)
        elif args.net == 'res50':
            fasterRCNN = resnet(imdb_vu.classes,
                                50,
                                pretrained=False,
                                class_agnostic=args.class_agnostic)
        elif args.net == 'res152':
            fasterRCNN = resnet(imdb_vu.classes,
                                152,
                                pretrained=False,
                                class_agnostic=args.class_agnostic)
        else:
            # NOTE(review): falls through with fasterRCNN undefined ->
            # NameError on the next line for an unknown args.net.
            print("network is not defined")
        fasterRCNN.create_architecture()

        # Load checkpoint
        print("load checkpoint %s" % (args.weights))
        checkpoint = torch.load(args.weights)
        fasterRCNN.load_state_dict(checkpoint['model'])
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        print('load model successfully!')
    else:
        # evaluate constructed model
        fasterRCNN = model

    # initialize the tensor holder here (resized/filled per batch below).
    im_data = torch.FloatTensor(1)
    query = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    catgory = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        cfg.CUDA = True
        fasterRCNN.cuda()
        im_data = im_data.cuda()
        query = query.cuda()
        im_info = im_info.cuda()
        catgory = catgory.cuda()
        gt_boxes = gt_boxes.cuda()

    # record time
    start = time.time()

    # visualization; score threshold is raised when drawing images.
    vis = args.vis if hasattr(args, 'vis') else None
    if vis:
        thresh = 0.05
    else:
        thresh = 0.0
    max_per_image = 100

    fasterRCNN.eval()
    dataset_vu.query_position = 0
    # One DataLoader iterator per test scale; the scale itself is injected
    # by temporarily patching cfg.TRAIN.SCALES inside the loop below.
    test_scales = cfg.TEST.SCALES
    multiscale_iterators = []
    for i_scale, test_scale in enumerate(test_scales):
        cur_dataloader_vu = torch.utils.data.DataLoader(dataset_vu,
                                                        batch_size=1,
                                                        shuffle=False,
                                                        num_workers=0,
                                                        pin_memory=True)
        cur_data_iter_vu = iter(cur_dataloader_vu)
        multiscale_iterators.append(cur_data_iter_vu)

    # total quantity of testing images, each image includes multiple
    # detect classes.
    num_images_vu = len(imdb_vu.image_index)
    num_detect = len(ratio_index_vu[0])
    all_boxes = [[[] for _ in range(num_images_vu)]
                 for _ in range(imdb_vu.num_classes)]
    _t = {'im_detect': time.time(), 'misc': time.time()}

    for i, index in enumerate(ratio_index_vu[0]):
        det_tic = time.time()
        multiscale_boxes = []
        multiscale_scores = []
        for i_scale, (data_iter_vu, test_scale) in enumerate(
                zip(multiscale_iterators, test_scales)):
            # need to rewrite cfg.TRAIN.SCALES - very hacky!
            # (the loader reads cfg.TRAIN.SCALES when producing a batch)
            BACKUP_TRAIN_SCALES = cfg.TRAIN.SCALES
            cfg.TRAIN.SCALES = [test_scale]
            data = next(data_iter_vu)
            cfg.TRAIN.SCALES = BACKUP_TRAIN_SCALES

            with torch.no_grad():
                im_data.resize_(data[0].size()).copy_(data[0])
                query.resize_(data[1].size()).copy_(data[1])
                im_info.resize_(data[2].size()).copy_(data[2])
                gt_boxes.resize_(data[3].size()).copy_(data[3])
                catgory.data.resize_(data[4].size()).copy_(data[4])

            # Run Testing: optionally augment the query image with
            # 90-degree rotations and run once per rotated query.
            if not hasattr(args, "class_image_augmentation"
                           ) or not args.class_image_augmentation:
                queries = [query]
            elif args.class_image_augmentation.lower() == "rotation90":
                queries = [query]
                for _ in range(3):
                    queries.append(queries[-1].rot90(1, [2, 3]))
            else:
                raise RuntimeError(
                    "Unknown class_image_augmentation: {}".format(
                        args.class_image_augmentation))

            for q in queries:
                rois, cls_prob, bbox_pred, \
                    rpn_loss_cls, rpn_loss_box, \
                    RCNN_loss_cls, _, RCNN_loss_bbox, \
                    rois_label, weight = fasterRCNN(im_data, q, im_info,
                                                    gt_boxes, catgory)

                scores = cls_prob.data
                # Drop the batch-index column of the rois.
                boxes = rois.data[:, :, 1:5]

                # Apply bounding-box regression
                if cfg.TEST.BBOX_REG:
                    # Apply bounding-box regression deltas
                    box_deltas = bbox_pred.data
                    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                        # Optionally normalize targets by a precomputed
                        # mean and stdev
                        if args.class_agnostic:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                            box_deltas = box_deltas.view(1, -1, 4)
                        else:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                            box_deltas = box_deltas.view(
                                1, -1, 4 * len(imdb_vu.classes))
                    pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                    pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
                else:
                    # Simply repeat the boxes, once for each class
                    pred_boxes = np.tile(boxes, (1, scores.shape[1]))

                # Resize to original ratio
                pred_boxes /= data[2][0][2].item()

                # Remove batch_size dimension
                scores = scores.squeeze()
                pred_boxes = pred_boxes.squeeze()
                multiscale_scores.append(scores)
                multiscale_boxes.append(pred_boxes)

        # Pool detections from all scales / query rotations.
        scores = torch.cat(multiscale_scores, dim=0)
        pred_boxes = torch.cat(multiscale_boxes, dim=0)

        # Record time
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()

        # Post processing
        inds = torch.nonzero(scores > thresh).view(-1)
        if inds.numel() > 0:
            # remove useless indices
            cls_scores = scores[inds]
            cls_boxes = pred_boxes[inds, :]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)

            # rearrange order
            _, order = torch.sort(cls_scores, 0, True)
            cls_dets = cls_dets[order]

            # NMS
            keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            # NOTE(review): `catgory` is a (1-element) tensor used as a
            # Python list index — relies on Tensor.__index__; confirm it
            # always holds exactly one element.
            all_boxes[catgory][index] = cls_dets.cpu().numpy()

            # Limit to max_per_image detections *over all classes*
            if max_per_image > 0:
                try:
                    image_scores = all_boxes[catgory][index][:, -1]
                    if len(image_scores) > max_per_image:
                        image_thresh = np.sort(image_scores)[-max_per_image]
                        keep = np.where(
                            all_boxes[catgory][index][:, -1] >= image_thresh)[0]
                        all_boxes[catgory][index] = all_boxes[catgory][index][
                            keep, :]
                except:
                    # NOTE(review): bare except silently swallows any error
                    # in the capping step — consider narrowing/logging.
                    pass

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \
            .format(i + 1, num_detect, detect_time, nms_time))
        sys.stdout.flush()

        # save test image
        if vis and i % 1 == 0:
            im2show = cv2.imread(
                dataset_vu._roidb[dataset_vu.ratio_index[i]]['image'])
            im2show = vis_detections(im2show, 'shot',
                                     cls_dets.cpu().numpy(), 0.3)
            # Un-normalize the query image (ImageNet mean/std) for display.
            o_query = data[1][0].permute(1, 2, 0).contiguous().cpu().numpy()
            o_query *= [0.229, 0.224, 0.225]
            o_query += [0.485, 0.456, 0.406]
            o_query *= 255
            o_query = o_query[:, :, ::-1]

            (h, w, c) = im2show.shape
            o_query = cv2.resize(o_query, (h, h),
                                 interpolation=cv2.INTER_LINEAR)
            im2show = np.concatenate((im2show, o_query), axis=1)

            vis_path = "./test_img"
            if not os.path.isdir(vis_path):
                os.makedirs(vis_path)
            cv2.imwrite(os.path.join(vis_path, "%d_d.png" % (i)), im2show)

    print('Evaluating detections')
    mAP = imdb_vu.evaluate_detections(all_boxes, None)

    end = time.time()
    print("test time: %0.4fs" % (end - start))
    return mAP
inds = torch.nonzero(scores[:, j] > thresh, as_tuple=False).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:, j][inds] _, order = torch.sort(cls_scores, 0, True) if args.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] # keep = nms(cls_dets, cfg.TEST.NMS, force_cpu=not cfg.USE_GPU_NMS) # 0.4.0 version keep = nms(cls_dets[:, :4], cls_dets[:, 4], cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] if vis: im2show = vis_detections(im2show, pascal_classes[j], cls_dets.cpu().numpy(), 0.5) misc_toc = time.time() nms_time = misc_toc - misc_tic if webcam_num == -1: sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \ .format(num_images + 1, len(imglist), detect_time, nms_time)) sys.stdout.flush() if vis and webcam_num == -1: # cv2.imshow('test', im2show)
def forward(self, input):
    """RPN proposal layer: turn per-anchor scores and deltas into RoIs.

    Args:
        input: 4-tuple of
            input[0]: (B, 2*A, H, W) objectness scores — the first A
                channels are background probs, the second A foreground,
            input[1]: (B, 4*A, H, W) predicted bbox deltas,
            input[2]: im_info (per-image height/width/scale),
            input[3]: cfg key ('TRAIN' or 'TEST') selecting RPN settings.

    Returns:
        (B, post_nms_topN, 5) tensor of [batch_idx, x1, y1, x2, y2] rows,
        zero-padded when fewer proposals survive NMS.
    """
    # Algorithm:
    #
    # for each (H, W) location i
    #   generate A anchor boxes centered on cell i
    #   apply predicted bbox deltas at cell i to each of the A anchors
    # clip predicted boxes to image
    # remove predicted boxes with either height or width < threshold
    # sort all (proposal, score) pairs by score from highest to lowest
    # take top pre_nms_topN proposals before NMS
    # apply NMS with threshold 0.7 to remaining proposals
    # take after_nms_topN proposals after NMS
    # return the top proposals (-> RoIs top, scores top)

    # the first set of _num_anchors channels are bg probs
    # the second set are the fg probs — keep only the fg half.
    scores = input[0][:, self._num_anchors:, :, :]
    bbox_deltas = input[1]
    im_info = input[2]
    cfg_key = input[3]

    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    min_size = cfg[cfg_key].RPN_MIN_SIZE

    batch_size = bbox_deltas.size(0)

    # Build the (x1, y1, x2, y2) shift of every feature-map cell in input
    # image coordinates.
    feat_height, feat_width = scores.size(2), scores.size(3)
    shift_x = np.arange(0, feat_width) * self._feat_stride
    shift_y = np.arange(0, feat_height) * self._feat_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = torch.from_numpy(
        np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                   shift_y.ravel())).transpose())
    shifts = shifts.contiguous().type_as(scores).float()

    A = self._num_anchors
    K = shifts.size(0)

    self._anchors = self._anchors.type_as(scores)
    # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous()
    # Broadcast the A base anchors over all K cell shifts.
    anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
    anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

    # Transpose and reshape predicted bbox transformations to get them
    # into the same order as the anchors:
    bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
    bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

    # Same story for the scores:
    scores = scores.permute(0, 2, 3, 1).contiguous()
    scores = scores.view(batch_size, -1)

    # (Removed: commented-out debug dump of anchors/scores to .npy files.)

    # Convert anchors into proposals via bbox transformations
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

    # 2. clip predicted boxes to image
    proposals = clip_boxes(proposals, im_info, batch_size)
    # proposals = clip_boxes_batch(proposals, im_info, batch_size)

    # NOTE: the original min-size filtering (self._filter_boxes with
    # min_size * im_info[:, 2]) is disabled here; all proposals are kept.
    scores_keep = scores
    proposals_keep = proposals
    _, order = torch.sort(scores_keep, 1, True)

    output = scores.new(batch_size, post_nms_topN, 5).zero_()
    for i in range(batch_size):
        # # 3. remove predicted boxes with either height or width < threshold
        # # (NOTE: convert min_size to input image scale stored in im_info[2])
        proposals_single = proposals_keep[i]
        scores_single = scores_keep[i]

        # # 4. sort all (proposal, score) pairs by score from highest to lowest
        # # 5. take top pre_nms_topN (e.g. 6000)
        order_single = order[i]

        # NOTE(review): compares against scores_keep.numel() (whole batch),
        # not the per-image count — harmless for batch_size 1; confirm for
        # larger batches.
        if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
            order_single = order_single[:pre_nms_topN]

        proposals_single = proposals_single[order_single, :]
        scores_single = scores_single[order_single].view(-1, 1)

        # (Removed: commented-out debug dump of pre-NMS proposals.)

        # 6. apply nms (e.g. threshold = 0.7)
        # 7. take after_nms_topN (e.g. 300)
        # 8. return the top proposals (-> RoIs top)
        keep_idx_i = nms(proposals_single, scores_single.squeeze(1),
                         nms_thresh)
        keep_idx_i = keep_idx_i.long().view(-1)

        if post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        scores_single = scores_single[keep_idx_i, :]

        # (Removed: commented-out debug dump of post-NMS proposals.)

        # padding 0 at the end.
        num_proposal = proposals_single.size(0)
        output[i, :, 0] = i
        output[i, :num_proposal, 1:] = proposals_single

    return output
def validation(val_dataloader, epoch, model_name, val_imdb, args):
    """Evaluate checkpoint `model_name` on the validation set and return mAP.

    Builds a res101 Faster R-CNN, loads the checkpoint, runs detection on
    every batch of `val_dataloader`, applies per-class NMS and a per-image
    detection cap, pickles the raw detections, and scores them with
    `val_imdb.evaluate_detections`.

    Args:
        val_dataloader: yields (im_data, im_info, gt_boxes, num_boxes)
            batches (batch size 1).
        epoch: epoch number, used to name the detections pickle.
        model_name: path of the checkpoint to evaluate.
        val_imdb: dataset wrapper providing classes / num_classes /
            image_index / evaluate_detections().
        args: parsed CLI options (cuda, class_agnostic, exp_group).

    Returns:
        Whatever `val_imdb.evaluate_detections` returns (the mAP).
    """
    val_imdb.competition_mode(on=True)
    print('Start Validation')
    val_fasterRCNN = resnet(val_imdb.classes,
                            101,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    val_fasterRCNN.create_architecture()
    print("load checkpoint %s" % model_name)
    checkpoint = torch.load(model_name)
    val_fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')

    # Tensor holders, resized/filled in place for every batch.
    if args.cuda:
        val_im_data = torch.FloatTensor(1).cuda()
        val_im_info = torch.FloatTensor(1).cuda()
        val_num_boxes = torch.LongTensor(1).cuda()
        val_gt_boxes = torch.FloatTensor(1).cuda()
        val_fasterRCNN.cuda()
        cfg.CUDA = True
    else:
        val_im_data = torch.FloatTensor(1)
        val_im_info = torch.FloatTensor(1)
        val_num_boxes = torch.LongTensor(1)
        val_gt_boxes = torch.FloatTensor(1)
    val_im_data = Variable(val_im_data)
    val_im_info = Variable(val_im_info)
    val_num_boxes = Variable(val_num_boxes)
    val_gt_boxes = Variable(val_gt_boxes)

    start = time.time()
    # Maximum number of detections kept per image (over all classes).
    max_per_image = 100
    thresh = 0.0
    save_name = 'val_' + args.exp_group
    num_images = len(val_imdb.image_index)
    # all_boxes[class][image] -> (N, 5) array of [x1, y1, x2, y2, score].
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(val_imdb.num_classes)]
    output_dir = get_output_dir(val_imdb, save_name)
    save_dir = os.path.join(output_dir, f"PRCurves_{args.exp_group}")
    os.makedirs(save_dir, exist_ok=True)
    det_file = os.path.join(save_dir, f'epoch_{epoch}_detections.pkl')

    val_fasterRCNN.eval()
    # Placeholder for classes with no detections on an image.
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i, data in enumerate(val_dataloader):
        with torch.no_grad():
            val_im_data.resize_(data[0].size()).copy_(data[0])
            val_im_info.resize_(data[1].size()).copy_(data[1])
            val_gt_boxes.resize_(data[2].size()).copy_(data[2])
            val_num_boxes.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()
        val_rois, val_cls_prob, val_bbox_pred, \
            val_rpn_loss_cls, val_rpn_loss_box, val_RCNN_loss_cls, \
            val_RCNN_loss_bbox, val_rois_label = val_fasterRCNN(
                val_im_data, val_im_info, val_gt_boxes, val_num_boxes)

        scores = val_cls_prob.data
        # Drop the batch-index column of the rois.
        boxes = val_rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = val_bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Un-normalize targets by the precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(
                        1, -1, 4 * len(val_imdb.classes))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, val_im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        # Undo the test-time image scaling applied by the data loader.
        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()

        for j in range(1, val_imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([
                all_boxes[j][i][:, -1]
                for j in range(1, val_imdb.num_classes)
            ])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, val_imdb.num_classes):
                    keep = np.where(
                        all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r'.format(
            i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    # Local renamed from `map` to avoid shadowing the builtin.
    mean_ap = val_imdb.evaluate_detections(all_boxes, epoch, output_dir)

    end = time.time()
    print("test time: %0.4fs" % (end - start))
    return mean_ap
def main(args):
    """Hand/object-contact detection demo over a folder of images or a webcam.

    Builds the network selected by args.net, loads the hard-coded hand-object
    checkpoint, then loops over args.image_dir (or webcam frames when
    args.webcam_num >= 0), detecting 'hand' and 'targetobject' boxes plus
    contact state / offset / left-right side, optionally visualizing and
    saving results to args.save_dir.
    """
    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    cfg.USE_GPU_NMS = args.cuda
    np.random.seed(cfg.RNG_SEED)

    pascal_classes = np.asarray(['__background__', 'targetobject', 'hand'])
    # NOTE(review): overwritten after cfg_from_list already ran, so these
    # settings are not applied here — confirm this is intentional.
    args.set_cfgs = [
        'ANCHOR_SCALES', '[8, 16, 32, 64]', 'ANCHOR_RATIOS', '[0.5, 1, 2]'
    ]

    # initialize the network here.
    if args.net == 'vgg16':
        fasterRCNN = vgg16(pascal_classes,
                           pretrained=False,
                           class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fasterRCNN = resnet(pascal_classes,
                            101,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fasterRCNN = resnet(pascal_classes,
                            50,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fasterRCNN = resnet(pascal_classes,
                            152,
                            pretrained=False,
                            class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()
        raise Exception

    fasterRCNN.create_architecture()

    # Checkpoint path is hard-coded for the released hand-object model.
    load_name = 'models/res101_handobj_100K/pascal_voc/faster_rcnn_1_8_132028.pth'
    print("load checkpoint %s" % (load_name))
    if args.cuda > 0:
        checkpoint = torch.load(load_name)
    else:
        # Map GPU-saved tensors onto the CPU.
        checkpoint = torch.load(load_name,
                                map_location=(lambda storage, loc: storage))
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']
    print('load model successfully!')

    # NOTE(review): lr/momentum/weight_decay are training knobs, unused in
    # this inference demo; `lr` is also reassigned below to hand-side preds.
    lr = cfg.TRAIN.LEARNING_RATE
    momentum = cfg.TRAIN.MOMENTUM
    weight_decay = cfg.TRAIN.WEIGHT_DECAY

    def _get_image_blob(im):
        """Converts an image into a network input.

        Arguments:
            im (ndarray): a color image in BGR order

        Returns:
            blob (ndarray): a data blob holding an image pyramid
            im_scale_factors (list): list of image scales (relative to im)
                used in the image pyramid
        """
        im_orig = im.astype(np.float32, copy=True)
        im_orig -= cfg.PIXEL_MEANS

        im_shape = im_orig.shape
        im_size_min = np.min(im_shape[0:2])
        im_size_max = np.max(im_shape[0:2])

        processed_ims = []
        im_scale_factors = []

        for target_size in cfg.TEST.SCALES:
            im_scale = float(target_size) / float(im_size_min)
            # Prevent the biggest axis from being more than MAX_SIZE
            if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE:
                im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max)
            im = cv2.resize(im_orig,
                            None,
                            None,
                            fx=im_scale,
                            fy=im_scale,
                            interpolation=cv2.INTER_LINEAR)
            im_scale_factors.append(im_scale)
            processed_ims.append(im)

        # Create a blob to hold the input images
        blob = im_list_to_blob(processed_ims)

        return blob, np.array(im_scale_factors)

    # initialize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    box_info = torch.FloatTensor(1)

    # ship to cuda
    # NOTE(review): box_info is NOT moved to CUDA here although it is passed
    # to the network below — confirm the model tolerates a CPU box_info.
    if args.cuda > 0:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    with torch.no_grad():
        if args.cuda > 0:
            cfg.CUDA = True
        if args.cuda > 0:
            fasterRCNN.cuda()
        fasterRCNN.eval()

    # The whole demo loop runs without gradient tracking.
    with torch.no_grad():
        start = time.time()
        max_per_image = 100
        thresh_hand = args.thresh_hand
        thresh_obj = args.thresh_obj
        vis = args.vis

        # print(f'thresh_hand = {thresh_hand}')
        # print(f'thnres_obj = {thresh_obj}')

        webcam_num = args.webcam_num
        # Set up webcam or get image directories
        if webcam_num >= 0:
            cap = cv2.VideoCapture(webcam_num)
            num_images = 0
        else:
            print(f'image dir = {args.image_dir}')
            print(f'save dir = {args.save_dir}')
            imglist = os.listdir(args.image_dir)
            num_images = len(imglist)

        print('Loaded Photo: {} images.'.format(num_images))

        # Iterates the image list from the end; for the webcam num_images
        # stays 0 so this loops until 'q' is pressed.
        while (num_images >= 0):
            total_tic = time.time()
            if webcam_num == -1:
                num_images -= 1

            # Get image from the webcam
            if webcam_num >= 0:
                if not cap.isOpened():
                    raise RuntimeError(
                        "Webcam could not open. Please check connection.")
                ret, frame = cap.read()
                im_in = np.array(frame)
            # Load the demo image
            else:
                im_file = os.path.join(args.image_dir, imglist[num_images])
                im_in = np.array(imread(im_file))
            # resize
            # im_in = np.array(Image.fromarray(im_in).resize((640, 360)))
            if len(im_in.shape) == 2:
                # Grayscale: replicate the channel to get 3 channels.
                im_in = im_in[:, :, np.newaxis]
                im_in = np.concatenate((im_in, im_in, im_in), axis=2)
            # rgb -> bgr
            im = im_in[:, :, ::-1]

            blobs, im_scales = _get_image_blob(im)
            assert len(im_scales) == 1, "Only single-image batch implemented"
            im_blob = blobs
            im_info_np = np.array(
                [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                dtype=np.float32)

            im_data_pt = torch.from_numpy(im_blob)
            # NHWC -> NCHW for the network.
            im_data_pt = im_data_pt.permute(0, 3, 1, 2)
            im_info_pt = torch.from_numpy(im_info_np)

            with torch.no_grad():
                im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
                im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
                gt_boxes.resize_(1, 1, 5).zero_()
                num_boxes.resize_(1).zero_()
                box_info.resize_(1, 1, 5).zero_()

            # pdb.set_trace()
            det_tic = time.time()

            print(im_data.shape)
            rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_box, RCNN_loss_cls, RCNN_loss_bbox, rois_label, \
                loss_list = fasterRCNN(im_data, im_info, gt_boxes, num_boxes, box_info)

            scores = cls_prob.data
            boxes = rois.data[:, :, 1:5]

            # extract predicted params
            contact_vector = loss_list[0][0]  # hand contact state info
            offset_vector = loss_list[1][0].detach(
            )  # offset vector (factored into a unit vector and a magnitude)
            lr_vector = loss_list[2][0].detach()  # hand side info (left/right)

            # get hand contact
            _, contact_indices = torch.max(contact_vector, 2)
            contact_indices = contact_indices.squeeze(0).unsqueeze(-1).float()

            # get hand side (reuses the name `lr` from above)
            lr = torch.sigmoid(lr_vector) > 0.5
            lr = lr.squeeze(0).float()

            if cfg.TEST.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    if args.class_agnostic:
                        if args.cuda > 0:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                                cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        else:
                            box_deltas = box_deltas.view(
                                -1, 4) * torch.FloatTensor(
                                    cfg.TRAIN.BBOX_NORMALIZE_STDS
                                ) + torch.FloatTensor(
                                    cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                        box_deltas = box_deltas.view(1, -1, 4)
                    else:
                        if args.cuda > 0:
                            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(
                                cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        else:
                            box_deltas = box_deltas.view(
                                -1, 4) * torch.FloatTensor(
                                    cfg.TRAIN.BBOX_NORMALIZE_STDS
                                ) + torch.FloatTensor(
                                    cfg.TRAIN.BBOX_NORMALIZE_MEANS)
                        box_deltas = box_deltas.view(1, -1,
                                                     4 * len(pascal_classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # Simply repeat the boxes, once for each class
                pred_boxes = np.tile(boxes, (1, scores.shape[1]))

            # Map boxes back to original-image coordinates.
            pred_boxes /= im_scales[0]

            scores = scores.squeeze()
            pred_boxes = pred_boxes.squeeze()
            det_toc = time.time()
            detect_time = det_toc - det_tic
            misc_tic = time.time()
            print(detect_time)
            if vis:
                im2show = np.copy(im)
            obj_dets, hand_dets = None, None
            for j in xrange(1, len(pascal_classes)):
                # inds = torch.nonzero(scores[:,j] > thresh).view(-1)
                # Per-class score threshold.
                if pascal_classes[j] == 'hand':
                    inds = torch.nonzero(scores[:, j] > thresh_hand,
                                         as_tuple=False).view(-1)
                elif pascal_classes[j] == 'targetobject':
                    inds = torch.nonzero(scores[:, j] > thresh_obj,
                                         as_tuple=False).view(-1)

                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    if args.class_agnostic:
                        cls_boxes = pred_boxes[inds, :]
                    else:
                        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                    # Detections carry extra columns: contact state, offset
                    # vector and hand side, alongside box + score.
                    cls_dets = torch.cat(
                        (cls_boxes, cls_scores.unsqueeze(1),
                         contact_indices[inds], offset_vector.squeeze(0)[inds],
                         lr[inds]), 1)
                    cls_dets = cls_dets[order]
                    keep = nms(cls_boxes[order, :], cls_scores[order],
                               cfg.TEST.NMS)
                    cls_dets = cls_dets[keep.view(-1).long()]
                    if pascal_classes[j] == 'targetobject':
                        obj_dets = cls_dets.cpu().numpy()
                    if pascal_classes[j] == 'hand':
                        hand_dets = cls_dets.cpu().numpy()

            if vis:
                # visualization
                im2show = vis_detections_filtered_objects_PIL(
                    im2show, obj_dets, hand_dets, thresh_hand, thresh_obj)

            misc_toc = time.time()
            nms_time = misc_toc - misc_tic

            if webcam_num == -1:
                sys.stdout.write(
                    'im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r'.format(
                        num_images + 1, len(imglist), detect_time, nms_time))
                sys.stdout.flush()

            if vis and webcam_num == -1:
                folder_name = args.save_dir
                os.makedirs(folder_name, exist_ok=True)
                result_path = os.path.join(
                    folder_name, imglist[num_images][:-4] + "_det.png")
                # im2show is a PIL image here (from the *_PIL visualizer).
                im2show.save(result_path)
            else:
                im2showRGB = cv2.cvtColor(im2show, cv2.COLOR_BGR2RGB)
                cv2.imshow("frame", im2showRGB)
                total_toc = time.time()
                total_time = total_toc - total_tic
                frame_rate = 1 / total_time
                print('Frame rate:', frame_rate)
                if cv2.waitKey(1) & 0xFF == ord('q'):
                    break

        if webcam_num >= 0:
            cap.release()
            cv2.destroyAllWindows()
def eval_test(fasterRCNN, args, cfg, imdb, dataloader, output_dir):
    """Run the detector over an imdb, dump detections, and evaluate them.

    Args:
        fasterRCNN: trained detection network (set to eval mode here).
        args: parsed CLI options (cuda, vis, class_agnostic, dataset).
        cfg: project config object (TEST/TRAIN sections used below).
        imdb: dataset wrapper (image_index, classes, evaluate_detections).
        dataloader: yields (im_data, im_info, gt_boxes, num_boxes) batches.
        output_dir: directory for detections.pkl and evaluation output.

    Returns:
        imdb.coco_eval when evaluating a COCO-style dataset, else None.
    """
    # initialize the tensor holder here.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable (legacy Variable wrapper; a no-op on modern PyTorch)
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    start = time.time()
    max_per_image = 100  # per-image cap over all classes
    vis = args.vis
    # Visualization draws only high-confidence boxes, so a higher score
    # threshold is used when vis is on.
    if vis:
        thresh = 0.05
    else:
        thresh = 0.0

    save_name = "faster_rcnn_10"
    num_images = len(imdb.image_index)
    # all_boxes[class][image] -> (N, 5) array of [x1, y1, x2, y2, score].
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    data_iter = iter(dataloader)

    _t = {"im_detect": time.time(), "misc": time.time()}
    det_file = os.path.join(output_dir, "detections.pkl")

    fasterRCNN.eval()
    # Placeholder for classes with no detections on an image.
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):
        data = next(data_iter)
        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])
        # im_data.data.resize_(data[0].size()).copy_(data[0])
        # im_info.data.resize_(data[1].size()).copy_(data[1])
        # gt_boxes.data.resize_(data[2].size()).copy_(data[2])
        # num_boxes.data.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()
        rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_box, RCNN_loss_cls, RCNN_loss_bbox, rois_label = fasterRCNN(
            im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        # Drop the batch-index column of the rois.
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                # NOTE(review): unconditionally calls .cuda() — breaks on
                # CPU-only runs; confirm args.cuda is always set here.
                if args.class_agnostic:
                    box_deltas = (box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() +
                                  torch.FloatTensor(
                                      cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda())
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = (box_deltas.view(-1, 4) * torch.FloatTensor(
                        cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() +
                                  torch.FloatTensor(
                                      cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda())
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        # Undo the test-time image scaling applied by the data loader.
        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        for j in xrange(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write("im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r".format(
            i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

        if vis:
            cv2.imwrite("result.png", im2show)
            pdb.set_trace()
            # cv2.imshow('test', im2show)
            # cv2.waitKey(0)

    with open(det_file, "wb") as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print("Evaluating detections")
    imdb.evaluate_detections(all_boxes, output_dir)

    end = time.time()
    print("test time: %0.4fs" % (end - start))

    if "coco" in args.dataset:
        return imdb.coco_eval
def seal_detection(image_path):
    """Run the module-level Faster R-CNN detector on a single image file.

    Args:
        image_path: Path of the image to load. Grayscale images are
            expanded to 3 channels; channels are flipped RGB -> BGR before
            blob extraction.

    Returns:
        Tuple ``(result_bbox, score_bbox)``: ``result_bbox`` is a list of
        ``(x1, y1, x2, y2)`` integer tuples (at most 10 per class, in
        original image coordinates) and ``score_bbox`` is the matching
        list of confidence scores.

    Note:
        Depends on module-level globals: ``args``, ``cfg``, ``fasterRCNN``,
        ``pascal_classes``, ``thresh``, ``imread``, ``_get_image_blob``,
        ``nms``, ``bbox_transform_inv`` and ``clip_boxes``.
    """
    # Tensor holders; resized in place once the image blob is known.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # Ship to CUDA when requested.
    if args.cuda > 0:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # NOTE(review): `volatile=True` is a pre-0.4 PyTorch idiom (ignored with
    # a warning on modern versions); kept as-is since this file mixes it
    # with `torch.no_grad()` below.
    im_data = Variable(im_data, volatile=True)
    im_info = Variable(im_info, volatile=True)
    num_boxes = Variable(num_boxes, volatile=True)
    gt_boxes = Variable(gt_boxes, volatile=True)

    if args.cuda > 0:
        cfg.CUDA = True
        fasterRCNN.cuda()
    fasterRCNN.eval()

    im_in = np.array(imread(image_path))
    # Grayscale -> 3 channels by replication.
    if len(im_in.shape) == 2:
        im_in = im_in[:, :, np.newaxis]
        im_in = np.concatenate((im_in, im_in, im_in), axis=2)
    # rgb -> bgr
    im = im_in[:, :, ::-1]

    blobs, im_scales = _get_image_blob(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                          dtype=np.float32)

    im_data_pt = torch.from_numpy(im_blob).permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    with torch.no_grad():
        im_data.resize_(im_data_pt.size()).copy_(im_data_pt)
        im_info.resize_(im_info_pt.size()).copy_(im_info_pt)
        gt_boxes.resize_(1, 1, 5).zero_()
        num_boxes.resize_(1).zero_()

    try:
        rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
    except Exception:
        # FIX: the original bare `except:` printed `imglist[num_images]` --
        # both names are undefined in this function (they exist only in
        # commented-out code) -- and then fell through to use the missing
        # outputs. Log the failing image and re-raise instead.
        print("Detection forward pass failed for: {}".format(image_path))
        raise

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas.
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Un-normalize deltas with the precomputed mean/stdev. The four
            # original branches differed only in device and final view shape.
            if args.cuda > 0:
                stds = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda()
                means = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            else:
                stds = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS)
                means = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
            box_deltas = box_deltas.view(-1, 4) * stds + means
            if args.class_agnostic:
                box_deltas = box_deltas.view(1, -1, 4)
            else:
                box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))
        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class.
        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

    # Undo test-time scaling so boxes are in original image coordinates.
    pred_boxes /= im_scales[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    result_bbox = []
    score_bbox = []
    # NOTE(review): `thresh` is read from module scope -- confirm the
    # surrounding script defines it.
    for j in range(1, len(pascal_classes)):  # FIX: `range`, not py2 `xrange`
        inds = torch.nonzero(scores[:, j] > thresh).view(-1)
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if args.class_agnostic:
                cls_boxes = pred_boxes[inds, :]
            else:
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            dets = cls_dets.cpu().numpy()
            # Keep at most the 10 highest-scoring detections per class.
            for i in range(np.minimum(10, dets.shape[0])):
                bbox = tuple(int(np.round(x)) for x in dets[i, :4])
                score = dets[i, -1]
                result_bbox.append(bbox)
                score_bbox.append(score)
    return result_bbox, score_bbox
for j in xrange(1, imdb.num_classes): inds = torch.nonzero(scores[:, j] > thresh).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:, j][inds] _, order = torch.sort(cls_scores, 0, True) if args.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS) cls_dets = cls_dets[keep.view(-1).long()] if vis: im2show = vis_detections(im2show, imdb.classes[j], cls_dets.cpu().numpy(), 0.3) all_boxes[j][i] = cls_dets.cpu().numpy() else: all_boxes[j][i] = empty_array # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack([ all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes) ]) if len(image_scores) > max_per_image:
def eval_frcnn(frcnn_extra, device, fasterRCNN, is_break=False):
    """Run `fasterRCNN` over the test dataloader and return the evaluated AP.

    For every test image: forward pass, bbox-regression decode, per-class
    thresholding + NMS, optional per-image detection cap, then delegates
    scoring to ``frcnn_extra.imdb_test.evaluate_detections``.

    Args:
        frcnn_extra: Bundle of evaluation state (dataloader_test,
            num_images_test, imdb_test, all_boxes, thresh, max_per_image,
            class_agnostic, output_dir).
        device: torch device the input batches are moved to.
        fasterRCNN: The detection network; put into eval mode here.
        is_break: If True, stop after the first image (quick smoke run).

    Returns:
        Whatever ``imdb_test.evaluate_detections`` returns (the AP value).

    Side effects:
        Mutates ``frcnn_extra.all_boxes`` in place with per-class numpy
        detections of shape (N, 5) = [x1, y1, x2, y2, score].
    """
    # NOTE(review): `_t` and `det_file` are assigned but never used below.
    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(frcnn_extra.output_dir, 'detections.pkl')
    fasterRCNN.eval()
    # Placeholder (0, 5) array for classes with no detections in an image.
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    data_iter_test = iter(frcnn_extra.dataloader_test)
    for i in range(frcnn_extra.num_images_test):
        data_test = next(data_iter_test)
        # Batch layout: [im_data, im_info, gt_boxes, num_boxes].
        im_data = data_test[0].to(device)
        im_info = data_test[1].to(device)
        gt_boxes = data_test[2].to(device)
        num_boxes = data_test[3].to(device)
        det_tic = time.time()
        rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)
        scores = cls_prob.data
        # ROI columns 1:5 are the box coordinates (column 0 is batch index).
        boxes = rois.data[:, :, 1:5]
        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas.
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Un-normalize targets by the precomputed mean and stdev.
                # NOTE(review): `.cuda()` is unconditional here -- assumes
                # CUDA evaluation; confirm `device` is always a GPU device.
                if frcnn_extra.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    # Per-class regression: 4 deltas per class.
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(frcnn_extra.imdb_test.classes))
            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class.
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))
        # Undo the test-time image scale (im_info[0][2]) -> original coords.
        pred_boxes /= data_test[1][0][2].item()
        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic  # NOTE(review): computed, not reported
        misc_tic = time.time()
        # Per-class thresholding + NMS; class 0 is background and skipped.
        for j in range(1, frcnn_extra.imdb_test.num_classes):
            inds = torch.nonzero(scores[:, j] > frcnn_extra.thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if frcnn_extra.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                frcnn_extra.all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                frcnn_extra.all_boxes[j][i] = empty_array
        # Limit to max_per_image detections *over all classes*.
        if frcnn_extra.max_per_image > 0:
            image_scores = np.hstack([frcnn_extra.all_boxes[j][i][:, -1]
                                      for j in range(1, frcnn_extra.imdb_test.num_classes)])
            if len(image_scores) > frcnn_extra.max_per_image:
                # Score of the max_per_image-th best detection becomes the cutoff.
                image_thresh = np.sort(image_scores)[-frcnn_extra.max_per_image]
                for j in range(1, frcnn_extra.imdb_test.num_classes):
                    keep = np.where(frcnn_extra.all_boxes[j][i][:, -1] >= image_thresh)[0]
                    frcnn_extra.all_boxes[j][i] = frcnn_extra.all_boxes[j][i][keep, :]
        misc_toc = time.time()
        nms_time = misc_toc - misc_tic  # NOTE(review): computed, not reported
        if is_break:
            break
    ap = frcnn_extra.imdb_test.evaluate_detections(frcnn_extra.all_boxes, frcnn_extra.output_dir)
    return ap
def raber_detection(im_in):
    """Run the module-level Faster R-CNN detector on an in-memory image.

    Args:
        im_in: HxW (grayscale, replicated to 3 channels) or HxWx3 RGB
            uint8 image array; flipped RGB -> BGR before blob extraction.

    Returns:
        Numpy array of shape (N, 5) with rows ``[x1, y1, x2, y2, score]``.
        Returns an empty (0, 5) array when nothing scores above 0.05.
        NOTE(review): as in the original, ``box_results`` is overwritten per
        class, so only the *last* class with detections is returned --
        confirm whether accumulation across classes was intended.

    Note:
        Depends on module-level globals: ``use_gpu``, ``args``, ``cfg``,
        ``fasterRCNN``, ``pascal_classes``, ``_get_image_blob``, ``nms``,
        ``bbox_transform_inv``, ``clip_boxes``, ``vis_detections``.
    """
    # Tensor holders; resized in place once the image blob is known.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # NOTE(review): holders move to CUDA based on `use_gpu`, but the rest of
    # the function keys off `args.cuda` -- confirm the flags always agree.
    if use_gpu:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    with torch.no_grad():
        im_data = Variable(im_data)
        im_info = Variable(im_info)
        num_boxes = Variable(num_boxes)
        gt_boxes = Variable(gt_boxes)

    if args.cuda > 0:
        cfg.CUDA = True
        fasterRCNN.cuda()
    fasterRCNN.eval()

    thresh = 0.05
    vis = False

    # Grayscale -> 3 channels by replication.
    if len(im_in.shape) == 2:
        im_in = im_in[:, :, np.newaxis]
        im_in = np.concatenate((im_in, im_in, im_in), axis=2)
    # rgb -> bgr
    im = im_in[:, :, ::-1]

    blobs, im_scales = _get_image_blob(im)
    assert len(im_scales) == 1, "Only single-image batch implemented"
    im_blob = blobs
    im_info_np = np.array([[im_blob.shape[1], im_blob.shape[2], im_scales[0]]],
                          dtype=np.float32)

    im_data_pt = torch.from_numpy(im_blob).permute(0, 3, 1, 2)
    im_info_pt = torch.from_numpy(im_info_np)

    im_data.data.resize_(im_data_pt.size()).copy_(im_data_pt)
    im_info.data.resize_(im_info_pt.size()).copy_(im_info_pt)
    gt_boxes.data.resize_(1, 1, 5).zero_()
    num_boxes.data.resize_(1).zero_()

    rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

    scores = cls_prob.data
    # ROI columns 1:5 are the box coordinates (column 0 is batch index).
    boxes = rois.data[:, :, 1:5]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas.
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Un-normalize deltas with the precomputed mean/stdev. The four
            # original branches differed only in device and final view shape.
            if args.cuda > 0:
                stds = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda()
                means = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            else:
                stds = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS)
                means = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
            box_deltas = box_deltas.view(-1, 4) * stds + means
            if args.class_agnostic:
                box_deltas = box_deltas.view(1, -1, 4)
            else:
                box_deltas = box_deltas.view(1, -1, 4 * len(pascal_classes))
        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
    else:
        # Simply repeat the boxes, once for each class.
        tiled = torch.from_numpy(np.tile(boxes, (1, scores.shape[1])))
        pred_boxes = tiled.cuda() if args.cuda > 0 else tiled

    # Undo test-time scaling so boxes are in original image coordinates.
    pred_boxes /= im_scales[0]

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()

    if vis:
        im2show = np.copy(im)

    # FIX: the original left `box_results` unbound when no class produced a
    # detection, so the final `return` raised NameError; start from an
    # empty (0, 5) array instead.
    box_results = np.empty((0, 5), dtype=np.float32)

    # Per-class thresholding + NMS; class 0 is background and skipped.
    for j in range(1, len(pascal_classes)):
        inds = torch.nonzero(scores[:, j] > thresh).view(-1)
        # if there is det
        if inds.numel() > 0:
            cls_scores = scores[:, j][inds]
            _, order = torch.sort(cls_scores, 0, True)
            if args.class_agnostic:
                cls_boxes = pred_boxes[inds, :]
            else:
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
            cls_dets = cls_dets[order]
            keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            if vis:
                im2show = vis_detections(im2show, pascal_classes[j],
                                         cls_dets.cpu().numpy(), 0.5)
            box_results = cls_dets.cpu().numpy()

    if vis:
        result_path = os.path.join(args.result_dir, "OnlineDet.jpg")
        cv2.imwrite(result_path, im2show)
    return box_results
def forward(self, input):
    """Turn FPN RPN outputs into a fixed-size batch of scored proposals.

    Algorithm (per batch element):
      1. Generate anchors for every pyramid level, apply the predicted
         bbox deltas, and clip the resulting proposals to the image.
      2. Sort proposals by foreground score (highest first) and keep the
         top ``pre_nms_topN``.
      3. Run NMS at ``nms_thresh`` and keep the top ``post_nms_topN``.
      4. Write survivors into a zero-padded output tensor.

    Args:
        input: 5-tuple of
            [0] scores tensor -- ``input[0][:, :, 1]`` is taken as the
                foreground probability per ROI (batch x num_rois),
            [1] bbox_deltas (batch x num_anchors x 4),
            [2] im_info (image height/width/scale rows),
            [3] cfg_key ('TRAIN' or 'TEST', selects cfg thresholds),
            [4] feat_shapes (per-level feature-map shapes used for
                anchor generation).

    Returns:
        Tensor of shape (batch_size, post_nms_topN, 5); column 0 is the
        batch index, columns 1:5 the proposal box. Rows beyond the number
        of surviving proposals stay zero (padding).
    """
    # Foreground probability per ROI: batch_size x num_rois.
    scores = input[0][:, :, 1]
    bbox_deltas = input[1]
    im_info = input[2]
    cfg_key = input[3]
    feat_shapes = input[4]
    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
    # NOTE(review): min_size is read but never applied -- the min-size box
    # filter present in the reference implementation is disabled here.
    min_size = cfg[cfg_key].RPN_MIN_SIZE
    batch_size = bbox_deltas.size(0)
    # Anchors for all pyramid levels, shared across the batch.
    anchors = torch.from_numpy(generate_anchors_all_pyramids(
        self._fpn_scales, self._anchor_ratios, feat_shapes,
        self._fpn_feature_strides, self._fpn_anchor_stride)).type_as(scores)
    num_anchors = anchors.size(0)
    anchors = anchors.view(1, num_anchors, 4).expand(batch_size, num_anchors, 4)
    # Convert anchors into proposals via bbox transformations, then clip
    # the predicted boxes to the image bounds.
    proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)
    proposals = clip_boxes(proposals, im_info, batch_size)
    scores_keep = scores
    proposals_keep = proposals
    # Descending sort of scores within each batch element (dim 1).
    _, order = torch.sort(scores_keep, 1, True)
    # Output is zero-padded to exactly post_nms_topN rows per image.
    output = scores.new(batch_size, post_nms_topN, 5).zero_()
    for i in range(batch_size):
        proposals_single = proposals_keep[i]
        scores_single = scores_keep[i]
        order_single = order[i]
        # Take top pre_nms_topN before NMS.
        # NOTE(review): the guard compares against scores_keep.numel(),
        # which is the count over the WHOLE batch, not this element --
        # only exact for batch_size == 1; confirm intended.
        if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
            order_single = order_single[:pre_nms_topN]
        proposals_single = proposals_single[order_single, :]
        scores_single = scores_single[order_single].view(-1, 1)
        # NMS, then keep the top post_nms_topN survivors.
        keep_idx_i = nms(proposals_single, scores_single.squeeze(1), nms_thresh)
        keep_idx_i = keep_idx_i.long().view(-1)
        if post_nms_topN > 0:
            keep_idx_i = keep_idx_i[:post_nms_topN]
        proposals_single = proposals_single[keep_idx_i, :]
        scores_single = scores_single[keep_idx_i, :]
        # Write survivors; remaining rows stay zero (padding). Column 0
        # carries the batch index so downstream ROI ops can route boxes.
        num_proposal = proposals_single.size(0)
        output[i, :, 0] = i
        output[i, :num_proposal, 1:] = proposals_single
    return output