def loop():
    """Evaluate a sweep of FPN checkpoints over the chosen test imdb.

    Parses CLI args, builds the dataset/network once, then for up to 200
    successive checkpoints (``args.checkepoch`` is incremented each pass)
    loads ``fpn_{session}_{epoch}_{point}.pth``, runs detection over every
    test image, applies per-class NMS and a per-image cap, evaluates the
    detections, and appends the per-class APs to ``result.txt``.

    Side effects: reads checkpoints from ``args.load_dir``, writes
    ``detections.pkl``, ``result.txt`` and (when ``vis``) per-image PNGs.

    Fixes vs. the original:
      * ``nms(..., ~args.cuda)``: ``~True == -2`` and ``~False == -1`` are
        both truthy, so the force-CPU flag was always set. Use
        ``not args.cuda`` so GPU NMS is used when CUDA is requested.
      * ``data_iter.next()`` (Py2-only) replaced with ``next(data_iter)``,
        which works on both Python 2 and 3.
    """
    args = parse_args()

    print('Called with args:')
    print(args)

    if torch.cuda.is_available() and not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")

    # Map the dataset name onto its imdb names and anchor configuration.
    if args.dataset == "pascal_voc":
        args.imdb_name = "voc_2007_test"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "pascal_voc_0712":
        args.imdb_name = "voc_2007_trainval+voc_2012_trainval"
        args.imdbval_name = "voc_2007_test"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "coco":
        args.imdb_name = "coco_2014_train+coco_2014_valminusminival"
        args.imdbval_name = "coco_2014_minival"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "imagenet":
        args.imdb_name = "imagenet_train"
        args.imdbval_name = "imagenet_val"
        args.set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']
    elif args.dataset == "vg":
        args.imdb_name = "vg_150-50-50_minitrain"
        args.imdbval_name = "vg_150-50-50_minival"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']

    args.cfg_file = "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)
    np.random.seed(cfg.RNG_SEED)

    # Evaluation: never use flipped images; competition mode enables the
    # full evaluation protocol on the imdb.
    cfg.TRAIN.USE_FLIPPED = False
    imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdbval_name, False)
    imdb.competition_mode(on=True)

    print('{:d} roidb entries'.format(len(roidb)))

    # initilize the network here.
    if args.net == 'vgg16':
        fpn = vgg16(imdb.classes, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res101':
        fpn = resnet(imdb.classes, 101, pretrained=False, class_agnostic=args.class_agnostic)
    elif args.net == 'res50':
        fpn = resnet(imdb.classes, 50, pretrained=True, class_agnostic=args.class_agnostic)
    elif args.net == 'res152':
        fpn = resnet(imdb.classes, 152, pretrained=True, class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fpn.create_architecture()
    print('load model successfully!')

    # Tensor holders; resized in-place per batch below.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if args.cuda:
        cfg.CUDA = True
        fpn.cuda()

    start = time.time()
    max_per_image = 100
    vis = True  # args.vis
    # Both branches of the original "if vis" assigned 0.0 — keep every box.
    thresh = 0.0

    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)

    # Sweep successive checkpoints: args.checkepoch is advanced at the end
    # of every iteration, so pass h evaluates epoch (checkepoch + h).
    for h in range(200):
        dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size,
                                 imdb.num_classes, training=False, normalize=False)
        dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size,
                                                 shuffle=False, num_workers=0,
                                                 pin_memory=True)
        data_iter = iter(dataloader)

        _t = {'im_detect': time.time(), 'misc': time.time()}
        det_file = os.path.join(output_dir, 'detections.pkl')

        input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
        if not os.path.exists(input_dir):
            raise Exception('There is no input directory for loading network from ' + input_dir)
        load_name = os.path.join(input_dir,
                                 'fpn_{}_{}_{}.pth'.format(args.checksession,
                                                           args.checkepoch,
                                                           args.checkpoint))
        print("load checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        fpn.load_state_dict(checkpoint['model'])
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']

        fpn.eval()
        empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))

        for i in range(num_images):
            # next() works on Python 2 and 3 (the original .next() is Py2-only).
            data = next(data_iter)
            im_data.data.resize_(data[0].size()).copy_(data[0])
            im_info.data.resize_(data[1].size()).copy_(data[1])
            gt_boxes.data.resize_(data[2].size()).copy_(data[2])
            num_boxes.data.resize_(data[3].size()).copy_(data[3])

            det_tic = time.time()
            rois, cls_prob, bbox_pred, \
                _, _, _, _, _ = fpn(im_data, im_info, gt_boxes, num_boxes)

            scores = cls_prob.data  # 1*300*10
            boxes = rois.data[:, :, 1:5]  # 1*300*4

            if cfg.TEST.BBOX_REG:
                # Apply bounding-box regression deltas
                box_deltas = bbox_pred.data  # 1*300*40
                if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                    # Optionally normalize targets by a precomputed mean and stdev
                    if args.class_agnostic:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(1, -1, 4)
                    else:
                        box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                            + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                        box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

                pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
            else:
                # Simply repeat the boxes, once for each class
                pred_boxes = boxes

            # Undo the test-time image rescale (data[1][0][2] is the im_scale).
            pred_boxes /= data[1][0][2].cuda()

            scores = scores.squeeze()
            pred_boxes = pred_boxes.squeeze()
            det_toc = time.time()
            detect_time = det_toc - det_tic
            misc_tic = time.time()

            if vis:
                im = cv2.imread(imdb.image_path_at(i))
                im2show = np.copy(im)

            for j in range(1, imdb.num_classes):
                inds = torch.nonzero(scores[:, j] > thresh).view(-1)
                # if there is det
                if inds.numel() > 0:
                    cls_scores = scores[:, j][inds]
                    _, order = torch.sort(cls_scores, 0, True)
                    if args.class_agnostic:
                        cls_boxes = pred_boxes[inds, :]
                    else:
                        cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                    cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                    # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                    cls_dets = cls_dets[order]
                    # BUGFIX: the original passed ~args.cuda (always truthy),
                    # which forced CPU NMS unconditionally.
                    keep = nms(cls_dets, cfg.TEST.NMS, not args.cuda)
                    cls_dets = cls_dets[keep.view(-1).long()]
                    if vis:
                        im2show = vis_detections(im2show, imdb.classes[j],
                                                 cls_dets.cpu().numpy(), 0.3)
                    all_boxes[j][i] = cls_dets.cpu().numpy()
                else:
                    all_boxes[j][i] = empty_array

            # Limit to max_per_image detections *over all classes*
            if max_per_image > 0:
                image_scores = np.hstack([all_boxes[j][i][:, -1]
                                          for j in range(1, imdb.num_classes)])
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    for j in range(1, imdb.num_classes):
                        keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                        all_boxes[j][i] = all_boxes[j][i][keep, :]

            misc_toc = time.time()
            nms_time = misc_toc - misc_tic

            sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r'
                             .format(i + 1, num_images, detect_time, nms_time))
            sys.stdout.flush()

            if vis:
                cv2.imwrite('images/result%d_%d.png' % (args.checkepoch, i), im2show)
                # pdb.set_trace()
                # cv2.imshow('test', im2show)
                # cv2.waitKey(0)

            # Release per-image tensors so GPU memory stays bounded.
            del data
            del pred_boxes
            del scores
            torch.cuda.empty_cache()

        with open(det_file, 'wb') as f:
            cPickle.dump(all_boxes, f, cPickle.HIGHEST_PROTOCOL)

        print('Evaluating detections')
        aps, clss = imdb.evaluate_detections(all_boxes, output_dir)

        # Append "['cls:ap',...] ls:session_epoch" for this checkpoint.
        with open("result.txt", 'a+') as f:
            lp = ""
            cc = 0
            for b in clss:
                if cc != len(clss) - 1:
                    lp = lp + "'" + str(b) + ":" + str(aps[cc]) + "',"
                else:
                    lp = lp + "'" + str(b) + ":" + str(aps[cc]) + "'"
                cc = cc + 1
            sp = "[" + lp + "] ls:" + str(args.checksession) + "_" + str(args.checkepoch)
            f.write(sp + "\n")

        end = time.time()
        print("test time: %0.4fs" % (end - start))

        # Advance to the next checkpoint and drop loader state before looping.
        args.checkepoch = args.checkepoch + 1
        del data_iter
        del dataset
        del dataloader
        torch.cuda.empty_cache()
        gc.collect()
if torch.cuda.is_available() and not args.cuda: print("WARNING: You have a CUDA device, so you should probably run with --cuda") # train set # -- Note: Use validation set and disable the flipped to enable faster loading. cfg.TRAIN.USE_FLIPPED = False cfg.USE_GPU_NMS = args.cuda if args.use_det: print("Using VID and DET datasets.") imdb_names = args.imdb_name.split('+') assert len(imdb_names) < 3, 'Cannot handle > 2 datasets when using VID and DET.' # vid and det #print(imdb_names[0]) #print(imdb_names[1]) #input() vid_imdb, vid_roidb, vid_ratio_list, vid_ratio_index = combined_roidb(imdb_names[0]) det_imdb, det_roidb, det_ratio_list, det_ratio_index = combined_roidb(imdb_names[1]) vid_train_size, det_train_size = len(vid_roidb), len(det_roidb) train_size = min(vid_train_size, det_train_size) assert vid_imdb.num_classes==det_imdb.num_classes, 'VID and DET must have same number of classes.' imdb_classes = vid_imdb.classes else: #print(args.imdb_name) #input() imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name) train_size = len(roidb) imdb_classes = imdb.classes print('{:d} roidb entries'.format(train_size)) output_dir = os.path.join(args.save_dir, args.arch, args.net, args.dataset)
if args.cfg_file is not None: cfg_from_file(args.cfg_file) # -- Note: Use validation set and disable the flipped to enable faster loading. if args.cuda: cfg.CUDA = True # xxx: different from training cfg.TRAIN.USE_FLIPPED = False cfg.USE_GPU_NMS = True if args.cuda else False print('Using config:') pprint.pprint(cfg) # np.random.seed(cfg.RNG_SEED) # train set imdb_name = DATASET + "_train_supervised" imdb, roidb, ratio_list, ratio_index = combined_roidb(imdb_name) train_size = len(roidb) print('{:d} roidb entries'.format(train_size)) output_dir = os.path.join(args.save_dir, args.net, DATASET, 'all') os.makedirs(output_dir, exist_ok=True) dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, imdb.num_classes, training=True) dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size,
print('Using config:') pprint.pprint(cfg) np.random.seed(cfg.RNG_SEED) # torch.backends.cudnn.benchmark = True if torch.cuda.is_available() and not args.cuda: print( "WARNING: You have a CUDA device, so you should probably run with --cuda" ) # train set # -- Note: Use validation set and disable the flipped to enable faster loading. cfg.TRAIN.USE_FLIPPED = True cfg.USE_GPU_NMS = args.cuda imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name) train_size = len(roidb) print('{:d} roidb entries'.format(len(roidb))) output_dir = args.save_dir if not os.path.exists(output_dir): os.makedirs(output_dir) sampler_batch = sampler(train_size, args.batch_size) dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \ imdb.num_classes, training=True) dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size,
# NOTE(review): script fragment — seeds RNGs and builds the dataloader for a
# query-based (one-shot) detector; `random_seed`, `args`, `sampler`,
# `roibatchLoader` and `collate_tensors` come from elsewhere in the file.
np.random.seed(random_seed)
torch.manual_seed(random_seed)
if args.cuda:
    torch.cuda.manual_seed_all(random_seed)
#torch.backends.cudnn.benchmark = True
if torch.cuda.is_available() and not args.cuda:
    print(
        "WARNING: You have a CUDA device, so you should probably run with --cuda"
    )

# train set
cfg.USE_GPU_NMS = args.cuda

# create dataloader
# This combined_roidb variant also returns per-class query images.
imdb, roidb, ratio_list, ratio_index, query = combined_roidb(
    args.imdb_name, True)
train_size = len(roidb)
print('{:d} roidb entries'.format(len(roidb)))

# Batch sampler groups images of similar aspect ratio together.
sampler_batch = sampler(train_size, args.batch_size)
dataset = roibatchLoader(roidb, ratio_list, ratio_index, query,
                         args.batch_size, imdb._classes, training=True)
dataloader = torch.utils.data.DataLoader(dataset,
                                         batch_size=args.batch_size,
                                         sampler=sampler_batch,
                                         num_workers=args.num_workers,
                                         collate_fn=collate_tensors)
print('Using config:') pprint.pprint(cfg) np.random.seed(cfg.RNG_SEED) # torch.backends.cudnn.benchmark = True if torch.cuda.is_available() and not args.cuda: print( "WARNING: You have a CUDA device, so you should probably run with --cuda" ) # train set # -- Note: Use validation set and disable the flipped to enable faster loading. cfg.TRAIN.USE_FLIPPED = True cfg.USE_GPU_NMS = args.cuda imdb, roidb = combined_roidb(args.imdb_name, root_path=args.data_root) # imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name) train_size = len(roidb) print('{:d} roidb entries'.format(len(roidb))) output_dir = args.save_dir + "/" + args.net + "/" + args.dataset if not os.path.exists(output_dir): os.makedirs(output_dir) # dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \ # imdb.num_classes, training=True) # dataset = roibatchLoader(roidb , imdb.num_classes, training=True) dataset = Detection(roidb,
# NOTE(review): script fragment — the first three assignments belong to the
# "vg" branch of the dataset-selection if/elif chain of the enclosing script.
args.imdb_name = "vg_150-50-50_minitrain"
args.imdbval_name = "vg_150-50-50_minival"
args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']

# Pick the large-scale config variant when requested.
args.cfg_file = "cfgs/{}_ls.yml".format(args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

if args.cfg_file is not None:
    cfg_from_file(args.cfg_file)
if args.set_cfgs is not None:
    cfg_from_list(args.set_cfgs)

print('Using config:')
pprint.pprint(cfg)

# Evaluation: no flipped images; competition mode uses the full eval protocol.
cfg.TRAIN.USE_FLIPPED = False
imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdbval_name, False)
imdb.competition_mode(on=True)

print('{:d} roidb entries'.format(len(roidb)))

# Checkpoint path: load_dir/net/dataset/faster_rcnn_{session}_{epoch}_{point}.pth
input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
if not os.path.exists(input_dir):
    raise Exception('There is no input directory for loading network from ' + input_dir)
load_name = os.path.join(input_dir,
                         'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint))

# initilize the network here.
if args.net == 'vgg16':
    fasterRCNN = vgg16(imdb.classes, pretrained=False, class_agnostic=args.class_agnostic)
elif args.net == 'res101':
    fasterRCNN = resnet(imdb.classes, 101, pretrained=False, class_agnostic=args.class_agnostic)
def test(args, model=None):
    """Evaluate a query-based (one-shot) Faster R-CNN on the validation imdb.

    Builds the network from ``args`` (or reuses ``model`` when given), runs
    detection for every image at each scale in ``cfg.TEST.SCALES`` —
    optionally augmenting the query image with 90-degree rotations — merges
    the multi-scale results, applies NMS and a per-image cap, and scores
    everything with the imdb's evaluator.

    Args:
        args: parsed command-line namespace (net, cuda, weights, ...).
        model: optional pre-built network; when None a new one is created
            and loaded from ``args.weights``.

    Returns:
        The value of ``imdb_vu.evaluate_detections`` (mAP).
    """
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )

    # Load dataset
    imdb_vu, roidb_vu, ratio_list_vu, ratio_index_vu, query_vu = combined_roidb(
        args.imdbval_name, False)
    imdb_vu.competition_mode(on=True)
    dataset_vu = roibatchLoader(roidb_vu, ratio_list_vu, ratio_index_vu,
                                query_vu, 1, imdb_vu._classes, training=False)

    # initilize the network here.
    if not model:
        if args.net == 'vgg16':
            fasterRCNN = vgg16(imdb_vu.classes, pretrained=False,
                               class_agnostic=args.class_agnostic)
        elif args.net == 'res101':
            fasterRCNN = resnet(imdb_vu.classes, 101, pretrained=False,
                                class_agnostic=args.class_agnostic)
        elif args.net == 'res50':
            fasterRCNN = resnet(imdb_vu.classes, 50, pretrained=False,
                                class_agnostic=args.class_agnostic)
        elif args.net == 'res152':
            fasterRCNN = resnet(imdb_vu.classes, 152, pretrained=False,
                                class_agnostic=args.class_agnostic)
        else:
            print("network is not defined")
        fasterRCNN.create_architecture()

        # Load checkpoint
        print("load checkpoint %s" % (args.weights))
        checkpoint = torch.load(args.weights)
        fasterRCNN.load_state_dict(checkpoint['model'])
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']
        print('load model successfully!')
    else:
        # evaluate constructed model
        fasterRCNN = model

    # initialize the tensor holder here.
    # These are resized in-place per batch inside the loop below.
    im_data = torch.FloatTensor(1)
    query = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    catgory = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        cfg.CUDA = True
        fasterRCNN.cuda()
        im_data = im_data.cuda()
        query = query.cuda()
        im_info = im_info.cuda()
        catgory = catgory.cuda()
        gt_boxes = gt_boxes.cuda()

    # record time
    start = time.time()

    # visiualization: with vis on, drop very low-score boxes before NMS.
    vis = args.vis if hasattr(args, 'vis') else None
    if vis:
        thresh = 0.05
    else:
        thresh = 0.0
    max_per_image = 100

    fasterRCNN.eval()
    dataset_vu.query_position = 0

    # One dataloader/iterator per test scale; they are consumed in lockstep
    # so every image is seen once per scale.
    test_scales = cfg.TEST.SCALES
    multiscale_iterators = []
    for i_scale, test_scale in enumerate(test_scales):
        cur_dataloader_vu = torch.utils.data.DataLoader(dataset_vu, batch_size=1,
                                                        shuffle=False,
                                                        num_workers=0,
                                                        pin_memory=True)
        cur_data_iter_vu = iter(cur_dataloader_vu)
        multiscale_iterators.append(cur_data_iter_vu)

    # total quantity of testing images, each images include multiple detect class
    num_images_vu = len(imdb_vu.image_index)
    num_detect = len(ratio_index_vu[0])

    all_boxes = [[[] for _ in range(num_images_vu)]
                 for _ in range(imdb_vu.num_classes)]

    _t = {'im_detect': time.time(), 'misc': time.time()}

    for i, index in enumerate(ratio_index_vu[0]):
        det_tic = time.time()
        multiscale_boxes = []
        multiscale_scores = []
        for i_scale, (data_iter_vu, test_scale) in enumerate(
                zip(multiscale_iterators, test_scales)):
            # need to rewrite cfg.TRAIN.SCALES - very hacky!
            # The loader reads cfg.TRAIN.SCALES at fetch time, so swap it in
            # just for this next() call and restore it immediately after.
            BACKUP_TRAIN_SCALES = cfg.TRAIN.SCALES
            cfg.TRAIN.SCALES = [test_scale]
            data = next(data_iter_vu)
            cfg.TRAIN.SCALES = BACKUP_TRAIN_SCALES

            with torch.no_grad():
                im_data.resize_(data[0].size()).copy_(data[0])
                query.resize_(data[1].size()).copy_(data[1])
                im_info.resize_(data[2].size()).copy_(data[2])
                gt_boxes.resize_(data[3].size()).copy_(data[3])
                catgory.data.resize_(data[4].size()).copy_(data[4])

                # Run Testing
                if not hasattr(args, "class_image_augmentation"
                               ) or not args.class_image_augmentation:
                    queries = [query]
                elif args.class_image_augmentation.lower() == "rotation90":
                    # Query plus its three successive 90-degree rotations.
                    queries = [query]
                    for _ in range(3):
                        queries.append(queries[-1].rot90(1, [2, 3]))
                else:
                    raise RuntimeError(
                        "Unknown class_image_augmentation: {}".format(
                            args.class_image_augmentation))

                for q in queries:
                    rois, cls_prob, bbox_pred, \
                        rpn_loss_cls, rpn_loss_box, \
                        RCNN_loss_cls, _, RCNN_loss_bbox, \
                        rois_label, weight = fasterRCNN(im_data, q, im_info,
                                                        gt_boxes, catgory)

                    scores = cls_prob.data
                    boxes = rois.data[:, :, 1:5]

                    # Apply bounding-box regression
                    if cfg.TEST.BBOX_REG:
                        # Apply bounding-box regression deltas
                        box_deltas = bbox_pred.data
                        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                            # Optionally normalize targets by a precomputed mean and stdev
                            if args.class_agnostic:
                                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                    + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                                box_deltas = box_deltas.view(1, -1, 4)
                            else:
                                box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                    + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                                box_deltas = box_deltas.view(
                                    1, -1, 4 * len(imdb_vu.classes))

                        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
                        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
                    else:
                        # Simply repeat the boxes, once for each class
                        pred_boxes = np.tile(boxes, (1, scores.shape[1]))

                    # Resize to original ratio
                    # (data[2][0][2] is the im_scale factor — presumably; confirm in loader.)
                    pred_boxes /= data[2][0][2].item()

                    # Remove batch_size dimension
                    scores = scores.squeeze()
                    pred_boxes = pred_boxes.squeeze()

                    multiscale_scores.append(scores)
                    multiscale_boxes.append(pred_boxes)

        # Merge detections from every scale (and query rotation).
        scores = torch.cat(multiscale_scores, dim=0)
        pred_boxes = torch.cat(multiscale_boxes, dim=0)

        # Record time
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()

        # Post processing
        inds = torch.nonzero(scores > thresh).view(-1)
        if inds.numel() > 0:
            # remove useless indices
            cls_scores = scores[inds]
            cls_boxes = pred_boxes[inds, :]
            cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)

            # rearrange order
            _, order = torch.sort(cls_scores, 0, True)
            cls_dets = cls_dets[order]

            # NMS
            keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
            cls_dets = cls_dets[keep.view(-1).long()]
            all_boxes[catgory][index] = cls_dets.cpu().numpy()

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            # NOTE(review): bare except silently skips images with no
            # detections (all_boxes entry is an un-indexable empty list).
            try:
                image_scores = all_boxes[catgory][index][:, -1]
                if len(image_scores) > max_per_image:
                    image_thresh = np.sort(image_scores)[-max_per_image]
                    keep = np.where(
                        all_boxes[catgory][index][:, -1] >= image_thresh)[0]
                    all_boxes[catgory][index] = all_boxes[catgory][index][
                        keep, :]
            except:
                pass

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \
                         .format(i + 1, num_detect, detect_time, nms_time))
        sys.stdout.flush()

        # save test image
        if vis and i % 1 == 0:
            im2show = cv2.imread(
                dataset_vu._roidb[dataset_vu.ratio_index[i]]['image'])
            im2show = vis_detections(im2show, 'shot',
                                     cls_dets.cpu().numpy(), 0.3)

            # Un-normalize the query image (ImageNet mean/std) for display.
            o_query = data[1][0].permute(1, 2, 0).contiguous().cpu().numpy()
            o_query *= [0.229, 0.224, 0.225]
            o_query += [0.485, 0.456, 0.406]
            o_query *= 255
            o_query = o_query[:, :, ::-1]

            # Resize the query to a square matching the image height, then
            # paste it to the right of the detection image.
            (h, w, c) = im2show.shape
            o_query = cv2.resize(o_query, (h, h),
                                 interpolation=cv2.INTER_LINEAR)
            im2show = np.concatenate((im2show, o_query), axis=1)

            vis_path = "./test_img"
            if not os.path.isdir(vis_path):
                os.makedirs(vis_path)
            cv2.imwrite(os.path.join(vis_path, "%d_d.png" % (i)), im2show)

    print('Evaluating detections')
    mAP = imdb_vu.evaluate_detections(all_boxes, None)

    end = time.time()
    print("test time: %0.4fs" % (end - start))
    return mAP
def eval_result(args, logger, epoch, output_dir):
    """Evaluate a ThunderNet (snet backbone) checkpoint for one epoch.

    Loads ``thundernet_epoch_{epoch}.pth`` from ``output_dir``, runs
    detection over the whole validation imdb, applies per-class NMS and a
    per-image cap, dumps raw detections to ``detections.pkl``, and logs
    AP@0.5 plus sample images to the TensorBoard ``logger``.

    Args:
        args: parsed command-line namespace; ``args.batch_size`` is forced
            to 1 for evaluation.
        logger: TensorBoard-style writer (``add_image`` / ``add_scalar``).
        epoch: checkpoint epoch number to load and to tag the logged scalar.
        output_dir: directory containing the checkpoint (rebound later to
            the detections output directory).
    """
    if torch.cuda.is_available() and not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )
    # Evaluation runs one image at a time.
    args.batch_size = 1

    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.imdbval_name, False, root_path=args.data_root)
    imdb.competition_mode(on=True)

    load_name = os.path.join(output_dir,
                             'thundernet_epoch_{}.pth'.format(epoch, ))
    # Backbone depth is encoded in the net name, e.g. "snet_146" -> 146.
    layer = int(args.net.split("_")[1])
    _RCNN = snet(imdb.classes, layer, pretrained_path=None,
                 class_agnostic=args.class_agnostic)

    _RCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    if args.cuda:
        checkpoint = torch.load(load_name)
    else:
        checkpoint = torch.load(load_name,
                                map_location=lambda storage, loc: storage
                                )  # Load all tensors onto the CPU
    _RCNN.load_state_dict(checkpoint['model'])

    # Tensor holders; resized in-place per batch below.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)
    # hm = torch.FloatTensor(1)
    # reg_mask = torch.LongTensor(1)
    # wh = torch.FloatTensor(1)
    # offset = torch.FloatTensor(1)
    # ind = torch.LongTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()
        # hm = hm.cuda()
        # reg_mask = reg_mask.cuda()
        # wh = wh.cuda()
        # offset = offset.cuda()
        # ind = ind.cuda()

    # make variable
    with torch.no_grad():
        im_data = Variable(im_data)
        im_info = Variable(im_info)
        num_boxes = Variable(num_boxes)
        gt_boxes = Variable(gt_boxes)
        # hm = Variable(hm)
        # reg_mask = Variable(reg_mask)
        # wh = Variable(wh)
        # offset = Variable(offset)
        # ind = Variable(ind)

    if args.cuda:
        cfg.CUDA = True

    if args.cuda:
        _RCNN.cuda()

    start = time.time()
    max_per_image = 100

    # Visualization always on; keep a pre-NMS score floor of 0.05 for it.
    vis = True
    if vis:
        thresh = 0.05
    else:
        thresh = 0.0

    save_name = 'thundernet'
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    # Rebinds output_dir to the detections directory for this imdb.
    output_dir = get_output_dir(imdb, save_name)
    # dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \
    #                          imdb.num_classes, training=False, normalize=False)
    # dataset = roibatchLoader(roidb, imdb.num_classes, training=False)
    dataset = Detection(roidb, num_classes=imdb.num_classes,
                        transform=BaseTransform(cfg.TEST.SIZE, cfg.PIXEL_MEANS),
                        training=False)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False, num_workers=0,
                                             pin_memory=True)

    data_iter = iter(dataloader)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    _RCNN.eval()
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):
        data = next(data_iter)
        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])
            # hm.resize_(data[4].size()).copy_(data[4])
            # reg_mask.resize_(data[5].size()).copy_(data[5])
            # wh.resize_(data[6].size()).copy_(data[6])
            # offset.resize_(data[7].size()).copy_(data[7])
            # ind.resize_(data[8].size()).copy_(data[8])

        det_tic = time.time()

        with torch.no_grad():
            # time_measure holds per-stage timings (RPN, pre-RoI, RoI, subnet).
            time_measure, \
                rois, cls_prob, bbox_pred, \
                rpn_loss_cls, rpn_loss_box, \
                RCNN_loss_cls, RCNN_loss_bbox, \
                rois_label = _RCNN(im_data, im_info, gt_boxes, num_boxes,
                                   # hm,reg_mask,wh,offset,ind
                                   )

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(args.batch_size, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(args.batch_size, -1,
                                                 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        # Undo per-axis resize: x coords by data[1][0][2], y by data[1][0][3]
        # (presumably width/height scale factors from the transform — confirm).
        # pred_boxes /= data[1][0][2].item()
        pred_boxes[:, :, 0::2] /= data[1][0][2].item()
        pred_boxes[:, :, 1::2] /= data[1][0][3].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        for j in xrange(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                # keep = soft_nms(cls_dets.cpu().numpy(), Nt=0.5, method=2)
                # keep = torch.as_tensor(keep, dtype=torch.long)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    vis_detections(im2show, imdb.classes[j],
                                   color_list[j - 1].tolist(),
                                   cls_dets.cpu().numpy(), 0.6)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write(
            'im_detect: {:d}/{:d}\tDetect: {:.3f}s (RPN: {:.3f}s, Pre-RoI: {:.3f}s, RoI: {:.3f}s, Subnet: {:.3f}s)\tNMS: {:.3f}s\r' \
            .format(i + 1, num_images, detect_time, time_measure[0],
                    time_measure[1], time_measure[2], time_measure[3],
                    nms_time))
        sys.stdout.flush()

        # Log every 200th visualized image to TensorBoard (BGR -> RGB first).
        if vis and i % 200 == 0 and args.use_tfboard:
            im2show = im2show[:, :, ::-1]
            logger.add_image('pred_image_{}'.format(i),
                             trans.ToTensor()(Image.fromarray(
                                 im2show.astype('uint8'))),
                             global_step=i)
            # cv2.imwrite('result.png', im2show)
            # pdb.set_trace()
            # cv2.imshow('test', im2show)
            # cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    ap_50 = imdb.evaluate_detections(all_boxes, output_dir)
    logger.add_scalar("map_50", ap_50, global_step=epoch)

    end = time.time()
    print("test time: %0.4fs" % (end - start))
def evaluation(name, net=None, vis=False, cuda=True, class_agnostic=False):
    """Run Faster R-CNN detection evaluation over the imdb named ``name``.

    When ``net`` is None, constructs the network from the module-level
    ``args`` and loads ``faster_rcnn_{POOLING_MODE}_best.pth``; otherwise
    evaluates the supplied model. Detections are NMS-filtered, capped at
    100 per image, pickled to ``detections.pkl`` and scored by the imdb.

    Args:
        name: imdb name passed to ``combined_roidb``.
        net: optional pre-built/loaded network to evaluate.
        vis: when True, draw detections, write ``result.png`` and break
            into pdb after every image; also raises the score floor to 0.05.
        cuda: move model and input tensors to GPU.
        class_agnostic: use a single shared box regressor for the delta
            de-normalization and box slicing.
            NOTE(review): the network-construction branch still reads
            ``args.class_agnostic`` (not this parameter) — confirm intended.

    Returns:
        The imdb evaluation result (mAP).
        NOTE(review): the local ``map`` shadows the builtin.
    """
    cfg.TRAIN.USE_FLIPPED = False
    imdb, roidb, ratio_list, ratio_index = combined_roidb(name, False)
    imdb.competition_mode(on=True)

    print('{:d} roidb entries'.format(len(roidb)))

    if not net:
        input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
        # input_dir = 'weight'
        if not os.path.exists(input_dir):
            raise Exception(
                'There is no input directory for loading network from ' +
                input_dir)
        # load_name = os.path.join(input_dir,
        #                          'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint))
        load_name = os.path.join(
            input_dir, 'faster_rcnn_{}_best.pth'.format(cfg['POOLING_MODE']))

        # initilize the network here.
        if args.net == 'vgg16':
            fasterRCNN = vgg16(imdb.classes, pretrained=False,
                               class_agnostic=args.class_agnostic)
        elif args.net == 'res101':
            fasterRCNN = resnet(imdb.classes, 101, pretrained=False,
                                class_agnostic=args.class_agnostic)
        elif args.net == 'res50':
            fasterRCNN = resnet(imdb.classes, 50, pretrained=False,
                                class_agnostic=args.class_agnostic)
        elif args.net == 'res152':
            fasterRCNN = resnet(imdb.classes, 152, pretrained=False,
                                class_agnostic=args.class_agnostic)
        else:
            print("network is not defined")
            pdb.set_trace()

        fasterRCNN.create_architecture()

        print("load checkpoint %s" % (load_name))
        checkpoint = torch.load(load_name)
        fasterRCNN.load_state_dict(checkpoint['model'])
        if 'pooling_mode' in checkpoint.keys():
            cfg.POOLING_MODE = checkpoint['pooling_mode']

        print('load model successfully!')
    else:
        fasterRCNN = net

    # initilize the tensor holder here.
    # These are resized in-place per batch below.
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if cuda:
        cfg.CUDA = True

    if cuda:
        fasterRCNN.cuda()

    start = time.time()
    max_per_image = 100

    # vis = args.vis
    if vis:
        thresh = 0.05
    else:
        thresh = 0.0

    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]

    output_dir = get_output_dir(imdb, save_name)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1, \
                             imdb.num_classes, training=False, normalize=False)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1,
                                             shuffle=False, num_workers=0,
                                             pin_memory=True)

    data_iter = iter(dataloader)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    for i in range(num_images):
        data = next(data_iter)
        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()
        rois, cls_prob, bbox_pred, \
            rpn_loss_cls, rpn_loss_box, \
            RCNN_loss_cls, RCNN_loss_bbox, \
            rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                        + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        # Undo the test-time image rescale (data[1][0][2] is the im_scale).
        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()
        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)
        for j in range(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_boxes[order, :], cls_scores[order],
                           cfg.TEST.NMS)
                cls_dets = cls_dets[keep.view(-1).long()]
                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j],
                                             cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \
                         .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

        if vis:
            cv2.imwrite('result.png', im2show)
            pdb.set_trace()
            # cv2.imshow('test', im2show)
            # cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    map = imdb.evaluate_detections(all_boxes, output_dir)
    # print(map)
    end = time.time()
    print("test time: %0.4fs" % (end - start))
    return map
# NOTE(review): script fragment — domain-adaptation evaluation setup that
# selects the source / target / combined test split via args.part.
args.cfg_file = "cfgs/{}_ls.yml".format(
    args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

if args.cfg_file is not None:
    cfg_from_file(args.cfg_file)
if args.set_cfgs is not None:
    cfg_from_list(args.set_cfgs)

print('Using config:')
pprint.pprint(cfg)

# Evaluation never uses flipped images.
cfg.TRAIN.USE_FLIPPED = False
if args.part == 'test_s':
    # source-domain test set
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.s_imdbtest_name, False)
elif args.part == 'test_t':
    # target-domain test set
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.t_imdbtest_name, False)
elif args.part == 'test_all':
    # combined source + target test set
    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.all_imdbtest_name, False)
else:
    # NOTE(review): falls through with imdb/roidb undefined, so the
    # competition_mode call below would raise NameError.
    print("don't have the test part !")
    # pdb.set_trace()
imdb.competition_mode(on=True)

print('{:d} roidb entries'.format(len(roidb)))

# input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
if args.set_cfgs is not None: cfg_from_list(args.set_cfgs) print('Using config:') pprint.pprint(cfg) np.random.seed(cfg.RNG_SEED) #torch.backends.cudnn.benchmark = True if torch.cuda.is_available() and not args.cuda: print("WARNING: You have a CUDA device, so you should probably run with --cuda") # train set # -- Note: Use validation set and disable the flipped to enable faster loading. cfg.TRAIN.USE_FLIPPED = True cfg.USE_GPU_NMS = args.cuda imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name)#imdb_name=voc_2007_trainval train_size = len(roidb)#10022 # ratio_list[ratio_index]将各个图片的的长宽比率按照从小到大排序(根据ratio_index排好的) # ratio_index[4 1 5 0 2 3,...] 代表第四个最小,其次第一个最小,。。(从小到大的索引) ''' roidb:=[{},{},{},....{}] {'boxes': array([[ 47, 239, 194, 370], [ 7, 11, 351, 497]], dtype=uint16), 'gt_classes': array([12, 15]), 'gt_ishard': array([0, 0]), 'gt_overlaps': <2x21 sparse matrix of type '<class 'numpy.float32'>' with 2 stored elements in Compressed Sparse Row format>, 'flipped': False, 'seg_areas': array([ 19536., 168015.], dtype=float32)} box.shape=(n,4) n代表这张图片上有几个目标 gt_classes.shape=(n) 例如:array([12, 15]) 代表第一个目标是第12类(猫),第二个目标是第15类(人) gt_overlaps. overlaps.shape=(n,21) 值为0或1 默认0, 如果[2,20]=1代表该张图片上第2个目标是第20类的分割 flipped= False(前5011个为false ,后5011个为True)