args.imdb_name = "vg_150-50-50_minitrain" args.imdbval_name = "vg_150-50-50_minival" args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'] args.cfg_file = "cfgs/{}_ls.yml".format(args.net) if args.large_scale else "cfgs/{}.yml".format(args.net) if args.cfg_file is not None: cfg_from_file(args.cfg_file) if args.set_cfgs is not None: cfg_from_list(args.set_cfgs) print('Using config:') pprint.pprint(cfg) cfg.TRAIN.USE_FLIPPED = False imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdbval_name, False) imdb.competition_mode(on=True) print('{:d} roidb entries'.format(len(roidb))) input_dir = args.load_dir + "/" + args.net + "/" + args.dataset if not os.path.exists(input_dir): raise Exception('There is no input directory for loading network from ' + input_dir) load_name = os.path.join(input_dir, 'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint)) # initilize the network here. if args.net == 'vgg16': fasterRCNN = vgg16(imdb.classes, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(imdb.classes, 101, pretrained=False, class_agnostic=args.class_agnostic)
print('Using config:')
pprint.pprint(cfg)
np.random.seed(cfg.RNG_SEED)

# torch.backends.cudnn.benchmark = True
if torch.cuda.is_available() and not args.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

# train set
# -- Note: Use validation set and disable the flipped to enable faster loading.
cfg.TRAIN.USE_FLIPPED = True
cfg.USE_GPU_NMS = args.cuda
imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name + "+voc_2007_train")
train_size = len(roidb)

print('{:d} roidb entries'.format(len(roidb)))

output_dir = args.save_dir + "/" + args.net + "/" + args.dataset
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

sampler_batch = sampler(train_size, args.batch_size)

dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \
                         imdb.num_classes, training=True)

dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size,
args.imdb_name = "vg_150-50-50_minitrain" args.imdbval_name = "vg_150-50-50_minival" args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]'] args.cfg_file = "cfgs/{}_ls.yml".format(args.net) if args.large_scale else "cfgs/{}.yml".format(args.net) if args.cfg_file is not None: cfg_from_file(args.cfg_file) if args.set_cfgs is not None: cfg_from_list(args.set_cfgs) print('Using config:') pprint.pprint(cfg) cfg.TRAIN.USE_FLIPPED = False imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdbval_name, False) imdb.competition_mode(on=True) print('{:d} roidb entries'.format(len(roidb))) input_dir = args.load_dir + "/" + args.dataset if not os.path.exists(input_dir): raise Exception('There is no input directory for loading network from ' + input_dir) load_name = os.path.join(input_dir, 'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint)) # initilize the network here. if args.net == 'vgg16': fasterRCNN = vgg16(imdb.classes, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(imdb.classes, 101, pretrained=False, class_agnostic=args.class_agnostic)
print(args)
args = set_dataset_args(args, test=True)

if torch.cuda.is_available() and not args.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

np.random.seed(cfg.RNG_SEED)

if args.cfg_file is not None:
    cfg_from_file(args.cfg_file)
if args.set_cfgs_target is not None:
    cfg_from_list(args.set_cfgs_target)

print('Using config:')
pprint.pprint(cfg)

cfg.TRAIN.USE_FLIPPED = False
imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name_target, False)
imdb.competition_mode(on=True)

print('{:d} roidb entries'.format(len(roidb)))

# initialize the network here.
from model.faster_rcnn.vgg16_HTCN import vgg16
from model.faster_rcnn.resnet_HTCN import resnet

if args.net == 'vgg16':
    fasterRCNN = vgg16(imdb.classes, pretrained=True, class_agnostic=args.class_agnostic,
                       lc=args.lc, gc=args.gc, la_attention=args.LA_ATT, mid_attention=args.MID_ATT)
elif args.net == 'res101':
    fasterRCNN = resnet(imdb.classes, 101, pretrained=True, class_agnostic=args.class_agnostic,
                        lc=args.lc, gc=args.gc, la_attention=args.LA_ATT, mid_attention=args.MID_ATT)
else "cfgs/{}.yml".format(args.net) ) if args.cfg_file is not None: cfg_from_file(args.cfg_file) if args.set_cfgs is not None: cfg_from_list(args.set_cfgs) print("Using config:") pprint.pprint(cfg) cfg.TRAIN.USE_FLIPPED = False if args.part == "test_s": imdb, roidb, ratio_list, ratio_index = combined_roidb( args.s_imdbtest_name, False ) elif args.part == "test_t": imdb, roidb, ratio_list, ratio_index = combined_roidb( args.t_imdbtest_name, False ) elif args.part == "test_all": imdb, roidb, ratio_list, ratio_index = combined_roidb( args.all_imdbtest_name, False ) else: print("don't have the test part !") pdb.set_trace() imdb.competition_mode(on=True)
if __name__ == '__main__':
    args = parse_args()

    alpha = args.alpha
    eps = 255 * args.eps
    iter_num = args.iter
    print("Start iteration: ", iter_num)

    np.random.seed(cfg.RNG_SEED)
    cfg.TRAIN.USE_FLIPPED = False

    # The training parameter of combined_roidb is True by default, which filters
    # out image pairs without bounding boxes.
    imdb, roidb, ratio_list, ratio_index = combined_roidb('kitti_val')
    attack_size = len(roidb)

    # training is set to True so that the ground truth is prepared as well
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1, imdb.num_classes,
                             training=True)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False,
                                             num_workers=0, pin_memory=True)
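# --- Hedged sketch, not part of the original script ---
# The attack loop itself is omitted from this excerpt. Assuming an iterative
# FGSM/PGD-style attack driven by the alpha / eps / iter_num arguments above,
# one plausible perturbation routine looks like this; `loss_fn` is a
# hypothetical callable that runs the detector on an image tensor and returns
# a scalar loss to maximize.
import torch

def pgd_perturb(im_data, loss_fn, alpha, eps, iter_num):
    """Perturb im_data within an L-infinity ball of radius eps,
    taking iter_num signed-gradient steps of size alpha."""
    im_orig = im_data.detach()
    im_adv = im_data.detach().clone()
    for _ in range(iter_num):
        im_adv.requires_grad_(True)
        loss = loss_fn(im_adv)
        grad = torch.autograd.grad(loss, im_adv)[0]
        with torch.no_grad():
            im_adv = im_adv + alpha * grad.sign()                 # ascend the loss
            im_adv = torch.min(torch.max(im_adv, im_orig - eps),  # project back into
                               im_orig + eps)                     # the eps-ball
    return im_adv.detach()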
def train(): # check cuda devices if not torch.cuda.is_available(): assert RuntimeError("Training can only be done by GPU. Please use --cuda to enable training.") if torch.cuda.is_available() and not args.cuda: assert RuntimeError("You have a CUDA device, so you should probably run with --cuda") # init random seed np.random.seed(cfg.RNG_SEED) # init logger # TODO: RESUME LOGGER if args.use_tfboard: from model.utils.logger import Logger # Set the logger current_t = time.strftime("%Y_%m_%d") + "_" + time.strftime("%H:%M:%S") logger = Logger(os.path.join('.', 'logs', current_t + "_" + args.frame + "_" + args.dataset + "_" + args.net)) # init dataset imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name) train_size = len(roidb) print('{:d} roidb entries'.format(len(roidb))) sampler_batch = sampler(train_size, args.batch_size) iters_per_epoch = int(train_size / args.batch_size) if args.frame in {"fpn", "faster_rcnn"}: dataset = objdetMulInSizeRoibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \ imdb.num_classes, training=True, cls_list=imdb.classes, augmentation=cfg.TRAIN.COMMON.AUGMENTATION) elif args.frame in {"ssd"}: dataset = objdetRoibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \ imdb.num_classes, training=True, cls_list=imdb.classes, augmentation=cfg.TRAIN.COMMON.AUGMENTATION) elif args.frame in {"ssd_vmrn", "vam"}: dataset = vmrdetRoibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \ imdb.num_classes, training=True, cls_list=imdb.classes, augmentation=cfg.TRAIN.COMMON.AUGMENTATION) elif args.frame in {"faster_rcnn_vmrn"}: dataset = vmrdetMulInSizeRoibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \ imdb.num_classes, training=True, cls_list=imdb.classes, augmentation=cfg.TRAIN.COMMON.AUGMENTATION) elif args.frame in {"fcgn"}: dataset = graspdetRoibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \ imdb.num_classes, training=True, cls_list=imdb.classes, augmentation=cfg.TRAIN.COMMON.AUGMENTATION) elif args.frame in {"all_in_one"}: dataset = allInOneMulInSizeRoibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \ imdb.num_classes, training=True, cls_list=imdb.classes, augmentation=cfg.TRAIN.COMMON.AUGMENTATION) elif args.frame in {"mgn"}: dataset = roigdetMulInSizeRoibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \ imdb.num_classes, training=True, cls_list=imdb.classes, augmentation=cfg.TRAIN.COMMON.AUGMENTATION) else: raise RuntimeError dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size, sampler=sampler_batch, num_workers=args.num_workers) args.iter_per_epoch = int(len(roidb) / args.batch_size) # init output directory for model saving output_dir = args.save_dir + "/" + args.dataset + "/" + args.net if not os.path.exists(output_dir): os.makedirs(output_dir) if args.vis: visualizer = dataViewer(imdb.classes) data_vis_dir = os.path.join(args.save_dir, args.dataset, 'data_vis', 'train') if not os.path.exists(data_vis_dir): os.makedirs(data_vis_dir) id_number_to_name = {} for r in roidb: id_number_to_name[r["img_id"]] = r["image"] # init network Network, optimizer = init_network(args, imdb.classes) # init variables current_result, best_result, loss_temp, loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box, loss_rel_pred, \ loss_grasp_box, loss_grasp_cls, fg_cnt, bg_cnt, fg_grasp_cnt, bg_grasp_cnt = 0,0,0,0,0,0,0,0,0,0,0,0,0,0 save_flag, rois, rpn_loss_cls, rpn_loss_box, rel_loss_cls, cls_prob, bbox_pred, rel_cls_prob, loss_bbox, loss_cls, \ rois_label, 
grasp_cls_loss, grasp_bbox_loss, grasp_conf_label = \ False, None,None,None,None,None,None,None,None,None,None,None,None,None for epoch in range(args.start_epoch, args.max_epochs + 1): # setting to train mode Network.train() start_epoch_time = time.time() start = time.time() data_iter = iter(dataloader) for step in range(iters_per_epoch): # get data batch data_batch = next(data_iter) if args.vis: for i in range(data_batch[0].size(0)): data_list = [data_batch[d][i] for d in range(len(data_batch))] im_vis = vis_gt(data_list, visualizer, args.frame, train_mode=True) img_name = id_number_to_name[data_batch[1][i][4].item()].split("/")[-1] # When using cv2.imwrite, channel order should be BGR cv2.imwrite(os.path.join(data_vis_dir, img_name), im_vis[:, :, ::-1]) # ship to cuda if args.cuda: data_batch = makeCudaData(data_batch) # network forward Network.zero_grad() # forward process if args.frame == 'faster_rcnn_vmrn': rois, cls_prob, bbox_pred, rel_cls_prob, rpn_loss_cls, rpn_loss_box, loss_cls, \ loss_bbox, rel_loss_cls,rois_label = Network(data_batch) if rel_loss_cls == 0: loss = rpn_loss_cls.mean() + rpn_loss_box.mean() + loss_cls.mean() + loss_bbox.mean() else: loss = rpn_loss_cls.mean() + rpn_loss_box.mean() + loss_cls.mean() + loss_bbox.mean() + rel_loss_cls.mean() elif args.frame == 'faster_rcnn' or args.frame == 'fpn': rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_box, loss_cls, loss_bbox, \ rois_label = Network(data_batch) loss = rpn_loss_cls.mean() + rpn_loss_box.mean() + loss_cls.mean() + loss_bbox.mean() elif args.frame == 'fcgn': bbox_pred, cls_prob, loss_bbox, loss_cls, rois_label,rois = Network(data_batch) loss = loss_bbox.mean() + loss_cls.mean() elif args.frame == 'mgn': rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_box, loss_cls, loss_bbox, rois_label, grasp_loc, \ grasp_prob, grasp_bbox_loss, grasp_cls_loss, grasp_conf_label, grasp_all_anchors = Network(data_batch) loss = rpn_loss_box.mean() + rpn_loss_cls.mean() + loss_cls.mean() + loss_bbox.mean() + \ cfg.MGN.OBJECT_GRASP_BALANCE * (grasp_bbox_loss.mean() + grasp_cls_loss.mean()) elif args.frame == 'all_in_one': rois, cls_prob, bbox_pred, rel_cls_prob, rpn_loss_cls, rpn_loss_box, loss_cls, loss_bbox, rel_loss_cls, rois_label, \ grasp_loc, grasp_prob, grasp_bbox_loss,grasp_cls_loss, grasp_conf_label, grasp_all_anchors = Network(data_batch) loss = rpn_loss_box.mean() + rpn_loss_cls.mean() + loss_cls.mean() + loss_bbox.mean() + rel_loss_cls.mean() + \ cfg.MGN.OBJECT_GRASP_BALANCE * grasp_bbox_loss.mean() + grasp_cls_loss.mean() elif args.frame == 'ssd': bbox_pred, cls_prob, loss_bbox, loss_cls = Network(data_batch) loss = loss_bbox.mean() + loss_cls.mean() elif args.frame == 'ssd_vmrn' or args.frame == 'vam': bbox_pred, cls_prob, rel_result, loss_bbox, loss_cls, rel_loss_cls = Network(data_batch) if rel_loss_cls==0: loss = loss_cls.mean() + loss_bbox.mean() else: loss = loss_cls.mean() + loss_bbox.mean() + rel_loss_cls.mean() loss_temp += loss.data.item() # backward process optimizer.zero_grad() loss.backward() if args.net == "vgg16": clip_gradient(Network, 10.) 
optimizer.step() # record training information if args.mGPUs: if rpn_loss_cls is not None: loss_rpn_cls += rpn_loss_cls.mean().data[0].item() if rpn_loss_box is not None: loss_rpn_box += rpn_loss_box.mean().data[0].item() if loss_cls is not None: loss_rcnn_cls += loss_cls.mean().data[0].item() if loss_bbox is not None: loss_rcnn_box += loss_bbox.mean().data[0].item() if rel_loss_cls is not None and rel_loss_cls!=0: loss_rel_pred += rel_loss_cls.mean().data[0].item() if grasp_cls_loss is not None: loss_grasp_cls += grasp_cls_loss.mean().data[0].item() if grasp_bbox_loss is not None: loss_grasp_box += grasp_bbox_loss.mean().data[0].item() if rois_label is not None: tempfg = torch.sum(rois_label.data.ne(0)) fg_cnt += tempfg bg_cnt += (rois_label.data.numel() - tempfg) if grasp_conf_label is not None: tempfg = torch.sum(grasp_conf_label.data.ne(0)) fg_grasp_cnt += tempfg bg_grasp_cnt += (grasp_conf_label.data.numel() - tempfg) else: if rpn_loss_cls is not None: loss_rpn_cls += rpn_loss_cls.item() if rpn_loss_cls is not None: loss_rpn_box += rpn_loss_box.item() if loss_cls is not None: loss_rcnn_cls += loss_cls.item() if loss_bbox is not None: loss_rcnn_box += loss_bbox.item() if rel_loss_cls is not None and rel_loss_cls != 0: loss_rel_pred += rel_loss_cls.item() if grasp_cls_loss is not None: loss_grasp_cls += grasp_cls_loss.item() if grasp_bbox_loss is not None: loss_grasp_box += grasp_bbox_loss.item() if rois_label is not None: tempfg = torch.sum(rois_label.data.ne(0)) fg_cnt += tempfg bg_cnt += (rois_label.data.numel() - tempfg) if grasp_conf_label is not None: tempfg = torch.sum(grasp_conf_label.data.ne(0)) fg_grasp_cnt += tempfg bg_grasp_cnt += (grasp_conf_label.data.numel() - tempfg) if Network.iter_counter % args.disp_interval == 0: end = time.time() loss_temp /= args.disp_interval loss_rpn_cls /= args.disp_interval loss_rpn_box /= args.disp_interval loss_rcnn_cls /= args.disp_interval loss_rcnn_box /= args.disp_interval loss_rel_pred /= args.disp_interval loss_grasp_cls /= args.disp_interval loss_grasp_box /= args.disp_interval print("[session %d][epoch %2d][iter %4d/%4d] \n\t\t\tloss: %.4f, lr: %.2e" \ % (args.session, epoch, step, iters_per_epoch, loss_temp, optimizer.param_groups[0]['lr'])) print('\t\t\ttime cost: %f' % (end - start,)) if rois_label is not None: print("\t\t\tfg/bg=(%d/%d)" % (fg_cnt, bg_cnt)) if grasp_conf_label is not None: print("\t\t\tgrasp_fg/grasp_bg=(%d/%d)" % (fg_grasp_cnt, bg_grasp_cnt)) if rpn_loss_box is not None and rpn_loss_cls is not None: print("\t\t\trpn_cls: %.4f\n\t\t\trpn_box: %.4f\n\t\t\trcnn_cls: %.4f\n\t\t\trcnn_box %.4f" \ % (loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box)) else: print("\t\t\trcnn_cls: %.4f\n\t\t\trcnn_box %.4f" \ % (loss_rcnn_cls, loss_rcnn_box)) if rel_loss_cls is not None: print("\t\t\trel_loss %.4f" \ % (loss_rel_pred,)) if grasp_cls_loss is not None and grasp_bbox_loss is not None: print("\t\t\tgrasp_cls: %.4f\n\t\t\tgrasp_box %.4f" \ % (loss_grasp_cls, loss_grasp_box)) if args.use_tfboard: info = { 'loss': loss_temp, 'loss_rcnn_cls': loss_rcnn_cls, 'loss_rcnn_box': loss_rcnn_box, } if rpn_loss_cls: info['loss_rpn_cls'] = loss_rpn_cls if rpn_loss_box: info['loss_rpn_box'] = loss_rpn_box if rel_loss_cls: info['loss_rel_pred'] = loss_rel_pred for tag, value in info.items(): logger.scalar_summary(tag, value, Network.iter_counter) loss_temp = 0. loss_rpn_cls = 0. loss_rpn_box = 0. loss_rcnn_cls = 0. loss_rcnn_box = 0. loss_rel_pred = 0. loss_grasp_box = 0. loss_grasp_cls = 0. fg_cnt = 0. bg_cnt = 0. fg_grasp_cnt = 0. 
bg_grasp_cnt = 0. start = time.time() # adjust learning rate if args.lr_decay_step == 0: # clr = lr / (1 + decay * n) -> lr_n / lr_n+1 = (1 + decay * (n+1)) / (1 + decay * n) decay = (1 + args.lr_decay_gamma * Network.iter_counter) / (1 + args.lr_decay_gamma * (Network.iter_counter + 1)) adjust_learning_rate(optimizer, decay) elif Network.iter_counter % (args.lr_decay_step) == 0: adjust_learning_rate(optimizer, args.lr_decay_gamma) # test and save if (Network.iter_counter - 1)% cfg.TRAIN.COMMON.SNAPSHOT_ITERS == 0: # test network and record results if cfg.TRAIN.COMMON.SNAPSHOT_AFTER_TEST: Network.eval() current_result = evalute_model(Network, args.imdbval_name, args) if args.use_tfboard: logger.scalar_summary('mAP', current_result, Network.iter_counter) Network.train() if current_result > best_result: best_result = current_result save_flag = True else: save_flag = True if save_flag: save_name = os.path.join(output_dir, args.frame + '_{}_{}_{}.pth'.format(args.session, epoch, step)) save_checkpoint({ 'session': args.session, 'epoch': epoch + 1, 'model': Network.state_dict(), 'optimizer': optimizer.state_dict(), 'pooling_mode': cfg.RCNN_COMMON.POOLING_MODE, 'class_agnostic': args.class_agnostic, }, save_name) print('save model: {}'.format(save_name)) save_flag = False end_epoch_time = time.time() print("Epoch finished. Time costing: ", end_epoch_time - start_epoch_time, "s")
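# --- Hedged sketch, not reproduced from this repository ---
# adjust_learning_rate and clip_gradient are called in the training loop above
# but are not defined in this excerpt. Minimal implementations consistent with
# how they are called (multiplicative LR decay; global-norm gradient clipping)
# could look like this.
import torch

def adjust_learning_rate(optimizer, decay=0.1):
    """Multiply the learning rate of every parameter group by `decay`."""
    for param_group in optimizer.param_groups:
        param_group['lr'] = decay * param_group['lr']

def clip_gradient(model, clip_norm):
    """Rescale all gradients so that their global L2 norm is at most clip_norm."""
    totalnorm = 0.0
    for p in model.parameters():
        if p.requires_grad and p.grad is not None:
            totalnorm += float(p.grad.data.norm() ** 2)
    totalnorm = totalnorm ** 0.5
    scale = clip_norm / max(totalnorm, clip_norm)
    for p in model.parameters():
        if p.requires_grad and p.grad is not None:
            p.grad.data.mul_(scale)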
print('Using config:')
pprint.pprint(cfg)
np.random.seed(cfg.RNG_SEED)

# torch.backends.cudnn.benchmark = True
if torch.cuda.is_available() and not args.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

# train set
# -- Note: Use validation set and disable the flipped to enable faster loading.
cfg.TRAIN.USE_FLIPPED = True
cfg.USE_GPU_NMS = args.cuda
imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name)  # turn the roidb into a trainable roidb by attaching a batch of metadata
train_size = len(roidb)

print('{:d} roidb entries'.format(len(roidb)))
sys.stdout.flush()

output_dir = args.save_dir
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

sampler_batch = sampler(train_size, args.batch_size)

# dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size,
def get_ready(query_img_path): if torch.cuda.is_available() and not args.cuda: print( "WARNING: You have a CUDA device, so you should probably run with --cuda" ) np.random.seed(cfg.RNG_SEED) if args.dataset == "coco": args.imdb_name = "coco_2017_train" args.imdbval_name = "coco_2017_val" args.set_cfgs = [ 'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]' ] # args.cfg_file = "cfgs/{}_ls.yml".format(args.net) if args.large_scale else "cfgs/{}.yml".format(args.net) args.cfg_file = "cfgs/{}_{}.yml".format( args.net, args.group) if args.group != 0 else "cfgs/{}.yml".format( args.net) if args.cfg_file is not None: cfg_from_file(args.cfg_file) if args.set_cfgs is not None: cfg_from_list(args.set_cfgs) print('Using config:') pprint.pprint(cfg) cfg.TRAIN.USE_FLIPPED = False # imdb_vs, roidb_vs, ratio_list_vs, ratio_index_vs, query_vs = combined_roidb('coco_2014_valminusminival', False) imdb_vu, roidb_vu, ratio_list_vu, ratio_index_vu, query_vu = combined_roidb( args.imdbval_name, False, seen=args.seen) # imdb_vs.competition_mode(on=True) imdb_vu.competition_mode(on=True) input_dir = args.load_dir + "/" + args.net + "/" + args.dataset if not os.path.exists(input_dir): raise Exception( 'There is no input directory for loading network from ' + input_dir) load_name = os.path.join( input_dir, 'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint)) # initilize the network here. if args.net == 'res50': fasterRCNN = resnet(imdb_vu.classes, 50, pretrained=False, class_agnostic=args.class_agnostic) else: print("network is not defined") pdb.set_trace() fasterRCNN.create_architecture() print("load checkpoint %s" % (load_name)) checkpoint = torch.load(load_name) fasterRCNN.load_state_dict(checkpoint['model']) if 'pooling_mode' in checkpoint.keys(): cfg.POOLING_MODE = checkpoint['pooling_mode'] print('load model successfully!') # initilize the tensor holder here. im_data = torch.FloatTensor(1) query = torch.FloatTensor(1) im_info = torch.FloatTensor(1) catgory = torch.LongTensor(1) gt_boxes = torch.FloatTensor(1) if args.cuda: cfg.CUDA = True fasterRCNN.cuda() output_dir_vu = get_output_dir(imdb_vu, 'faster_rcnn_unseen') dataset_vu = roibatchLoader(roidb_vu, ratio_list_vu, ratio_index_vu, query_vu, 1, imdb_vu.num_classes, training=False, seen=args.seen) fasterRCNN.eval() avg = 0 dataset_vu.query_position = avg num_images_vu = len(imdb_vu.image_index) all_boxes = [[[] for _ in xrange(num_images_vu)] for _ in xrange(imdb_vu.num_classes)] _t = {'im_detect': time.time(), 'misc': time.time()} det_file = os.path.join(output_dir_vu, 'detections_%d_%d.pkl' % (args.seen, avg)) print(det_file) # make query data query_im = imread(query_img_path) query_im = cv2.resize(query_im, dsize=(640, 480), interpolation=cv2.INTER_LINEAR) _query_im = np.copy(query_im) query_im, query_im_scale = prep_im_for_blob(query_im, target_size=128) query_im = torch.tensor(query_im) query_im = torch.unsqueeze(query_im, 0) query_im = query_im.transpose(1, 3) query = query_im.transpose(2, 3) query = query.cuda() return fasterRCNN, all_boxes, query, _query_im
# torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

if torch.cuda.is_available() and not args.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

# train set
# -- Note: Use validation set and disable the flipped to enable faster loading.
cfg.TRAIN.USE_FLIPPED = False
cfg.USE_GPU_NMS = args.cuda

imdb_cityscapes, roidb_cityscapes, ratio_list_cityscapes, ratio_index_cityscapes = \
    combined_roidb('fullycityscapes_train')
train_size_cityscapes = len(roidb_cityscapes)

imdb_cityscapes_pl, roidb_cityscapes_pl, ratio_list_cityscapes_pl, ratio_index_cityscapes_pl = \
    combined_roidb('foggycityscapes_train')
train_size_cityscapes_pl = len(roidb_cityscapes_pl)

print('{:d} roidb entries in fullycityscapes_train'.format(len(roidb_cityscapes)))
print('{:d} roidb entries in foggycityscapes_train'.format(len(roidb_cityscapes_pl)))

output_dir = args.save_dir + "/" + args.net + "/" + args.dataset
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
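# --- Hedged sketch, not from the original script ---
# The deterministic cuDNN flags above remove only one source of nondeterminism;
# for repeatable runs the Python, NumPy and PyTorch RNGs are usually seeded as
# well. A small helper along these lines is a common companion to those flags.
import random
import numpy as np
import torch

def seed_everything(seed):
    """Seed every RNG that the training pipeline may touch."""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)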
np.random.seed(cfg.RNG_SEED)
###################################################################################################
# torch.backends.cudnn.benchmark = True
if torch.cuda.is_available() and not args.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

# train set
# -- Note: Use validation set and disable the flipped to enable faster loading.
cfg.TRAIN.USE_FLIPPED = True
cfg.USE_GPU_NMS = args.cuda

imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name)
train_size = len(roidb)
print('train_size', train_size)
print('{:d} roidb entries'.format(len(roidb)))

output_dir = args.save_dir + "/" + args.net + "/" + args.dataset
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

sampler_batch = sampler(train_size, args.batch_size)

dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \
                         imdb.num_classes, training=True)

dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size, \
                                         sampler=sampler_batch, num_workers=args.num_workers)
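# --- Hedged sketch, not part of this excerpt ---
# The sampler(train_size, args.batch_size) object used in these training scripts
# is not defined here. In this family of Faster R-CNN code bases it is typically
# a torch Sampler that shuffles whole batches while keeping the indices inside
# each batch contiguous, so images grouped by aspect ratio stay together.
# A minimal version consistent with that usage:
import torch
from torch.utils.data.sampler import Sampler

class sampler(Sampler):
    def __init__(self, train_size, batch_size):
        self.num_data = train_size
        self.num_per_batch = int(train_size / batch_size)
        self.batch_size = batch_size
        self.range = torch.arange(0, batch_size).view(1, batch_size).long()
        self.leftover_flag = False
        if train_size % batch_size:
            self.leftover = torch.arange(self.num_per_batch * batch_size, train_size).long()
            self.leftover_flag = True

    def __iter__(self):
        # permute the batch order, then expand each batch start into a contiguous block of indices
        rand_num = torch.randperm(self.num_per_batch).view(-1, 1) * self.batch_size
        rand_num = rand_num.expand(self.num_per_batch, self.batch_size) + self.range
        rand_num = rand_num.contiguous().view(-1)
        if self.leftover_flag:
            rand_num = torch.cat((rand_num, self.leftover), 0)
        return iter(rand_num)

    def __len__(self):
        return self.num_data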
print('Using config:')
pprint.pprint(cfg)
np.random.seed(cfg.RNG_SEED)

# torch.backends.cudnn.benchmark = True
if torch.cuda.is_available() and not args.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

# train set
# -- Note: Use validation set and disable the flipped to enable faster loading.
cfg.TRAIN.USE_FLIPPED = True
cfg.USE_GPU_NMS = args.cuda

# create dataloader
print(args.imdb_name)
imdb, roidb, ratio_list, ratio_index, query = combined_roidb(args.imdb_name, True, seen=args.seen)
train_size = len(roidb)
class_to_coco_cat_id = imdb._class_to_coco_cat_id
coco_class_ind_to_cat_id = imdb.coco_class_ind_to_cat_id
# print(_class_to_coco_cat_id)

print('{:d} roidb entries'.format(len(roidb)))

sampler_batch = sampler(train_size, args.batch_size)

# dataset = roibatchLoader(roidb, ratio_list, ratio_index, query, args.batch_size, imdb.num_classes, training=True)
dataset = roibatchLoader(roidb, ratio_list, ratio_index, query, args.batch_size, imdb.num_classes,
                         args.sketch_path, args.sketch_class_2_label, class_to_coco_cat_id,
                         coco_class_ind_to_cat_id, training=True)

dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size,
                                         sampler=sampler_batch, num_workers=args.num_workers)

# create output directory
output_dir = args.save_dir + "/" + args.net + "/" + args.dataset
if not os.path.exists(output_dir):
    os.makedirs(output_dir)
# torch.backends.cudnn.benchmark = False
# torch.backends.cudnn.deterministic = True

if torch.cuda.is_available() and not args.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

# train set
# -- Note: Use validation set and disable the flipped to enable faster loading.
cfg.TRAIN.USE_FLIPPED = True
cfg.USE_GPU_NMS = args.cuda

# dataset-1
print('args.imdb_name: ', args.imdb_name)
imdb1, roidb1, ratio_list1, ratio_index1 = combined_roidb(args.imdb_name)
train_size1 = len(roidb1)  # this is not finalized.
sampler_batch1 = sampler(train_size1, args.batch_size)
dataset1 = roibatchLoader(roidb1, ratio_list1, ratio_index1, args.batch_size, \
                          imdb1.num_classes, training=True, prep_type=args.prep_type,
                          share_return=True, progress_return=True)
dataloader1 = torch.utils.data.DataLoader(dataset1, batch_size=args.batch_size,
                                          sampler=sampler_batch1, num_workers=args.num_workers)
print('{:d} roidb1 entries'.format(len(roidb1)))

# dataset-2
print('args.imdb_name2: ', args.imdb_name2)
imdb2, roidb2, ratio_list2, ratio_index2 = combined_roidb(args.imdb_name2)
if group:
    lr *= cfg.CIOD.LEARNING_RATE_INIT_DISTILL
if cfg.TRAIN.OPTIMIZER == 'adam':
    lr = lr * 0.1
set_learning_rate(optimizer, lr)

if group:
    if cfg.CIOD.SWITCH_DO_IN_RPN and cfg.CIOD.SWITCH_FREEZE_RPN_CLASSIFIER:
        set_learning_rate(optimizer, 0.0, rpn_cls_params_index)
    if cfg.CIOD.SWITCH_FREEZE_BASE_NET:
        set_learning_rate(optimizer, 1e-6, base_net_params_index)

fasterRCNN.train()

# Get database, and merge the class proto
imdb, roidb, ratio_list, ratio_index = combined_roidb(
    args.dataset, "trainvalStep{}".format(group),
    classes=cfg.CLASSES[:now_cls_high], ext=cfg.EXT,
    data_extra=flatten(class_proto[:now_cls_low], distinct=True))
train_size = len(roidb)

sampler_batch = RcnnSampler(train_size, cfg.TRAIN.BATCH_SIZE)
dataset = roibatchLoader(roidb, ratio_list, ratio_index, cfg.TRAIN.BATCH_SIZE,
                         now_cls_high, training=True)
dataloader = torch.utils.data.DataLoader(
    dataset, batch_size=cfg.TRAIN.BATCH_SIZE, sampler=sampler_batch,
args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5, 1, 2]', \
                 'POOLING_SIZE', '14', 'RESNET.LAYER4_STRIDE', '1', \
                 'RESNET.BRANCH2B_DILATION', 'True', 'RPN.PROPOSAL_TYPE', 'bottom_up', \
                 'TRAIN.MAX_SIZE', '1000']
args.cfg_file = "../../cfgs/res101_bottomup.yml"

if args.cfg_file is not None:
    cfg_from_file(args.cfg_file)
if args.set_cfgs is not None:
    cfg_from_list(args.set_cfgs)

print('Using config:')
pprint.pprint(cfg)

cfg.TRAIN.USE_FLIPPED = False
train_imdb, train_roidb, train_ratio_list, train_ratio_index = combined_roidb(
    args.imdb_name, False)
val_imdb, val_roidb, val_ratio_list, val_ratio_index = combined_roidb(
    args.imdbval_name, False)
print('Finished loading imdb and roidb')

dataset_classes = train_imdb.classes
assert dataset_classes == val_imdb.classes
# dataset_classes = val_imdb.classes

fasterRCNN = resnet(dataset_dummy_classes, 101, pretrained=False, class_agnostic=args.class_agnostic)
fasterRCNN.create_architecture()
print('Finished building the Faster R-CNN architecture')

# The pkl was saved by Caffe under Python 2.7 but is loaded here under Python 3.6, so the encoding must be specified.
print('Using config:')
pprint.pprint(cfg)
np.random.seed(cfg.RNG_SEED)
torch.manual_seed(cfg.RNG_SEED)
torch.cuda.manual_seed(cfg.RNG_SEED)

if torch.cuda.is_available() and not args.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

# train set
cfg.TRAIN.USE_FLIPPED = True
cfg.USE_GPU_NMS = args.cuda

# for source domain
imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name)
train_size = len(roidb)
imdb_val, roidb_val, ratio_list_val, ratio_index_val = combined_roidb(
    args.imdbval_name, False)
val_size = len(roidb_val)

# for target domain
tgt_imdb, tgt_roidb, tgt_ratio_list, tgt_ratio_index = combined_roidb(
    args.imdb_tgt_name)
tgt_train_size = len(tgt_roidb)
tgt_imdb_val, tgt_roidb_val, tgt_ratio_list_val, tgt_ratio_index_val = combined_roidb(
    args.imdbval_tgt_name, False)
tgt_val_size = len(tgt_roidb_val)

print()
print('{:d} roidb entries for source domain'.format(len(roidb)))
print('{:d} roidb entries for target domain'.format(len(tgt_roidb)))
def eval_result(args, logger, epoch, output_dir): if torch.cuda.is_available() and not args.cuda: print( "WARNING: You have a CUDA device, so you should probably run with --cuda" ) args.batch_size = 1 imdb, roidb, ratio_list, ratio_index = combined_roidb( args.imdbval_name, False) imdb.competition_mode(on=True) load_name = os.path.join(output_dir, 'thundernet_epoch_{}.pth'.format(epoch, )) layer = int(args.net.split("_")[1]) _RCNN = snet(imdb.classes, layer, pretrained_path=None, class_agnostic=args.class_agnostic) _RCNN.create_architecture() print("load checkpoint %s" % (load_name)) if args.cuda: checkpoint = torch.load(load_name) else: checkpoint = torch.load(load_name, map_location=lambda storage, loc: storage ) # Load all tensors onto the CPU _RCNN.load_state_dict(checkpoint['model']) im_data = torch.FloatTensor(1) im_info = torch.FloatTensor(1) num_boxes = torch.LongTensor(1) gt_boxes = torch.FloatTensor(1) # hm = torch.FloatTensor(1) # reg_mask = torch.LongTensor(1) # wh = torch.FloatTensor(1) # offset = torch.FloatTensor(1) # ind = torch.LongTensor(1) # ship to cuda if args.cuda: im_data = im_data.cuda() im_info = im_info.cuda() num_boxes = num_boxes.cuda() gt_boxes = gt_boxes.cuda() # hm = hm.cuda() # reg_mask = reg_mask.cuda() # wh = wh.cuda() # offset = offset.cuda() # ind = ind.cuda() # make variable with torch.no_grad(): im_data = Variable(im_data) im_info = Variable(im_info) num_boxes = Variable(num_boxes) gt_boxes = Variable(gt_boxes) # hm = Variable(hm) # reg_mask = Variable(reg_mask) # wh = Variable(wh) # offset = Variable(offset) # ind = Variable(ind) if args.cuda: cfg.CUDA = True if args.cuda: _RCNN.cuda() start = time.time() max_per_image = 100 vis = True if vis: thresh = 0.05 else: thresh = 0.0 save_name = 'thundernet' num_images = len(imdb.image_index) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] output_dir = get_output_dir(imdb, save_name) # dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \ # imdb.num_classes, training=False, normalize=False) # dataset = roibatchLoader(roidb, imdb.num_classes, training=False) dataset = Detection(roidb, num_classes=imdb.num_classes, transform=BaseTransform(cfg.TEST.SIZE, cfg.PIXEL_MEANS), training=False) dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size, shuffle=False, num_workers=0, pin_memory=True) data_iter = iter(dataloader) _t = {'im_detect': time.time(), 'misc': time.time()} det_file = os.path.join(output_dir, 'detections.pkl') _RCNN.eval() empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0)) for i in range(num_images): data = next(data_iter) with torch.no_grad(): im_data.resize_(data[0].size()).copy_(data[0]) im_info.resize_(data[1].size()).copy_(data[1]) gt_boxes.resize_(data[2].size()).copy_(data[2]) num_boxes.resize_(data[3].size()).copy_(data[3]) # hm.resize_(data[4].size()).copy_(data[4]) # reg_mask.resize_(data[5].size()).copy_(data[5]) # wh.resize_(data[6].size()).copy_(data[6]) # offset.resize_(data[7].size()).copy_(data[7]) # ind.resize_(data[8].size()).copy_(data[8]) det_tic = time.time() with torch.no_grad(): time_measure, \ rois, cls_prob, bbox_pred, \ rpn_loss_cls, rpn_loss_box, \ RCNN_loss_cls, RCNN_loss_bbox, \ rois_label = _RCNN(im_data, im_info, gt_boxes, num_boxes, # hm,reg_mask,wh,offset,ind ) scores = cls_prob.data boxes = rois.data[:, :, 1:5] if cfg.TEST.BBOX_REG: # Apply bounding-box regression deltas box_deltas = bbox_pred.data if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: # Optionally normalize 
targets by a precomputed mean and stdev if args.class_agnostic: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(args.batch_size, -1, 4) else: box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(args.batch_size, -1, 4 * len(imdb.classes)) pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) pred_boxes = clip_boxes(pred_boxes, im_info.data, 1) else: # Simply repeat the boxes, once for each class pred_boxes = np.tile(boxes, (1, scores.shape[1])) # pred_boxes /= data[1][0][2].item() pred_boxes[:, :, 0::2] /= data[1][0][2].item() pred_boxes[:, :, 1::2] /= data[1][0][3].item() scores = scores.squeeze() pred_boxes = pred_boxes.squeeze() det_toc = time.time() detect_time = det_toc - det_tic misc_tic = time.time() if vis: im = cv2.imread(imdb.image_path_at(i)) im2show = np.copy(im) for j in xrange(1, imdb.num_classes): inds = torch.nonzero(scores[:, j] > thresh).view(-1) # if there is det if inds.numel() > 0: cls_scores = scores[:, j][inds] _, order = torch.sort(cls_scores, 0, True) if args.class_agnostic: cls_boxes = pred_boxes[inds, :] else: cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4] cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1) # cls_dets = torch.cat((cls_boxes, cls_scores), 1) cls_dets = cls_dets[order] keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS) # keep = soft_nms(cls_dets.cpu().numpy(), Nt=0.5, method=2) # keep = torch.as_tensor(keep, dtype=torch.long) cls_dets = cls_dets[keep.view(-1).long()] if vis: vis_detections(im2show, imdb.classes[j], color_list[j - 1].tolist(), cls_dets.cpu().numpy(), 0.6) all_boxes[j][i] = cls_dets.cpu().numpy() else: all_boxes[j][i] = empty_array # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack( [all_boxes[j][i][:, -1] for j in xrange(1, imdb.num_classes)]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(1, imdb.num_classes): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] misc_toc = time.time() nms_time = misc_toc - misc_tic sys.stdout.write( 'im_detect: {:d}/{:d}\tDetect: {:.3f}s (RPN: {:.3f}s, Pre-RoI: {:.3f}s, RoI: {:.3f}s, Subnet: {:.3f}s)\tNMS: {:.3f}s\r' \ .format(i + 1, num_images, detect_time, time_measure[0], time_measure[1], time_measure[2], time_measure[3], nms_time)) sys.stdout.flush() if vis and i % 200 == 0 and args.use_tfboard: im2show = im2show[:, :, ::-1] logger.add_image('pred_image_{}'.format(i), trans.ToTensor()(Image.fromarray( im2show.astype('uint8'))), global_step=i) # cv2.imwrite('result.png', im2show) # pdb.set_trace() # cv2.imshow('test', im2show) # cv2.waitKey(0) with open(det_file, 'wb') as f: pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) print('Evaluating detections') ap_50 = imdb.evaluate_detections(all_boxes, output_dir) logger.add_scalar("map_50", ap_50, global_step=epoch) end = time.time() print("test time: %0.4fs" % (end - start))
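# --- Hedged sketch, not reproduced from this repository ---
# bbox_transform_inv and clip_boxes are called above but not defined in this
# excerpt. They implement the standard Faster R-CNN box decoding, mapping
# predicted deltas (dx, dy, dw, dh) back to absolute boxes and clamping them to
# the image. A single-image version of that math looks roughly like this; the
# function names here are illustrative, not the library's exact API.
import torch

def decode_boxes(boxes, deltas):
    """boxes: (N, 4) anchors/RoIs as x1, y1, x2, y2; deltas: (N, 4) as dx, dy, dw, dh."""
    widths = boxes[:, 2] - boxes[:, 0] + 1.0
    heights = boxes[:, 3] - boxes[:, 1] + 1.0
    ctr_x = boxes[:, 0] + 0.5 * widths
    ctr_y = boxes[:, 1] + 0.5 * heights

    pred_ctr_x = deltas[:, 0] * widths + ctr_x
    pred_ctr_y = deltas[:, 1] * heights + ctr_y
    pred_w = torch.exp(deltas[:, 2]) * widths
    pred_h = torch.exp(deltas[:, 3]) * heights

    return torch.stack((pred_ctr_x - 0.5 * pred_w,
                        pred_ctr_y - 0.5 * pred_h,
                        pred_ctr_x + 0.5 * pred_w,
                        pred_ctr_y + 0.5 * pred_h), dim=1)

def clip_to_image(boxes, height, width):
    """Clamp boxes so they lie inside an image of the given size."""
    boxes[:, 0::2] = boxes[:, 0::2].clamp(0, width - 1)
    boxes[:, 1::2] = boxes[:, 1::2].clamp(0, height - 1)
    return boxes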
import pickle  # needed below to load the cached detections
import pprint
import sys
import time

import _init_paths
import cv2
import numpy as np
import torch
from model.faster_rcnn.resnet import resnet
from model.faster_rcnn.vgg16 import vgg16
from model.nms.nms_wrapper import nms
from model.rpn.bbox_transform import bbox_transform_inv
from model.rpn.bbox_transform import clip_boxes
from model.utils.config import cfg, cfg_from_file, cfg_from_list, get_output_dir
from model.utils.net_utils import vis_detections
from roi_data_layer.roibatchLoader import roibatchLoader
from roi_data_layer.roidb import combined_roidb
from torch.autograd import Variable

det_file = '/storage/dldi/PyProjects/FasterRCNN4VidVRDT1/output/res101/vidor_2019_test/faster_rcnn_10/detections.pkl'
save_name = 'faster_rcnn_10'

imdb, roidb, ratio_list, ratio_index = combined_roidb('vidor_2019_train', False)
imdb.competition_mode(on=True)
output_dir = get_output_dir(imdb, save_name)

with open(det_file, 'rb') as f:
    all_boxes = pickle.load(f)

print('Evaluating detections')
imdb.evaluate_detections(all_boxes, output_dir)
def evalute_model(Network, namedb, args): max_per_image = 100 # load test dataset imdb, roidb, ratio_list, ratio_index = combined_roidb(namedb, False) if args.frame in {"fpn", "faster_rcnn"}: dataset = objdetMulInSizeRoibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \ imdb.num_classes, training=False, cls_list=imdb.classes, augmentation=False) elif args.frame in {"ssd"}: dataset = objdetRoibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \ imdb.num_classes, training=False, cls_list=imdb.classes, augmentation=False) elif args.frame in {"ssd_vmrn", "vam"}: dataset = vmrdetRoibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \ imdb.num_classes, training=False, cls_list=imdb.classes, augmentation=False) elif args.frame in {"faster_rcnn_vmrn"}: dataset = vmrdetMulInSizeRoibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \ imdb.num_classes, training=False, cls_list=imdb.classes, augmentation=False) elif args.frame in {"fcgn"}: dataset = graspdetRoibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \ imdb.num_classes, training=False, cls_list=imdb.classes, augmentation=False) elif args.frame in {"all_in_one"}: dataset = allInOneMulInSizeRoibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \ imdb.num_classes, training=False, cls_list=imdb.classes, augmentation=False) elif args.frame in {"mgn"}: dataset = roigdetMulInSizeRoibatchLoader(roidb, ratio_list, ratio_index, args.batch_size, \ imdb.num_classes, training=False, cls_list=imdb.classes, augmentation=False) else: raise RuntimeError dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=False, num_workers=0, pin_memory=True) data_iter = iter(dataloader) num_images = len(imdb.image_index) output_dir = args.save_dir + "/" + args.dataset + "/" + args.net if args.vis: visualizer = dataViewer(imdb.classes) data_vis_dir = os.path.join(args.save_dir, args.dataset, 'data_vis', 'test') if not os.path.exists(data_vis_dir): os.makedirs(data_vis_dir) id_number_to_name = {} for r in roidb: id_number_to_name[r["img_id"]] = r["image"] start = time.time() # init variables all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] all_rel = [] all_grasp = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] Network.eval() empty_array= np.transpose(np.array([[], [], [], [], []]), (1, 0)) for i in range(num_images): data_batch = next(data_iter) if args.cuda: data_batch = makeCudaData(data_batch) det_tic = time.time() # forward process if args.frame == 'faster_rcnn' or args.frame == 'fpn': rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_box, net_loss_cls, net_loss_bbox, rois_label = Network(data_batch) boxes = rois[:, :, 1:5] elif args.frame == 'ssd': bbox_pred, cls_prob, net_loss_bbox, net_loss_cls = Network(data_batch) boxes = Network.priors.type_as(bbox_pred).unsqueeze(0) elif args.frame == 'faster_rcnn_vmrn': rois, cls_prob, bbox_pred, rel_result, rpn_loss_cls, rpn_loss_box, \ RCNN_loss_cls, RCNN_loss_bbox, RCNN_rel_loss_cls, rois_label = Network(data_batch) boxes = rois[:, :, 1:5] all_rel.append(rel_result) elif args.frame == 'ssd_vmrn' or args.frame == 'vam': bbox_pred, cls_prob, rel_result, loss_bbox, loss_cls, rel_loss_cls = Network(data_batch) boxes = Network.priors.type_as(bbox_pred) all_rel.append(rel_result) elif args.frame == 'fcgn': bbox_pred, cls_prob, loss_bbox, loss_cls, rois_label, boxes = Network(data_batch) elif args.frame == 'mgn': rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_box, loss_cls, loss_bbox, rois_label, 
grasp_loc, grasp_prob, \ grasp_bbox_loss, grasp_cls_loss, grasp_conf_label, grasp_all_anchors = Network(data_batch) boxes = rois[:, :, 1:5] elif args.frame == 'all_in_one': rois, cls_prob, bbox_pred, rel_result, rpn_loss_cls, rpn_loss_box, loss_cls, loss_bbox, rel_loss_cls, rois_label, \ grasp_loc, grasp_prob, grasp_bbox_loss, grasp_cls_loss, grasp_conf_label, grasp_all_anchors = Network(data_batch) boxes = rois[:, :, 1:5] all_rel.append(rel_result) det_toc = time.time() detect_time = det_toc - det_tic misc_tic = time.time() # collect results if args.frame in {'ssd', 'fpn', 'faster_rcnn', 'faster_rcnn_vmrn', 'ssd_vmrn', 'vam'}: # detected_box is a list of boxes. len(list) = num_classes det_box = objdet_inference(cls_prob[0].data, bbox_pred[0].data, data_batch[1][0].data, box_prior=boxes[0].data, class_agnostic=args.class_agnostic, n_classes=imdb.num_classes, for_vis=False) if args.vis: if args.frame not in {'faster_rcnn_vmrn', 'ssd_vmrn', 'vam'}: vis_boxes = objdet_inference(cls_prob[0].data, bbox_pred[0].data, data_batch[1][0].data, box_prior=boxes[0].data, class_agnostic=args.class_agnostic, n_classes=imdb.num_classes, for_vis=True) data_list = [data_batch[0][0], data_batch[1][0], torch.Tensor(vis_boxes)] else: det_res = all_rel[-1] if det_res[0].shape[0] > 0: vis_boxes = torch.cat([det_res[0], det_res[1].unsqueeze(1)], dim = 1) else: vis_boxes = torch.Tensor([]) rel_mat = rel_prob_to_mat(det_res[2], vis_boxes.size(0)) data_list = [data_batch[0][0], data_batch[1][0], vis_boxes, torch.Tensor([vis_boxes.size(0)]), torch.Tensor(rel_mat)] if max_per_image > 0: det_box = detection_filter(det_box, None, max_per_image) for j in xrange(1, imdb.num_classes): all_boxes[j][i] = det_box[j] elif args.frame in {'mgn', 'all_in_one'}: det_box, det_grasps = objgrasp_inference(cls_prob[0].data if cls_prob is not None else cls_prob, bbox_pred[0].data if bbox_pred is not None else bbox_pred, grasp_prob.data, grasp_loc.data, data_batch[1][0].data, rois[0].data, class_agnostic=args.class_agnostic, n_classes=imdb.num_classes, g_box_prior=grasp_all_anchors.data, for_vis=False, topN_g = 1) if args.vis: vis_boxes, vis_grasps = objgrasp_inference(cls_prob[0].data if cls_prob is not None else cls_prob, bbox_pred[0].data if bbox_pred is not None else bbox_pred, grasp_prob.data, grasp_loc.data, data_batch[1][0].data, rois[0].data, class_agnostic=args.class_agnostic, n_classes=imdb.num_classes, g_box_prior=grasp_all_anchors.data, for_vis=True, topN_g=5) if vis_boxes.shape[0] > 0: g_inds = torch.Tensor(np.arange(vis_boxes.shape[0])).unsqueeze(1).repeat(1, vis_grasps.shape[1]) + 1 else: g_inds = torch.Tensor([]) data_list = [data_batch[0][0], data_batch[1][0], torch.Tensor(vis_boxes), torch.Tensor(vis_grasps).view(-1, vis_grasps.shape[-1]), g_inds.long().view(-1)] if max_per_image > 0: det_box, det_grasps = detection_filter(det_box, det_grasps, max_per_image) for j in xrange(1, imdb.num_classes): all_boxes[j][i] = det_box[j] all_grasp[j][i] = det_grasps[j] elif args.frame in {'fcgn'}: det_grasps = grasp_inference(cls_prob[0].data, bbox_pred[0].data, data_batch[1][0].data, box_prior = boxes[0].data, topN = 1) all_grasp[1][i] = det_grasps if args.vis: data_list = [data_batch[0][0], data_batch[1][0], torch.Tensor(det_grasps)] else: raise RuntimeError("Illegal algorithm.") if args.vis: im_vis = vis_gt(data_list, visualizer, args.frame) img_name = id_number_to_name[data_batch[1][0][4].item()].split("/")[-1] # When using cv2.imwrite, channel order should be BGR cv2.imwrite(os.path.join(data_vis_dir, img_name), im_vis[:, :, 
::-1]) misc_toc = time.time() nms_time = misc_toc - misc_tic sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \ .format(i + 1, num_images, detect_time, nms_time)) sys.stdout.flush() print('Evaluating detections') if args.frame in {'fcgn'} or 'cornell' in args.dataset or 'jacquard' in args.dataset: result = imdb.evaluate_detections(all_grasp, output_dir) else: result = imdb.evaluate_detections(all_boxes, output_dir) if args.frame in {'mgn', "all_in_one"}: # when using mgn in single-object grasp dataset, we only use accuracy to measure the performance instead of mAP. if 'cornell' in args.dataset or 'jacquard' in args.dataset: pass else: print('Evaluating grasp detection results') grasp_MRFPPI, mean_MRFPPI, key_point_MRFPPI, mAPgrasp = imdb.evaluate_multigrasp_detections(all_boxes, all_grasp) print('Mean Log-Average Miss Rate: %.4f' % np.mean(np.array(mean_MRFPPI))) result = mAPgrasp if args.frame in {"faster_rcnn_vmrn", "ssd_vmrn", "all_in_one"}: print('Evaluating relationships') orec, oprec, imgprec, imgprec_difobjnum = imdb.evaluate_relationships(all_rel) print("object recall: \t%.4f" % orec) print("object precision:\t%.4f" % oprec) print("image acc: \t%.4f" % imgprec) print("image acc for images with different object numbers (2,3,4,5):") print("%s\t%s\t%s\t%s\t" % tuple(imgprec_difobjnum)) result = imgprec # TODO: implement all_in_one's metric for evaluation end = time.time() print("test time: %0.4fs" % (end - start)) return result
pprint.pprint(cfg)
np.random.seed(cfg.RNG_SEED)

# torch.backends.cudnn.benchmark = True
if torch.cuda.is_available() and not args.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

# train set
# -- Note: Use validation set and disable the flipped to enable faster loading.
cfg.TRAIN.USE_FLIPPED = True
cfg.USE_GPU_NMS = args.cuda

# SOURCE
imdb_s, roidb_s, ratio_list_s, ratio_index_s = combined_roidb(args.imdb_source)
train_size_s = len(roidb_s)
print('%s: %d roidb entries' % (imdb_s.name, len(roidb_s)))

sampler_batch_s = sampler(train_size_s, args.batch_size)
dataset_s = roibatchLoader(roidb_s, ratio_list_s, ratio_index_s, 8, \
                           imdb_s.num_classes, training=True)
dataloader_s = torch.utils.data.DataLoader(dataset_s, batch_size=8,
                                           sampler=sampler_batch_s,
                                           num_workers=args.num_workers)

# TARGET
def test_adv(step_size=0.01, num_steps=0, dataset='coco', batch_size=1, weights='weights/voc_pretrained.npy', save=False, grad_cam=False): if save: p_t1 = 'detect_adv_normal' if not os.path.exists(p_t1): os.makedirs(p_t1) cfg_file = 'cfgs/res50.yml' cfg_from_file(cfg_file) cfg.POOLING_MODE = 'align' cfg.TRAIN.USE_FLIPPED = False init_seeds(cfg.RNG_SEED) if dataset == "pascal_voc": imdb_name = "voc_2007_trainval" imdbval_name = "voc_2007_test" set_cfgs = [ 'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]' ] elif dataset == "pascal_voc_0712": imdb_name = "voc_2007_trainval+voc_2012_trainval" imdbval_name = "voc_2007_test" set_cfgs = [ 'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]' ] elif dataset == "coco": imdb_name = "coco_2014_train+coco_2014_valminusminival" imdbval_name = "coco_2014_minival" set_cfgs = [ 'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]' ] elif dataset == "imagenet": imdb_name = "imagenet_train" imdbval_name = "imagenet_val" set_cfgs = [ 'ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]' ] elif dataset == "vg": imdb_name = "vg_150-50-50_minitrain" imdbval_name = "vg_150-50-50_minival" set_cfgs = [ 'ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]' ] imdb, roidb, ratio_list, ratio_index = combined_roidb(imdbval_name, training=False) imdb.competition_mode(on=True) print('{:d} roidb entries'.format(len(roidb))) model = resnet(imdb.classes, 50, pretrained=False, class_agnostic=False) print("load checkpoint %s" % (weights)) if weights.endswith('.pt'): checkpoint = torch.load(weights) checkpoint['model'] = { k: v for k, v in checkpoint['model'].items() if model.state_dict()[k].numel() == v.numel() } model.load_state_dict(checkpoint['model'], strict=True) elif weights.endswith('.npy'): checkpoint = np.load(weights, allow_pickle=True).item() model_dict = { k: torch.from_numpy(checkpoint[k]) for k in checkpoint.keys() if model.state_dict()[k].numel() == torch.from_numpy(checkpoint[k]).numel() } model.load_state_dict(model_dict, strict=True) # load_state_dict(fpn.state_dict(), checkpoint['state_dict']) model.cuda().eval() del checkpoint print('load model successfully!') if not grad_cam: for param in model.parameters(): param.requires_grad = False model_adv = PGD(model) max_per_image = 100 vis = False thresh = 0.001 iou_thre = 0.5 colors = [[random.randint(0, 255) for _ in range(3)] for _ in range(len(imdb.classes))] num_images = len(imdb.image_index) all_boxes = [[[] for _ in xrange(num_images)] for _ in xrange(imdb.num_classes)] save_name = 'v1' output_dir = get_output_dir(imdb, save_name) dataset = roibatchLoader(roidb, ratio_list, ratio_index, batch_size, \ imdb.num_classes, training=False, normalize=False) dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True) data_iter = iter(dataloader) empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0)) im_data = torch.FloatTensor(1).cuda() im_info = torch.FloatTensor(1).cuda() num_boxes = torch.LongTensor(1).cuda() gt_boxes = torch.FloatTensor(1).cuda() for i in range(num_images): data = next(data_iter) with torch.no_grad(): im_data.resize_(data[0].size()).copy_(data[0]) im_info.resize_(data[1].size()).copy_(data[1]) gt_boxes.resize_(data[2].size()).copy_(data[2]) num_boxes.resize_(data[3].size()).copy_(data[3]) if vis or save: im = cv2.imread(imdb.image_path_at(i)) im2show = np.copy(im) with torch.enable_grad(): if num_steps * step_size > 0: im_adv = model_adv.adv_sample_infer(im_data, im_info, 
gt_boxes, num_boxes, step_size, num_steps=num_steps) else: im_adv = im_data if save: file_name = imdb.image_path_at(i).split('/')[-1] if grad_cam: model.eval() rois, cls_prob, bbox_pred, \ rpn_loss_cls, rpn_loss_box, \ RCNN_loss_cls, RCNN_loss_bbox, \ rois_label, conv_output = model(im_adv, im_info, gt_boxes, num_boxes, grad_cam=True) one_hot_output = torch.zeros_like(cls_prob) one_hot_output[0][:, 0:-1] = 1 model.zero_grad() cls_prob.backward(gradient=one_hot_output, retain_graph=True) guided_gradients = model.gradients.cpu().data.numpy()[0] target = conv_output.cpu().data.numpy()[0] ws = np.mean(guided_gradients, axis=(1, 2)) # take averages for each gradient # create empty numpy array for cam cam = np.ones(target.shape[1:], dtype=np.float32) # multiply each weight with its conv output and then, sum for l, w in enumerate(ws): cam += w * target[l, :, :] cam = np.maximum(cam, 0) cam = (cam - np.min(cam)) / (np.max(cam) - np.min(cam) ) # normalize between 0-1 cam = np.uint8(cam * 255) # scale between 0-255 to visualize im_rgb = cv2.cvtColor(im2show, cv2.COLOR_BGR2RGB) cam = np.uint8( Image.fromarray(cam).resize( (im_rgb.shape[1], im_rgb.shape[0]), Image.ANTIALIAS)) / 255 original_image = Image.fromarray(im_rgb) save_class_activation_images(original_image, cam, file_name) rois, cls_prob, bbox_pred, \ rpn_loss_cls, rpn_loss_box, \ RCNN_loss_cls, RCNN_loss_bbox, \ rois_label = model(im_adv, im_info, gt_boxes, num_boxes) scores = cls_prob.data boxes = rois.data[:, :, 1:5] box_deltas = bbox_pred.data box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \ + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda() box_deltas = box_deltas.view(batch_size, -1, 4 * (imdb.num_classes - 1)) pred_boxes = bbox_transform_inv(boxes, box_deltas, 1) pred_boxes = clip_boxes(pred_boxes, im_info.data, 1) scores = scores.squeeze() pred_boxes /= data[1][0][2].item() pred_boxes = pred_boxes.squeeze() nms_cfg = {'type': 'nms', 'iou_threshold': iou_thre} det_bboxes, det_labels = multiclass_nms(pred_boxes, scores, thresh, nms_cfg, max_per_image) keep = det_bboxes[:, 4] > thresh det_bboxes = det_bboxes[keep] det_labels = det_labels[keep] for j in xrange(0, imdb.num_classes - 1): inds = torch.nonzero(det_labels == j, as_tuple=False).view(-1) # if there is det if inds.numel() > 0: cls_dets = det_bboxes[inds] if vis or save: im2show = vis_detections(im2show, imdb.classes[j], cls_dets.cpu().numpy(), color=colors[int(j)]) all_boxes[j][i] = cls_dets.cpu().numpy() else: all_boxes[j][i] = empty_array # Limit to max_per_image detections *over all classes* if max_per_image > 0: image_scores = np.hstack([ all_boxes[j][i][:, -1] for j in xrange(0, imdb.num_classes - 1) ]) if len(image_scores) > max_per_image: image_thresh = np.sort(image_scores)[-max_per_image] for j in xrange(0, imdb.num_classes - 1): keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0] all_boxes[j][i] = all_boxes[j][i][keep, :] if save: cv2.imwrite(os.path.join(p_t1, file_name.replace('jpg', 'png')), im2show) elif vis: cv2.imwrite('result.png', im2show) if i % 200 == 0: print(i, 'waiting.....') det_file = os.path.join(output_dir, 'detections.pkl') with open(det_file, 'wb') as f: pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL) print('Evaluating detections') mAP = imdb.evaluate_detections(all_boxes, output_dir) return mAP
    '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '100'
]
args.cfg_file = "cfgs/{}_ls.yml".format(
    args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)

if args.cfg_file is not None:
    cfg_from_file(args.cfg_file)
if args.set_cfgs is not None:
    cfg_from_list(args.set_cfgs)

print('Using config:')
pprint.pprint(cfg)

cfg.TRAIN.USE_FLIPPED = False
imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdbtest_name, training=False)
imdb.competition_mode(on=True)

print('{:d} roidb entries'.format(len(roidb)))

input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
if not os.path.exists(input_dir):
    raise Exception(
        'There is no input directory for loading network from ' + input_dir)
load_name = os.path.join(
    input_dir,
    'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint))

# initialize the network here.
def run(args): lr = cfg.TRAIN.LEARNING_RATE momentum = cfg.TRAIN.MOMENTUM weight_decay = cfg.TRAIN.WEIGHT_DECAY try: xrange # Python 2 except NameError: xrange = range # Python 3 #args = parse_args() print('Called with args:') print(args) if torch.cuda.is_available() and not args.cuda: print( "WARNING: You have a CUDA device, so you should probably run with --cuda" ) np.random.seed(cfg.RNG_SEED) if args.dataset == "pascal_voc": args.imdb_name = "voc_2007_trainval" args.imdbval_name = "voc_2007_test" args.set_cfgs = [ 'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]' ] elif args.dataset == "pascal_voc_0712": args.imdb_name = "voc_2007_trainval+voc_2012_trainval" args.imdbval_name = "voc_2007_test" args.set_cfgs = [ 'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]' ] elif args.dataset == "coco": args.imdb_name = "coco_2014_train+coco_2014_valminusminival" args.imdbval_name = "coco_2014_minival" args.set_cfgs = [ 'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]' ] elif args.dataset == "imagenet": args.imdb_name = "imagenet_train" args.imdbval_name = "imagenet_val" args.set_cfgs = [ 'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]' ] elif args.dataset == "vg": args.imdb_name = "vg_150-50-50_minitrain" args.imdbval_name = "vg_150-50-50_minival" args.set_cfgs = [ 'ANCHOR_SCALES', '[16, 32, 64, 128]', 'ANCHOR_RATIOS', '[0.5,1,2]' ] args.cfg_file = "cfgs/{}_ls.yml".format( args.net) if args.large_scale else "cfgs/{}.yml".format(args.net) if args.cfg_file is not None: cfg_from_file(args.cfg_file) if args.set_cfgs is not None: cfg_from_list(args.set_cfgs) print('Using config:') pprint.pprint(cfg) cfg.TRAIN.USE_FLIPPED = False imdb, roidb, ratio_list, ratio_index = combined_roidb( args.imdbval_name, False) imdb.competition_mode(on=True) print('{:d} roidb entries'.format(len(roidb))) input_dir = args.load_dir + "/" + args.net + "/" + args.dataset if not os.path.exists(input_dir): raise Exception( 'There is no input directory for loading network from ' + input_dir) load_name = os.path.join( input_dir, 'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint)) # initilize the network here. if args.net == 'vgg16': fasterRCNN = vgg16(imdb.classes, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res101': fasterRCNN = resnet(imdb.classes, 101, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res50': fasterRCNN = resnet(imdb.classes, 50, pretrained=False, class_agnostic=args.class_agnostic) elif args.net == 'res152': fasterRCNN = resnet(imdb.classes, 152, pretrained=False, class_agnostic=args.class_agnostic) else: print("network is not defined") pdb.set_trace() fasterRCNN.create_architecture() print("load checkpoint %s" % (load_name)) checkpoint = torch.load(load_name) fasterRCNN.load_state_dict(checkpoint['model']) if 'pooling_mode' in checkpoint.keys(): cfg.POOLING_MODE = checkpoint['pooling_mode'] print('load model successfully!') # initilize the tensor holder here. 
    im_data = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    num_boxes = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        im_info = im_info.cuda()
        num_boxes = num_boxes.cuda()
        gt_boxes = gt_boxes.cuda()

    # make variable
    im_data = Variable(im_data)
    im_info = Variable(im_info)
    num_boxes = Variable(num_boxes)
    gt_boxes = Variable(gt_boxes)

    if args.cuda:
        cfg.CUDA = True

    if args.cuda:
        fasterRCNN.cuda()

    start = time.time()
    max_per_image = 100

    vis = args.vis
    if vis:
        thresh = 0.05
    else:
        thresh = 0.0

    save_name = 'faster_rcnn_10'
    num_images = len(imdb.image_index)
    all_boxes = [[[] for _ in xrange(num_images)]
                 for _ in xrange(imdb.num_classes)]

    # pdb.set_trace()
    output_dir = get_output_dir(imdb, save_name)
    dataset = roibatchLoader(roidb, ratio_list, ratio_index, 1,
                             imdb.num_classes, training=False, normalize=False)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=1,
                                             shuffle=False, num_workers=0,
                                             pin_memory=True)

    data_iter = iter(dataloader)

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir, 'detections.pkl')

    fasterRCNN.eval()
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))

    for i in range(num_images):

        data = next(data_iter)
        with torch.no_grad():
            im_data.resize_(data[0].size()).copy_(data[0])
            im_info.resize_(data[1].size()).copy_(data[1])
            gt_boxes.resize_(data[2].size()).copy_(data[2])
            num_boxes.resize_(data[3].size()).copy_(data[3])

        det_tic = time.time()
        rois, cls_prob, bbox_pred, \
        rpn_loss_cls, rpn_loss_box, \
        RCNN_loss_cls, RCNN_loss_bbox, \
        rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        scores = cls_prob.data
        boxes = rois.data[:, :, 1:5]

        if cfg.TEST.BBOX_REG:
            # Apply bounding-box regression deltas
            box_deltas = bbox_pred.data
            if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
                # Optionally normalize targets by a precomputed mean and stdev
                if args.class_agnostic:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4)
                else:
                    box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                                 + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
                    box_deltas = box_deltas.view(1, -1, 4 * len(imdb.classes))

            pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
            pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)
        else:
            # Simply repeat the boxes, once for each class
            pred_boxes = np.tile(boxes, (1, scores.shape[1]))

        pred_boxes /= data[1][0][2].item()

        scores = scores.squeeze()
        pred_boxes = pred_boxes.squeeze()
        det_toc = time.time()
        detect_time = det_toc - det_tic
        misc_tic = time.time()

        if vis:
            im = cv2.imread(imdb.image_path_at(i))
            im2show = np.copy(im)

        for j in xrange(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > thresh).view(-1)
            # if there is det
            if inds.numel() > 0:
                cls_scores = scores[:, j][inds]
                _, order = torch.sort(cls_scores, 0, True)
                if args.class_agnostic:
                    cls_boxes = pred_boxes[inds, :]
                else:
                    cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]

                cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
                # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
                # cls_dets = cls_dets[order]
                # keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                # cls_dets = cls_dets[keep.view(-1).long()]
                cls_dets = cls_dets[order]
                # keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
                keep = softnms_cpu_torch(cls_dets)
                # cls_dets = cls_dets[keep.view(-1).long()]
                cls_dets = keep

                if vis:
                    im2show = vis_detections(im2show, imdb.classes[j], cls_dets.cpu().numpy(), 0.3)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array

        # Limit to max_per_image detections *over all classes*
        if max_per_image > 0:
            image_scores = np.hstack([all_boxes[j][i][:, -1]
                                      for j in xrange(1, imdb.num_classes)])
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                for j in xrange(1, imdb.num_classes):
                    keep = np.where(all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]

        misc_toc = time.time()
        nms_time = misc_toc - misc_tic

        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r'
                         .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()

        if vis:
            cv2.imwrite('result.png', im2show)
            pdb.set_trace()
            # cv2.imshow('test', im2show)
            # cv2.waitKey(0)

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, output_dir)

    end = time.time()
    print("test time: %0.4fs" % (end - start))
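# Hedged sketch: the softnms_cpu_torch() call above is assumed to implement
# Soft-NMS (Bodla et al., 2017) and to return the surviving, rescored
# detections themselves rather than keep indices, which is why the loop
# assigns `cls_dets = keep`. A minimal reference version under that
# assumption (linear decay; names local to this sketch, not from the repo):
def soft_nms_sketch(dets, iou_thresh=0.3, score_thresh=1e-3):
    # dets: (N, 5) tensor of [x1, y1, x2, y2, score], sorted by score descending
    keep = []
    dets = dets.clone()
    while dets.size(0) > 0:
        top = dets[0]
        keep.append(top)
        rest = dets[1:]
        if rest.size(0) == 0:
            break
        # IoU of the top-scoring box against every remaining box
        xx1 = torch.max(top[0], rest[:, 0]); yy1 = torch.max(top[1], rest[:, 1])
        xx2 = torch.min(top[2], rest[:, 2]); yy2 = torch.min(top[3], rest[:, 3])
        inter = (xx2 - xx1).clamp(min=0) * (yy2 - yy1).clamp(min=0)
        area_top = (top[2] - top[0]) * (top[3] - top[1])
        area_rest = (rest[:, 2] - rest[:, 0]) * (rest[:, 3] - rest[:, 1])
        iou = inter / (area_top + area_rest - inter)
        # decay the scores of overlapping boxes instead of hard-suppressing them
        decay = torch.where(iou > iou_thresh, 1.0 - iou, torch.ones_like(iou))
        rest[:, 4] = rest[:, 4] * decay
        rest = rest[rest[:, 4] > score_thresh]
        _, order = torch.sort(rest[:, 4], descending=True)
        dets = rest[order]
    return torch.stack(keep, 0) if keep else dets.new_zeros((0, 5))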
print('Using config:')
pprint.pprint(cfg)
np.random.seed(cfg.RNG_SEED)

###################################################################################################
# torch.backends.cudnn.benchmark = True
if torch.cuda.is_available() and not args.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

# train set
# -- Note: Use validation set and disable the flipped to enable faster loading.
cfg.TRAIN.USE_FLIPPED = True
cfg.USE_GPU_NMS = args.cuda
imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name)
train_size = len(roidb)
print('train_size', train_size)

print('{:d} roidb entries'.format(len(roidb)))

output_dir = args.save_dir + "/" + args.net + "/" + args.dataset
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

sampler_batch = sampler(train_size, args.batch_size)

dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size,
                         imdb.num_classes, training=True)

dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size,
                                         sampler=sampler_batch, num_workers=args.num_workers)
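# Hedged sketch: `sampler` above is assumed to be the batch-aligned random
# sampler used throughout the faster-rcnn.pytorch training scripts. It
# shuffles whole runs of consecutive indices so that each mini-batch keeps
# the aspect-ratio grouping produced by ratio_list/ratio_index. A minimal
# version under that assumption:
import torch
from torch.utils.data.sampler import Sampler

class sampler(Sampler):
    def __init__(self, train_size, batch_size):
        self.num_data = train_size
        self.num_per_batch = int(train_size / batch_size)
        self.batch_size = batch_size
        # index offsets 0..batch_size-1, reused for every batch
        self.range = torch.arange(0, batch_size).view(1, batch_size).long()
        self.leftover_flag = False
        if train_size % batch_size:
            self.leftover = torch.arange(self.num_per_batch * batch_size, train_size).long()
            self.leftover_flag = True

    def __iter__(self):
        # pick a random permutation of batch starting positions ...
        rand_num = torch.randperm(self.num_per_batch).view(-1, 1) * self.batch_size
        # ... and expand each start into a run of consecutive indices
        rand_num = rand_num.expand(self.num_per_batch, self.batch_size) + self.range
        rand_num_view = rand_num.view(-1)
        if self.leftover_flag:
            rand_num_view = torch.cat((rand_num_view, self.leftover), 0)
        return iter(rand_num_view)

    def __len__(self):
        return self.num_data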
if args.set_cfgs is not None:
    cfg_from_list(args.set_cfgs)

print('Using config:')
pprint.pprint(cfg)
np.random.seed(cfg.RNG_SEED)

# torch.backends.cudnn.benchmark = True
if torch.cuda.is_available() and not args.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

# train set
# -- Note: Use validation set and disable the flipped to enable faster loading.
cfg.TRAIN.USE_FLIPPED = True
cfg.USE_GPU_NMS = args.cuda
imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name)
train_size = len(roidb)

unlabel_imdb, unlabel_roidb, unlabel_ratio_list, unlabel_ratio_index = combined_roidb(args.imdb_name_unlabel)
unlabel_train_size = len(unlabel_roidb)

save_coco_unflip = unlabel_roidb[0]
save_coco_flip = unlabel_roidb[23079]

with open("/home/user/JISOO/R-FCN.pytorch-master/data/coco/voc_included.txt") as f:
    lines = f.readlines()

coco_roidb = []
coco_flip_roidb = []

for a in range(len(lines)):
num_boxes = tensor_holder(torch.LongTensor(1), cfg.CUDA, True)
gt_boxes = tensor_holder(torch.FloatTensor(1), cfg.CUDA, True)

max_per_image = 100
thresh = 0.05 if args.vis else 0.0

aps = []

# Train ALL groups, or just ONE group
start_group, end_group = (0, cfg.CIOD.GROUPS) if args.group == -1 else (args.group, args.group + 1)

for group in trange(start_group, end_group, desc="Group", leave=True):
    now_cls_low = cfg.NUM_CLASSES * group // cfg.CIOD.GROUPS + 1
    now_cls_high = cfg.NUM_CLASSES * (group + 1) // cfg.CIOD.GROUPS + 1

    imdb, roidb, ratio_list, ratio_index = combined_roidb(
        args.dataset, "{}Step{}a".format("trainval" if args.self_check else "test", group),
        classes=cfg.CLASSES[:now_cls_high], ext=cfg.EXT, training=False)
    imdb.competition_mode(on=True)
    tqdm.write('{:d} roidb entries'.format(len(roidb)))

    load_name = os.path.join(
        load_dir, 'faster_rcnn_{}_{}_{}_{}.pth'.format(args.session, args.net, args.dataset, group))
    tqdm.write("load checkpoint {}".format(load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    class_means = checkpoint['cls_means'][:, :now_cls_high]
    cfg.POOLING_MODE = checkpoint['pooling_mode']
    tqdm.write('load model successfully!')

    if cfg.CUDA:
        class_means = class_means.cuda()
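    # Worked example of the group bounds above (assuming class index 0 is the
    # background slot, which is what the "+ 1" on both bounds skips): with
    # cfg.NUM_CLASSES = 20 and cfg.CIOD.GROUPS = 4,
    #   group 0 -> now_cls_low = 1, now_cls_high = 6   (classes 1-5)
    #   group 1 -> now_cls_low = 6, now_cls_high = 11  (classes 6-10)
    # so each incremental step is evaluated on every class seen so far via
    # cfg.CLASSES[:now_cls_high].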
pprint.pprint(cfg)
np.random.seed(cfg.RNG_SEED)

# torch.backends.cudnn.benchmark = True
if torch.cuda.is_available() and not args.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

# train set
# -- Note: Use validation set and disable the flipped to enable faster loading.
cfg.TRAIN.USE_FLIPPED = True
# cfg.TRAIN.USE_FLIPPED = False
cfg.USE_GPU_NMS = args.cuda

# source dataset
imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name)
train_size = len(roidb)

# target dataset
imdb_t, roidb_t, ratio_list_t, ratio_index_t = combined_roidb(args.imdb_name_target)
train_size_t = len(roidb_t)

print('{:d} source roidb entries'.format(len(roidb)))
print('{:d} target roidb entries'.format(len(roidb_t)))

output_dir = args.save_dir + "/" + args.net + "/" + args.log_ckpt_name
# output_dir = args.save_dir + "/" + args.net + "/" + args.dataset_t
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

sampler_batch = sampler(train_size, args.batch_size)
def main(scene_img_path, query_img_path):
    if torch.cuda.is_available() and not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")

    np.random.seed(cfg.RNG_SEED)

    if args.dataset == "coco":
        args.imdb_name = "coco_2017_train"
        args.imdbval_name = "coco_2017_val"
        args.set_cfgs = ['ANCHOR_SCALES', '[4, 8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]']

    # args.cfg_file = "cfgs/{}_ls.yml".format(args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)
    args.cfg_file = "cfgs/{}_{}.yml".format(args.net, args.group) if args.group != 0 else "cfgs/{}.yml".format(args.net)

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)
    if args.set_cfgs is not None:
        cfg_from_list(args.set_cfgs)

    print('Using config:')
    pprint.pprint(cfg)

    cfg.TRAIN.USE_FLIPPED = False
    # imdb_vs, roidb_vs, ratio_list_vs, ratio_index_vs, query_vs = combined_roidb('coco_2014_valminusminival', False)
    imdb_vu, roidb_vu, ratio_list_vu, ratio_index_vu, query_vu = combined_roidb(
        args.imdbval_name, False, seen=args.seen)
    # imdb_vs.competition_mode(on=True)
    imdb_vu.competition_mode(on=True)

    input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
    if not os.path.exists(input_dir):
        raise Exception('There is no input directory for loading network from ' + input_dir)
    load_name = os.path.join(input_dir,
                             'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint))

    # initialize the network here.
    if args.net == 'res50':
        fasterRCNN = resnet(imdb_vu.classes, 50, pretrained=False, class_agnostic=args.class_agnostic)
    else:
        print("network is not defined")
        pdb.set_trace()

    fasterRCNN.create_architecture()

    print("load checkpoint %s" % (load_name))
    checkpoint = torch.load(load_name)
    fasterRCNN.load_state_dict(checkpoint['model'])
    if 'pooling_mode' in checkpoint.keys():
        cfg.POOLING_MODE = checkpoint['pooling_mode']

    print('load model successfully!')

    # initialize the tensor holder here.
    im_data = torch.FloatTensor(1)
    query = torch.FloatTensor(1)
    im_info = torch.FloatTensor(1)
    catgory = torch.LongTensor(1)
    gt_boxes = torch.FloatTensor(1)

    # ship to cuda
    if args.cuda:
        im_data = im_data.cuda()
        query = query.cuda()
        im_info = im_info.cuda()
        catgory = catgory.cuda()
        gt_boxes = gt_boxes.cuda()

    if args.cuda:
        cfg.CUDA = True
        fasterRCNN.cuda()

    max_per_image = 100
    thresh = 0.05

    # output_dir_vs = get_output_dir(imdb_vs, 'faster_rcnn_seen')
    output_dir_vu = get_output_dir(imdb_vu, 'faster_rcnn_unseen')

    all_weight = np.zeros((len(ratio_index_vu[0]), 1024))
    all_times = np.zeros((imdb_vu.num_classes))

    dataset_vu = roibatchLoader(roidb_vu, ratio_list_vu, ratio_index_vu, query_vu, 1,
                                imdb_vu.num_classes, training=False, seen=args.seen)

    fasterRCNN.eval()

    avg = 0
    dataset_vu.query_position = avg

    num_images_vu = len(imdb_vu.image_index)
    all_boxes = [[[] for _ in xrange(num_images_vu)]
                 for _ in xrange(imdb_vu.num_classes)]

    _t = {'im_detect': time.time(), 'misc': time.time()}
    det_file = os.path.join(output_dir_vu, 'detections_%d_%d.pkl' % (args.seen, avg))
    print(det_file)

    i = 0
    index = 0
    data = [0, 0, 0, 0, 0]

    # version = 'custom'
    # coco is completed
    # if version == 'coco':
    #     im = imread('/home/yjyoo/PycharmProjects/data/coco/images/val2017/000000397133.jpg')
    #     query_im = imread('/home/yjyoo/PycharmProjects/data/coco/images/val2017/000000007816.jpg')
    #     query_im = crop(query_im, [505.54, 53.01, 543.08, 164.09], size=128)
    # else:
    im = imread(scene_img_path)
    im = cv2.resize(im, dsize=(640, 480), interpolation=cv2.INTER_LINEAR)

    query_im = imread(query_img_path)
    query_im = cv2.resize(query_im, dsize=(640, 480), interpolation=cv2.INTER_LINEAR)

    _im = np.copy(im)
    _query_im = np.copy(query_im)

    # make im_data
    im, im_scale = prep_im_for_blob(im, target_size=600)
    im = torch.tensor(im)
    im = torch.unsqueeze(im, 0)
    im = im.transpose(1, 3)
    im_data = im.transpose(2, 3)

    # make query data
    query_im, query_im_scale = prep_im_for_blob(query_im, target_size=128)
    query_im = torch.tensor(query_im)
    query_im = torch.unsqueeze(query_im, 0)
    query_im = query_im.transpose(1, 3)
    query = query_im.transpose(2, 3)

    im_data = data[0] = im_data.cuda()
    query = data[1] = query.cuda()
    im_info = data[2] = torch.tensor([[600, 899, 1.4052]])
    gt_boxes = data[3] = torch.rand(1, 4, 5)  # don't care
    catgory = data[4] = torch.tensor([1])

    det_tic = time.time()
    rois, cls_prob, bbox_pred, \
    rpn_loss_cls, rpn_loss_box, \
    RCNN_loss_cls, _, RCNN_loss_bbox, \
    rois_label, weight = fasterRCNN(im_data, query, im_info, gt_boxes, catgory)

    # all_weight[data[4], :] = all_weight[data[4], :] + weight.view(-1).detach().cpu().numpy()
    all_weight[i, :] = weight.view(-1).detach().cpu().numpy()
    all_times[data[4]] = all_times[data[4]] + 1

    scores = cls_prob.data
    boxes = rois.data[:, :, 1:5]

    if cfg.TEST.BBOX_REG:
        # Apply bounding-box regression deltas
        box_deltas = bbox_pred.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # Optionally normalize targets by a precomputed mean and stdev
            box_deltas = box_deltas.view(-1, 4) * torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS).cuda() \
                         + torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS).cuda()
            box_deltas = box_deltas.view(1, -1, 4)

        pred_boxes = bbox_transform_inv(boxes, box_deltas, 1)
        pred_boxes = clip_boxes(pred_boxes, im_info.data, 1)

    pred_boxes /= data[2][0][2].item()

    scores = scores.squeeze()
    pred_boxes = pred_boxes.squeeze()
    det_toc = time.time()
    detect_time = det_toc - det_tic
    misc_tic = time.time()

    im2show = np.copy(_im)

    inds = torch.nonzero(scores > thresh).view(-1)
    # if there is det
    if inds.numel() > 0:
        cls_scores = scores[inds]
        _, order = torch.sort(cls_scores, 0, True)
        cls_boxes = pred_boxes[inds, :]

        cls_dets = torch.cat((cls_boxes, cls_scores.unsqueeze(1)), 1)
        # cls_dets = torch.cat((cls_boxes, cls_scores), 1)
        cls_dets = cls_dets[order]
        keep = nms(cls_boxes[order, :], cls_scores[order], cfg.TEST.NMS)
        cls_dets = cls_dets[keep.view(-1).long()]
        all_boxes[data[4]][index] = cls_dets.cpu().numpy()

        im2show = vis_detections(im2show, 'shot', cls_dets.cpu().numpy(), 0.8)
        _im2show = np.concatenate((im2show, _query_im), axis=1)

        plt.imshow(_im2show)
        plt.show()

    # Limit to max_per_image detections *over all classes*
    if max_per_image > 0:
        try:
            image_scores = all_boxes[data[4]][index][:, -1]
            if len(image_scores) > max_per_image:
                image_thresh = np.sort(image_scores)[-max_per_image]
                keep = np.where(all_boxes[data[4]][index][:, -1] >= image_thresh)[0]
                all_boxes[data[4]][index] = all_boxes[data[4]][index][keep, :]
        except:
            pass

    misc_toc = time.time()

    o_query = data[1][0].permute(1, 2, 0).contiguous().cpu().numpy()
    o_query *= [0.229, 0.224, 0.225]
    o_query += [0.485, 0.456, 0.406]
    o_query *= 255
    o_query = o_query[:, :, ::-1]

    (h, w, c) = im2show.shape
    o_query = cv2.resize(o_query, (h, h), interpolation=cv2.INTER_LINEAR)
    o_query = cv2.cvtColor(o_query, cv2.COLOR_BGR2RGB)
    im2show = np.concatenate((im2show, o_query), axis=1)
    im2show = cv2.cvtColor(im2show, cv2.COLOR_BGR2RGB)

    cv2.imwrite('./test_img/%d.png' % (i), im2show)
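    # Hedged note: the constants used to un-normalize o_query above are the
    # standard ImageNet statistics (mean [0.485, 0.456, 0.406], std
    # [0.229, 0.224, 0.225]); multiplying by the std, adding the mean and
    # scaling by 255 simply inverts a q = ((q / 255) - mean) / std style
    # normalization that the query tensor is assumed to have received, so the
    # query crop can be rendered next to the scene image in the saved result.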
if args.set_cfgs is not None:
    cfg_from_list(args.set_cfgs)

print('Using config:')
pprint.pprint(cfg)
np.random.seed(cfg.RNG_SEED)

# torch.backends.cudnn.benchmark = True
if torch.cuda.is_available() and not args.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

# train set
# -- Note: Use validation set and disable the flipped to enable faster loading.
cfg.TRAIN.USE_FLIPPED = False
cfg.USE_GPU_NMS = args.cuda
imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name)
train_size = len(roidb)

print('{:d} roidb entries'.format(len(roidb)))

output_dir = args.save_dir + "/" + args.net + "/" + args.dataset
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

sampler_batch = sampler(train_size, args.batch_size)

dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size,
                         imdb.num_classes, training=True)

dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size,
                                         sampler=sampler_batch, num_workers=args.num_workers)
# args.cfg_file = "cfgs/{}_ls.yml".format(args.net) if args.large_scale else "cfgs/{}.yml".format(args.net)
args.cfg_file = "cfgs/{}_{}.yml".format(args.net, args.group) if args.group != 0 else "cfgs/{}.yml".format(args.net)

if args.cfg_file is not None:
    cfg_from_file(args.cfg_file)
if args.set_cfgs is not None:
    cfg_from_list(args.set_cfgs)

print('Using config:')
pprint.pprint(cfg)

cfg.TRAIN.USE_FLIPPED = False
# imdb_vs, roidb_vs, ratio_list_vs, ratio_index_vs, query_vs = combined_roidb('coco_2014_valminusminival', False)
imdb_vu, roidb_vu, ratio_list_vu, ratio_index_vu, query_vu = combined_roidb(
    args.imdbval_name, False, seen=args.seen)
# imdb_vs.competition_mode(on=True)
imdb_vu.competition_mode(on=True)

input_dir = args.load_dir + "/" + args.net + "/" + args.dataset
if not os.path.exists(input_dir):
    raise Exception('There is no input directory for loading network from ' + input_dir)
load_name = os.path.join(input_dir,
                         'faster_rcnn_{}_{}_{}.pth'.format(args.checksession, args.checkepoch, args.checkpoint))

# initialize the network here.
if args.net == 'vgg16':
if args.set_cfgs is not None:
    cfg_from_list(args.set_cfgs)

print('Using config:')
pprint.pprint(cfg)
np.random.seed(cfg.RNG_SEED)

# torch.backends.cudnn.benchmark = True
if torch.cuda.is_available() and not args.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

# train set
# -- Note: Use validation set and disable the flipped to enable faster loading.
cfg.TRAIN.USE_FLIPPED = True
cfg.USE_GPU_NMS = args.cuda
imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name)
train_size = len(roidb)

print('{:d} roidb entries'.format(len(roidb)))

output_dir = os.path.join(args.save_dir, args.arch, args.net, args.dataset)
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

sampler_batch = sampler(train_size, args.batch_size)

dataset = roibatchLoader(roidb, ratio_list, ratio_index, args.batch_size,
                         imdb.num_classes, training=True)

dataloader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size,
                                         sampler=sampler_batch, num_workers=args.num_workers)
if args.set_cfgs is not None:
    cfg_from_list(args.set_cfgs)

print('Using config:')
pprint.pprint(cfg)
np.random.seed(cfg.RNG_SEED)

# torch.backends.cudnn.benchmark = True
if torch.cuda.is_available() and not args.cuda:
    print("WARNING: You have a CUDA device, so you should probably run with --cuda")

# train set
# -- Note: Use validation set and disable the flipped to enable faster loading.
cfg.TRAIN.USE_FLIPPED = True
cfg.USE_GPU_NMS = args.cuda
imdb, roidb, ratio_list, ratio_index = combined_roidb(args.imdb_name)
train_size = len(roidb)

""" Get imdb, roidb for target dataset... """
print("getting target values...\n")
target_imdb, target_roidb, target_ratio_list, target_ratio_index = combined_roidb(args.target_imdb_name, False)
target_size = len(target_roidb)

print('{:d} roidb entries'.format(len(roidb)))

output_dir = args.save_dir + "/" + args.net + "/" + args.dataset
if not os.path.exists(output_dir):