def get_dataset(datasetnames):
    """Build one RoiDataset from a '+'-separated list of imdb names.

    The first name seeds the dataset; every following imdb is loaded and
    folded in via RoiDataset's in-place addition.
    """
    parts = datasetnames.split('+')
    combined = RoiDataset(get_imdb(parts[0]))
    print('load dataset {}'.format(parts[0]))
    for extra in parts[1:]:
        combined += RoiDataset(get_imdb(extra))
        print('load and add dataset {}'.format(extra))
    return combined
def get_dataset(datasetnames):
    """Combine '+'-joined imdb names (e.g. 'voc_2007_trainval+voc_2012_trainval')
    into a single RoiDataset."""
    name_iter = iter(datasetnames.split('+'))
    # Seed with the first imdb (e.g. 'voc_2007_trainval') ...
    first = next(name_iter)
    dataset = RoiDataset(get_imdb(first))
    print('load dataset {}'.format(first))
    # ... then merge each remaining imdb into it.
    for name in name_iter:
        dataset += RoiDataset(get_imdb(name))
        print('load and add dataset {}'.format(name))
    return dataset
def train():
    """Train Yolov2 on the 'trainval' imdb.

    Supports multi-scale training, optional TensorBoard logging, resuming
    from a checkpoint, and periodic checkpoint saving. All hyperparameters
    come from `parse_args()` plus the global `cfg`.
    """
    # define the hyper parameters first
    args = parse_args()
    args.steplr_epoch = cfg.steplr_epoch
    args.steplr_factor = cfg.steplr_factor
    args.weight_decay = cfg.weight_decay
    args.momentum = cfg.momentum
    print('Called with args:')
    print(args)
    lr = args.lr
    # initial tensorboardX writer
    if args.use_tfboard:
        if args.exp_name == 'default':
            writer = SummaryWriter()
        else:
            writer = SummaryWriter('runs/' + args.exp_name)
    args.imdb_name = 'trainval'
    args.imdbval_name = 'trainval'
    output_dir = args.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # load dataset
    print('loading dataset....')
    train_dataset = RoiDataset(get_imdb(args.imdb_name))
    print('dataset loaded.')
    print('training rois number: {}'.format(len(train_dataset)))
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  num_workers=args.num_workers,
                                  collate_fn=detection_collate,
                                  drop_last=True)
    # initialize the model
    print('initialize the model')
    tic = time.time()
    model = Yolov2(pretrained=True, arch=args.arch)
    toc = time.time()
    print('model loaded: cost time {:.2f}s'.format(toc - tic))
    # initialize the optimizer
    # The backbone ("trunk") gets a scaled learning rate (cfg.former_lr_decay);
    # conv3/conv4 fall through to the group default of args.lr.
    optimizer = optim.SGD([{
        "params": model.trunk.parameters(),
        "lr": args.lr * cfg.former_lr_decay
    }, {
        "params": model.conv3.parameters()
    }, {
        "params": model.conv4.parameters()
    }],
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
    scheduler = StepLR(optimizer,
                       step_size=args.steplr_epoch,
                       gamma=args.steplr_factor)
    if args.resume:
        # Restore model weights, epoch counter and lr from a saved checkpoint.
        # NOTE(review): optimizer momentum buffers are NOT restored here, and
        # adjust_learning_rate presumably sets one lr across all param groups
        # (flattening the trunk/former_lr_decay ratio) — confirm intent.
        print('resume training enable')
        resume_checkpoint_name = 'yolov2_epoch_{}.pth'.format(
            args.checkpoint_epoch)
        resume_checkpoint_path = os.path.join(output_dir,
                                              resume_checkpoint_name)
        print('resume from {}'.format(resume_checkpoint_path))
        checkpoint = torch.load(resume_checkpoint_path)
        model.load_state_dict(checkpoint['model'])
        args.start_epoch = checkpoint['epoch'] + 1
        lr = checkpoint['lr']
        print('learning rate is {}'.format(lr))
        adjust_learning_rate(optimizer, lr)
    if args.use_cuda:
        model.cuda()
    if args.mGPUs:
        model = nn.DataParallel(model)
    # set the model mode to train because we have some layer whose behaviors
    # are different when in training and testing.
    # such as Batch Normalization Layer.
    model.train()
    iters_per_epoch = int(len(train_dataset) / args.batch_size)
    # start training
    for epoch in range(args.start_epoch, args.max_epochs + 1):
        loss_temp = 0
        tic = time.time()
        train_data_iter = iter(train_dataloader)
        # step the lr schedule once per epoch, then read back the current lr
        scheduler.step()
        lr = get_lr(optimizer)
        if cfg.multi_scale and epoch in cfg.epoch_scale:
            cfg.scale_range = cfg.epoch_scale[epoch]
            print('change scale range to {}'.format(cfg.scale_range))
        for step in range(iters_per_epoch):
            # multi-scale training: periodically pick a new input resolution
            if cfg.multi_scale and (step + 1) % cfg.scale_step == 0:
                scale_index = np.random.randint(*cfg.scale_range)
                cfg.input_size = cfg.input_sizes[scale_index]
                ##print('change input size {}'.format(cfg.input_size))
            im_data, boxes, gt_classes, num_obj = next(train_data_iter)
            if args.use_cuda:
                im_data = im_data.cuda()
                boxes = boxes.cuda()
                gt_classes = gt_classes.cuda()
                num_obj = num_obj.cuda()
            im_data_variable = Variable(im_data)
            box_loss, iou_loss, class_loss = model(im_data_variable,
                                                   boxes,
                                                   gt_classes,
                                                   num_obj,
                                                   training=True)
            loss = box_loss.mean() + iou_loss.mean() \
                + class_loss.mean()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_temp += loss.item()
            # periodic console / file / TensorBoard logging
            if (step + 1) % args.display_interval == 0:
                toc = time.time()
                loss_temp /= args.display_interval
                iou_loss_v = iou_loss.mean().item()
                box_loss_v = box_loss.mean().item()
                class_loss_v = class_loss.mean().item()
                log = "[epoch %2d][step %4d/%4d] loss: %.4f, lr: %.2e, iou_loss: %.4f, box_loss: %.4f, cls_loss: %.4f" \
                    % (epoch, step+1, iters_per_epoch, loss_temp, lr, iou_loss_v, box_loss_v, class_loss_v)
                print(log)
                logfile = os.path.join(output_dir, 'training_log.txt')
                with open(logfile, 'a') as f:
                    print(log, file=f)
                if args.use_tfboard:
                    n_iter = (epoch - 1) * iters_per_epoch + step + 1
                    writer.add_scalar('lr', lr, n_iter)
                    writer.add_scalar('losses/loss', loss_temp, n_iter)
                    writer.add_scalar('losses/iou_loss', iou_loss_v, n_iter)
                    writer.add_scalar('losses/box_loss', box_loss_v, n_iter)
                    writer.add_scalar('losses/cls_loss', class_loss_v, n_iter)
                loss_temp = 0
                tic = time.time()
        # checkpoint every save_interval epochs
        if epoch % args.save_interval == 0:
            save_name = os.path.join(output_dir,
                                     'yolov2_epoch_{}.pth'.format(epoch))
            torch.save(
                {
                    'model': model.module.state_dict()
                    if args.mGPUs else model.state_dict(),
                    'epoch': epoch,
                    'lr': lr
                }, save_name)
def train():
    """Train FasterRCNN (res50/res101 backbone) on VOC trainval.

    Only single-image batches are supported. Hyperparameters come from
    cfg.TRAIN; supports TensorBoard logging, per-epoch lr decay at the
    epochs listed in cfg.TRAIN.DECAY_LRS, and periodic checkpointing.
    """
    args = parse_args()
    args.decay_lrs = cfg.TRAIN.DECAY_LRS
    cfg.USE_GPU_NMS = True if args.use_cuda else False
    assert args.batch_size == 1, 'Only support single batch'
    lr = cfg.TRAIN.LEARNING_RATE
    momentum = cfg.TRAIN.MOMENTUM
    weight_decay = cfg.TRAIN.WEIGHT_DECAY
    gamma = cfg.TRAIN.GAMMA
    # initial tensorboardX writer
    if args.use_tfboard:
        if args.exp_name == 'default':
            writer = SummaryWriter()
        else:
            writer = SummaryWriter('runs/' + args.exp_name)
    # map CLI dataset alias -> imdb names
    if args.dataset == 'voc07trainval':
        args.imdb_name = 'voc_2007_trainval'
        args.imdbval_name = 'voc_2007_test'
    elif args.dataset == 'voc0712trainval':
        args.imdb_name = 'voc_2007_trainval+voc_2012_trainval'
        args.imdbval_name = 'voc_2007_test'
    else:
        raise NotImplementedError
    # pick the caffe-converted backbone weights file
    if args.net == 'res50':
        fname = 'resnet50-caffe.pth'
    elif args.net == 'res101':
        fname = 'resnet101-caffe.pth'
    else:
        raise NotImplementedError
    args.pretrained_model = os.path.join('data', 'pretrained', fname)
    output_dir = args.output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    # dataset_cachefile = os.path.join(output_dir, 'dataset.pickle')
    # if not os.path.exists(dataset_cachefile):
    #     imdb, roidb = combined_roidb(args.imdb_name)
    #     cache = [imdb, roidb]
    #     with open(dataset_cachefile, 'wb') as f:
    #         pickle.dump(cache, f)
    #     print('save dataset cache')
    # else:
    #     with open(dataset_cachefile, 'rb') as f:
    #         cache = pickle.load(f)
    #     imdb, roidb = cache[0], cache[1]
    #     print('loaded dataset from cache')
    imdb, roidb = combined_roidb(args.imdb_name)
    train_dataset = RoiDataset(roidb)
    train_dataloader = DataLoader(train_dataset, args.batch_size, shuffle=True)
    model = FasterRCNN(backbone=args.net, pretrained=args.pretrained_model)
    print('model loaded')
    # if cfg.PRETRAINED_RPN:
    #     rpn_model_path = 'output/rpn.pth'
    #     model.load_state_dict(torch.load(rpn_model_path)['model'])
    #     print('loaded rpn!')
    # optimizer
    # Per-parameter groups: biases get doubled lr when cfg.TRAIN.DOUBLE_BIAS
    # is set, and weight decay only when cfg.TRAIN.BIAS_DECAY is truthy.
    params = []
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{'params': [value],
                            'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1),
                            'weight_decay': cfg.TRAIN.BIAS_DECAY and weight_decay or 0}]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': weight_decay
                }]
    optimizer = SGD(params, momentum=momentum)
    if args.use_cuda:
        model = model.cuda()
    model.train()
    iters_per_epoch = int(len(train_dataset) / args.batch_size)
    # start training
    for epoch in range(args.start_epoch, args.max_epochs + 1):
        loss_temp = 0
        # tp/tn/fg/bg counters for RPN and RCNN accuracy reporting (VERBOSE)
        rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
        rcnn_tp, rcnn_tn, rcnn_fg, rcnn_bg = 0, 0, 0, 0
        tic = time.time()
        train_data_iter = iter(train_dataloader)
        # stepwise lr decay at configured epochs
        if epoch in args.decay_lrs:
            lr = lr * gamma
            adjust_learning_rate(optimizer, lr)
            print('adjust learning rate to {}'.format(lr))
        for step in range(iters_per_epoch):
            im_data, gt_boxes, im_info = next(train_data_iter)
            if args.use_cuda:
                im_data = im_data.cuda()
                gt_boxes = gt_boxes.cuda()
                im_info = im_info.cuda()
            im_data_variable = Variable(im_data)
            output = model(im_data_variable, gt_boxes, im_info)
            rois, _, _, \
                rcnn_cls_loss, rcnn_box_loss, \
                rpn_cls_loss, rpn_box_loss, _train_info = output
            # total loss = RCNN cls + box losses plus RPN cls + box losses
            loss = rcnn_cls_loss.mean() + rcnn_box_loss.mean() + \
                rpn_cls_loss.mean() + rpn_box_loss.mean()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_temp += loss.item()
            if cfg.VERBOSE:
                rpn_tp += _train_info['rpn_tp']
                rpn_tn += _train_info['rpn_tn']
                rpn_fg += _train_info['rpn_num_fg']
                rpn_bg += _train_info['rpn_num_bg']
                rcnn_tp += _train_info['rcnn_tp']
                rcnn_tn += _train_info['rcnn_tn']
                rcnn_fg += _train_info['rcnn_num_fg']
                rcnn_bg += _train_info['rcnn_num_bg']
            # periodic console / TensorBoard logging
            if (step + 1) % args.display_interval == 0:
                toc = time.time()
                loss_temp /= args.display_interval
                rpn_cls_loss_v = rpn_cls_loss.mean().item()
                rpn_box_loss_v = rpn_box_loss.mean().item()
                rcnn_cls_loss_v = rcnn_cls_loss.mean().item()
                rcnn_box_loss_v = rcnn_box_loss.mean().item()
                print("[epoch %2d][step %4d/%4d] loss: %.4f, lr: %.2e, time cost %.1fs" \
                    % (epoch, step+1, iters_per_epoch, loss_temp, lr, toc - tic))
                print("\t\t\t rpn_cls_loss_v: %.4f, rpn_box_loss_v: %.4f\n\t\t\t "
                      "rcnn_cls_loss_v: %.4f, rcnn_box_loss_v: %.4f" \
                    % (rpn_cls_loss_v, rpn_box_loss_v, rcnn_cls_loss_v,
                       rcnn_box_loss_v))
                if cfg.VERBOSE:
                    # NOTE(review): rpn_fg/rpn_bg/rcnn_fg/rcnn_bg may be zero
                    # before the first accumulation — division would raise.
                    print('\t\t\t RPN : [FG/BG] [%d/%d], FG: %.4f, BG: %.4f' %
                          (rpn_fg, rpn_bg, float(rpn_tp) / rpn_fg,
                           float(rpn_tn) / rpn_bg))
                    print('\t\t\t RCNN: [FG/BG] [%d/%d], FG: %.4f, BG: %.4f' %
                          (rcnn_fg, rcnn_bg, float(rcnn_tp) / rcnn_fg,
                           float(rcnn_tn) / rcnn_bg))
                if args.use_tfboard:
                    n_iter = (epoch - 1) * iters_per_epoch + step + 1
                    writer.add_scalar('losses/loss', loss_temp, n_iter)
                    writer.add_scalar('losses/rpn_cls_loss_v', rpn_cls_loss_v,
                                      n_iter)
                    writer.add_scalar('losses/rpn_box_loss_v', rpn_box_loss_v,
                                      n_iter)
                    writer.add_scalar('losses/rcnn_cls_loss_v',
                                      rcnn_cls_loss_v, n_iter)
                    writer.add_scalar('losses/rcnn_box_loss_v',
                                      rcnn_box_loss_v, n_iter)
                    if cfg.VERBOSE:
                        writer.add_scalar('rpn/fg_acc',
                                          float(rpn_tp) / rpn_fg, n_iter)
                        writer.add_scalar('rpn/bg_acc',
                                          float(rpn_tn) / rpn_bg, n_iter)
                        writer.add_scalar('rcnn/fg_acc',
                                          float(rcnn_tp) / rcnn_fg, n_iter)
                        writer.add_scalar('rcnn/bg_acc',
                                          float(rcnn_tn) / rcnn_bg, n_iter)
                # reset running stats for the next display window
                loss_temp = 0
                rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
                rcnn_tp, rcnn_tn, rcnn_fg, rcnn_bg = 0, 0, 0, 0
                tic = time.time()
        # checkpoint every save_interval epochs
        if epoch % args.save_interval == 0:
            save_name = os.path.join(
                output_dir, 'faster_{}_epoch_{}.pth'.format(args.net, epoch))
            torch.save({
                'model': model.state_dict(),
                'epoch': epoch,
                'lr': lr
            }, save_name)
def test():
    """Evaluate a trained Yolov2 checkpoint on an imdb.

    Runs inference over the validation dataloader, collects per-class
    detections into `all_boxes`, writes a JSON summary and a pickle of raw
    detections, optionally visualizes boxes, then calls the imdb's
    `evaluate_detections`.

    Fixes vs. the previous revision: removed an `Image.open` whose result
    was never used (wasted file I/O per image), removed unused locals
    (`det_bbox`, `disp_str`, duplicated rounding), and renamed the
    per-detection loop index so it no longer shadows the batch index `i`.
    """
    args = parse_args()
    args.conf_thresh = 0.005
    args.nms_thresh = 0.45
    if args.vis:
        # when visualizing, keep only confident detections
        args.conf_thresh = 0.5
    print('Called with args:')
    print(args)
    # prepare dataset
    val_imdb = get_imdb(args.dataset)
    val_dataset = RoiDataset(val_imdb, train=False)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=args.batch_size,
                                shuffle=False)
    # load model
    model = Yolov2(arch=args.arch)
    model_path = os.path.join(args.output_dir, args.model_name + '.pth')
    print('loading model from {}'.format(model_path))
    if torch.cuda.is_available():
        checkpoint = torch.load(model_path)
    else:
        checkpoint = torch.load(model_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])
    if args.use_cuda:
        model.cuda()
    model.eval()
    print('model loaded')
    dataset_size = len(val_imdb.image_index)
    print('classes: ', val_imdb.num_classes)
    # all_boxes[cls][image] -> (N, 5) ndarray of [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in range(dataset_size)]
                 for _ in range(val_imdb.num_classes)]
    det_file = os.path.join(args.output_dir, 'detections.pkl')
    results = []
    img_id = -1
    with torch.no_grad():
        for batch, (im_data, im_infos) in enumerate(val_dataloader):
            if args.use_cuda:
                im_data_variable = Variable(im_data).cuda()
            else:
                im_data_variable = Variable(im_data)
            yolo_outputs = model(im_data_variable)
            for i in range(im_data.size(0)):
                img_id += 1
                output = [item[i].data for item in yolo_outputs]
                im_info = {'width': im_infos[i][0], 'height': im_infos[i][1]}
                detections = yolo_eval(output,
                                       im_info,
                                       conf_threshold=args.conf_thresh,
                                       nms_threshold=args.nms_thresh)
                if img_id % 100 == 0:
                    print('im detect [{}/{}]'.format(img_id + 1,
                                                     len(val_dataset)))
                if len(detections) > 0:
                    # scatter detections into per-class slots; the stored
                    # score is objectness * class probability
                    for cls in range(val_imdb.num_classes):
                        inds = torch.nonzero(detections[:, -1] == cls).view(-1)
                        if inds.numel() > 0:
                            cls_det = torch.zeros((inds.numel(), 5))
                            cls_det[:, :4] = detections[inds, :4]
                            cls_det[:, 4] = detections[inds, 4] * \
                                detections[inds, 5]
                            all_boxes[cls][img_id] = cls_det.cpu().numpy()
                    # build a JSON-serializable record for this image
                    detect_result = {}
                    boxes = detections[:, :5].cpu().numpy()
                    classes = detections[:, -1].long().cpu().numpy()
                    class_names = val_imdb.classes
                    num_boxes = boxes.shape[0]
                    labels = []
                    for b in range(num_boxes):
                        class_name = class_names[classes[b]]
                        xmin, ymin, xmax, ymax = tuple(
                            np.round(boxes[b, :4]).astype(np.int64))
                        box2d = {}
                        box2d["x1"] = str(xmin)
                        box2d["y1"] = str(ymin)
                        box2d["x2"] = str(xmax)
                        box2d["y2"] = str(ymax)
                        bbox = {}
                        bbox["box2d"] = box2d
                        bbox["category"] = class_name
                        labels.append(bbox)
                    detect_result["ImageID"] = os.path.basename(
                        val_imdb.image_path_at(img_id))
                    detect_result["labels"] = labels
                    results.append(detect_result)
                if args.vis:
                    img = Image.open(val_imdb.image_path_at(img_id))
                    if len(detections) == 0:
                        continue
                    det_boxes = detections[:, :5].cpu().numpy()
                    det_classes = detections[:, -1].long().cpu().numpy()
                    im2show = draw_detection_boxes(
                        img,
                        det_boxes,
                        det_classes,
                        class_names=val_imdb.classes)
                    plt.figure()
                    plt.imshow(im2show)
                    plt.show()
    print(results)
    results_file = os.path.join(args.output_dir, 'detections.json')
    with open(results_file, 'w') as f:
        json.dump(results,
                  f,
                  ensure_ascii=False,
                  indent=4,
                  sort_keys=True,
                  separators=(',', ': '))
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    val_imdb.evaluate_detections(all_boxes, output_dir=args.output_dir)
def train():
    """Train FasterRCNN on VOC (07, 12 or 07+12 trainval).

    Hyperparameters come from `parse_args()`; supports optional TensorBoard
    logging, stepwise lr decay at the epochs in `decay_lrs`, and periodic
    checkpoint saving.
    """
    args = parse_args()
    lr = args.lr
    decay_lrs = args.decay_lrs
    momentum = args.momentum
    weight_decay = args.weight_decay
    bais_decay = args.bais_decay  # (sic) flag: apply weight decay to biases
    gamma = args.gamma
    cfg.USE_GPU_NMS = True if args.use_gpu else False
    if args.use_tfboard:
        writer = SummaryWriter()
    # load data
    print('load data')
    # map CLI dataset alias -> imdb name(s)
    if args.dataset == 'voc07trainval':
        dataset_name = 'voc_2007_trainval'
    elif args.dataset == 'voc12trainval':
        dataset_name = 'voc_2012_trainval'
    elif args.dataset == 'voc0712trainval':
        dataset_name = 'voc_2007_trainval+voc_2012_trainval'
    else:
        raise NotImplementedError
    imdb, roidb = combined_roidb(dataset_name)
    train_dataset = RoiDataset(roidb)
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=args.batch_size,
                                  shuffle=True)
    iter_per_epoch = int(len(train_dataset) / args.batch_size)
    # prepare model
    print('load model')
    model = FasterRCNN(backbone=args.backbone)
    # Per-parameter groups: biases get doubled lr when cfg.TRAIN.DOUBLE_BIAS
    # is set, and weight decay only when bais_decay is truthy.
    params = []
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            if 'bias' in key:
                params += [{
                    'params': [value],
                    'lr': lr * (cfg.TRAIN.DOUBLE_BIAS + 1),
                    'weight_decay': bais_decay and weight_decay or 0
                }]
            else:
                params += [{
                    'params': [value],
                    'lr': lr,
                    'weight_decay': weight_decay
                }]
    if args.use_gpu:
        model = model.cuda()
    model.train()
    # define optimizer
    optimizer = SGD(params, momentum=momentum)
    # training
    print('start training...')
    for epoch in range(args.epochs):
        start_time = time.time()
        train_data_iter = iter(train_dataloader)
        temp_loss = 0
        # tp/tn/fg/bg counters for RPN and RCNN accuracy reporting (VERBOSE)
        rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
        faster_rcnn_tp, faster_rcnn_tn, faster_rcnn_fg, faster_rcnn_bg = 0, 0, 0, 0
        # stepwise lr decay at configured epochs
        if epoch in decay_lrs:
            lr = lr * gamma
            adjust_lr(optimizer, lr)
            print('adjusting learning rate to {}'.format(lr))
        for step in range(iter_per_epoch):
            im_data, gt_boxes, im_info = next(train_data_iter)
            if args.use_gpu:
                im_data = im_data.cuda()
                gt_boxes = gt_boxes.cuda()
                im_info = im_info.cuda()
            im_data_variable = Variable(im_data)
            outputs = model(im_data_variable, gt_boxes, im_info)
            rois, _, _, faster_rcnn_cls_loss, faster_rcnn_reg_loss, \
                rpn_cls_loss, rpn_reg_loss, _train_info = outputs
            # total loss = RCNN cls + reg losses plus RPN cls + reg losses
            loss = faster_rcnn_cls_loss.mean() + faster_rcnn_reg_loss.mean() + \
                rpn_cls_loss.mean() + rpn_reg_loss.mean()
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            temp_loss += loss.item()
            if cfg.VERBOSE:
                rpn_tp += _train_info['rpn_tp']
                rpn_tn += _train_info['rpn_tn']
                rpn_fg += _train_info['rpn_num_fg']
                rpn_bg += _train_info['rpn_num_bg']
                faster_rcnn_tp += _train_info['faster_rcnn_tp']
                faster_rcnn_tn += _train_info['faster_rcnn_tn']
                faster_rcnn_fg += _train_info['faster_rcnn_num_fg']
                faster_rcnn_bg += _train_info['faster_rcnn_num_bg']
            # periodic console / TensorBoard logging
            if (step + 1) % args.display_interval == 0:
                end_time = time.time()
                temp_loss /= args.display_interval
                rpn_cls_loss_m = rpn_cls_loss.mean().item()
                rpn_reg_loss_m = rpn_reg_loss.mean().item()
                faster_rcnn_cls_loss_m = faster_rcnn_cls_loss.mean().item()
                faster_rcnn_reg_loss_m = faster_rcnn_reg_loss.mean().item()
                print('[epoch %2d][step %4d/%4d] loss: %.4f, time_cost: %.1f' %
                      (epoch, step + 1, iter_per_epoch, temp_loss,
                       end_time - start_time))
                print(
                    'loss: rpn_cls_loss_m: %.4f, rpn_reg_loss_m: %.4f, faster_rcnn_cls_loss_m: %.4f, faster_rcnn_reg_loss_m: %.4f'
                    % (rpn_cls_loss_m, rpn_reg_loss_m, faster_rcnn_cls_loss_m,
                       faster_rcnn_reg_loss_m))
                if args.use_tfboard:
                    n_iter = epoch * iter_per_epoch + step + 1
                    writer.add_scalar('losses/loss', temp_loss, n_iter)
                    writer.add_scalar('losses/rpn_cls_loss_m', rpn_cls_loss_m,
                                      n_iter)
                    writer.add_scalar('losses/rpn_reg_loss_m', rpn_reg_loss_m,
                                      n_iter)
                    writer.add_scalar('losses/faster_rcnn_cls_loss_m',
                                      faster_rcnn_cls_loss_m, n_iter)
                    writer.add_scalar('losses/faster_rcnn_reg_loss_m',
                                      faster_rcnn_reg_loss_m, n_iter)
                    if cfg.VERBOSE:
                        # NOTE(review): fg/bg counters may be zero before the
                        # first accumulation — division would raise.
                        writer.add_scalar('rpn/fg_acc',
                                          float(rpn_tp) / rpn_fg, n_iter)
                        writer.add_scalar('rpn/bg_acc',
                                          float(rpn_tn) / rpn_bg, n_iter)
                        writer.add_scalar(
                            'rcnn/fg_acc',
                            float(faster_rcnn_tp) / faster_rcnn_fg, n_iter)
                        writer.add_scalar(
                            'rcnn/bg_acc',
                            float(faster_rcnn_tn) / faster_rcnn_bg, n_iter)
                # reset running stats for the next display window
                temp_loss = 0
                rpn_tp, rpn_tn, rpn_fg, rpn_bg = 0, 0, 0, 0
                faster_rcnn_tp, faster_rcnn_tn, faster_rcnn_fg, faster_rcnn_bg = 0, 0, 0, 0
                start_time = time.time()
        if not os.path.exists(args.output_dir):
            os.mkdir(args.output_dir)
        # checkpoint every save_interval epochs
        if epoch % args.save_interval == 0:
            save_name = os.path.join(
                args.output_dir, 'faster_rcnn101_epoch_{}.pth'.format(epoch))
            torch.save({
                'model': model.state_dict(),
                'epoch': epoch,
                'lr': lr
            }, save_name)
def test():
    """Evaluate a Yolov2 checkpoint on a VOC imdb.

    Runs inference over the validation dataloader, collects per-class
    detections into `all_boxes`, pickles them, optionally visualizes boxes,
    then calls the imdb's `evaluate_detections`.
    """
    args = parse_args()
    args.conf_thresh = 0.005
    args.nms_thresh = 0.45
    if args.vis:
        # when visualizing, keep only confident detections
        args.conf_thresh = 0.5
    print('Called with args:')
    print(args)
    # prepare dataset
    if args.dataset == 'voc07trainval':
        args.imdbval_name = 'voc_2007_trainval'
    elif args.dataset == 'voc07test':
        args.imdbval_name = 'voc_2007_test'
    else:
        raise NotImplementedError
    val_imdb = get_imdb(args.imdbval_name)
    val_dataset = RoiDataset(val_imdb, train=False)
    val_dataloader = DataLoader(val_dataset,
                                batch_size=args.batch_size,
                                shuffle=False)
    # load model
    model = Yolov2()
    # weight_loader = WeightLoader()
    # weight_loader.load(model, 'yolo-voc.weights')
    # print('loaded')
    model_path = os.path.join(args.output_dir, args.model_name + '.pth')
    print('loading model from {}'.format(model_path))
    if torch.cuda.is_available():
        checkpoint = torch.load(model_path)
    else:
        checkpoint = torch.load(model_path, map_location='cpu')
    model.load_state_dict(checkpoint['model'])
    if args.use_cuda:
        model.cuda()
    model.eval()
    print('model loaded')
    dataset_size = len(val_imdb.image_index)
    # all_boxes[cls][image] -> (N, 5) ndarray of [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in range(dataset_size)]
                 for _ in range(val_imdb.num_classes)]
    det_file = os.path.join(args.output_dir, 'detections.pkl')
    img_id = -1
    with torch.no_grad():
        for batch, (im_data, im_infos) in enumerate(val_dataloader):
            if args.use_cuda:
                im_data_variable = Variable(im_data).cuda()
            else:
                im_data_variable = Variable(im_data)
            yolo_outputs = model(im_data_variable)
            for i in range(im_data.size(0)):
                img_id += 1
                output = [item[i].data for item in yolo_outputs]
                im_info = {'width': im_infos[i][0], 'height': im_infos[i][1]}
                detections = yolo_eval(output,
                                       im_info,
                                       conf_threshold=args.conf_thresh,
                                       nms_threshold=args.nms_thresh)
                print('im detect [{}/{}]'.format(img_id + 1,
                                                 len(val_dataset)))
                if len(detections) > 0:
                    # scatter detections into per-class slots; the stored
                    # score is objectness * class probability
                    for cls in range(val_imdb.num_classes):
                        inds = torch.nonzero(detections[:, -1] == cls).view(-1)
                        if inds.numel() > 0:
                            cls_det = torch.zeros((inds.numel(), 5))
                            cls_det[:, :4] = detections[inds, :4]
                            cls_det[:, 4] = detections[inds, 4] * \
                                detections[inds, 5]
                            all_boxes[cls][img_id] = cls_det.cpu().numpy()
                if args.vis:
                    img = Image.open(val_imdb.image_path_at(img_id))
                    if len(detections) == 0:
                        continue
                    det_boxes = detections[:, :5].cpu().numpy()
                    det_classes = detections[:, -1].long().cpu().numpy()
                    im2show = draw_detection_boxes(
                        img,
                        det_boxes,
                        det_classes,
                        class_names=val_imdb.classes)
                    plt.figure()
                    plt.imshow(im2show)
                    plt.show()
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    val_imdb.evaluate_detections(all_boxes, output_dir=args.output_dir)
def get_dataset(datasetnames):
    """Wrap the imdb named by `datasetnames` in a RoiDataset."""
    return RoiDataset(get_imdb(datasetnames))
def test():
    """Run FasterRCNN inference over a VOC test imdb, apply per-class NMS
    and an optional per-image detection cap, then evaluate the results.

    Fixes vs. the previous revision:
    - `detect_time` / `nms_time` were computed as (start - end), printing
      negative durations; now computed as (end - start).
    - bbox-normalization tensors were moved to CUDA unconditionally, which
      crashes CPU-only runs; they now follow `args.use_gpu`.
    """
    args = parse_args()
    # prepare data
    print('load data')
    # map CLI dataset alias -> imdb name
    if args.dataset == 'voc07test':
        dataset_name = 'voc_2007_test'
    elif args.dataset == 'voc12test':
        dataset_name = 'voc_2012_test'
    else:
        raise NotImplementedError
    cfg.TRAIN.USE_FLIPPED = False  # no flip augmentation at test time
    imdb, roidb = combined_roidb(dataset_name)
    test_dataset = RoiDataset(roidb)
    test_dataloader = DataLoader(dataset=test_dataset,
                                 batch_size=1,
                                 shuffle=False)
    test_data_iter = iter(test_dataloader)
    # load model
    model = FasterRCNN(backbone=args.backbone)
    model_name = '0712_faster_rcnn101_epoch_{}.pth'.format(args.check_epoch)
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    model_path = os.path.join(args.output_dir, model_name)
    model.load_state_dict(torch.load(model_path)['model'])
    if args.use_gpu:
        model = model.cuda()
    model.eval()
    num_images = len(imdb.image_index)
    det_file_path = os.path.join(args.output_dir, 'detections.pkl')
    # all_boxes[cls][image] -> (N, 5) ndarray of [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(imdb.num_classes)]
    empty_array = np.transpose(np.array([[], [], [], [], []]), (1, 0))
    torch.set_grad_enabled(False)
    for i in range(num_images):
        start_time = time.time()
        im_data, gt_boxes, im_info = next(test_data_iter)
        if args.use_gpu:
            im_data = im_data.cuda()
            gt_boxes = gt_boxes.cuda()
            im_info = im_info.cuda()
        im_data_variable = Variable(im_data)
        det_tic = time.time()
        rois, faster_rcnn_cls_prob, faster_rcnn_reg, _, _, _, _, _ = model(
            im_data_variable, gt_boxes, im_info)
        scores = faster_rcnn_cls_prob.data
        boxes = rois.data[:, 1:]
        boxes_deltas = faster_rcnn_reg.data
        if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
            # undo the training-time normalization of regression targets
            stds = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_STDS)
            means = torch.FloatTensor(cfg.TRAIN.BBOX_NORMALIZE_MEANS)
            if args.use_gpu:
                # match boxes_deltas' device (was unconditional .cuda())
                stds = stds.cuda()
                means = means.cuda()
            boxes_deltas = boxes_deltas.view(-1, 4) * stds + means
            boxes_deltas = boxes_deltas.view(-1, 4 * imdb.num_classes)
        pred_boxes = bbox_transform_inv_cls(boxes, boxes_deltas)
        pred_boxes = clip_boxes_cls(pred_boxes, im_info[0])
        # rescale predictions back to the original image size
        pred_boxes /= im_info[0][2].item()
        det_toc = time.time()
        detect_time = det_toc - det_tic  # FIX: was det_tic - det_toc
        nms_tic = time.time()
        if args.vis:
            im_show = Image.open(imdb.image_path_at(i))
        # per-class thresholding + NMS (class 0 is background, skipped)
        for j in range(1, imdb.num_classes):
            inds = torch.nonzero(scores[:, j] > args.thresh).view(-1)
            if inds.numel() > 0:
                cls_score = scores[:, j][inds]
                _, order = torch.sort(cls_score, 0, True)
                cls_boxes = pred_boxes[inds][:, j * 4:(j + 1) * 4]
                cls_dets = torch.cat((cls_boxes, cls_score.unsqueeze(1)), 1)
                cls_dets = cls_dets[order]
                keep = nms(cls_dets, 0.3)
                cls_dets = cls_dets[keep.view(-1).long()]
                if args.vis:
                    cls_name_dets = np.repeat(j, cls_dets.size(0))
                    im_show = draw_detection_boxes(im_show,
                                                   cls_dets.cpu().numpy(),
                                                   cls_name_dets,
                                                   imdb.classes, 0.5)
                all_boxes[j][i] = cls_dets.cpu().numpy()
            else:
                all_boxes[j][i] = empty_array
        # keep at most max_per_image detections per image, across classes
        if args.max_per_image > 0:
            image_scores = np.hstack(
                [all_boxes[j][i][:, -1] for j in range(1, imdb.num_classes)])
            if len(image_scores) > args.max_per_image:
                image_thresh = np.sort(image_scores)[-args.max_per_image]
                for j in range(1, imdb.num_classes):
                    keep = np.where(
                        all_boxes[j][i][:, -1] >= image_thresh)[0]
                    all_boxes[j][i] = all_boxes[j][i][keep, :]
        if args.vis:
            plt.imshow(im_show)
            plt.show()
        nms_toc = time.time()
        nms_time = nms_toc - nms_tic  # FIX: was nms_tic - nms_toc
        sys.stdout.write('im_detect: {:d}/{:d} {:.3f}s {:.3f}s \r' \
            .format(i + 1, num_images, detect_time, nms_time))
        sys.stdout.flush()
    with open(det_file_path, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    print('Evaluating detections')
    imdb.evaluate_detections(all_boxes, args.output_dir)
    end_time = time.time()
    # NOTE(review): start_time is reset per image, so this reports only the
    # time since the LAST image began, not the whole test — confirm intent.
    print("test time: %0.4fs" % (end_time - start_time))
def test():
    """Evaluate a YOLOv2 checkpoint on a VOC test set.

    Collects per-class detections into `all_boxes`, pickles them, optionally
    visualizes boxes, then calls the imdb's `evaluate_detections`.

    Fix vs. the previous revision: `torch.cuda.is_available` was referenced
    without calling it, so the bound function was always truthy and the
    CPU-only `map_location='cpu'` branch was unreachable.
    """
    args = parse_args()
    if args.vis:
        # when visualizing, keep only confident detections
        args.conf_thresh = 0.5
    # load test data
    if args.dataset == 'voc07test':
        dataset_name = 'voc_2007_test'
    elif args.dataset == 'voc12test':
        dataset_name = 'voc_2012_test'
    else:
        raise NotImplementedError
    test_imdb = get_imdb(dataset_name)
    test_dataset = RoiDataset(test_imdb, train=False)
    test_dataloader = DataLoader(test_dataset,
                                 batch_size=args.batch_size,
                                 num_workers=args.num_workers,
                                 shuffle=False)
    # load model
    model = YOLOv2()
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)
    weight_file_path = os.path.join(
        args.output_dir, 'yolov2_epoch_{}.pth'.format(args.check_epoch))
    if torch.cuda.is_available():  # FIX: was `torch.cuda.is_available` (no call)
        state_dict = torch.load(weight_file_path)
    else:
        state_dict = torch.load(weight_file_path, map_location='cpu')
    model.load_state_dict(state_dict['model'])
    if args.use_cuda:
        model = model.cuda()
    model.eval()
    num_data = len(test_dataset)
    # all_boxes[cls][image] -> (N, 5) ndarray of [x1, y1, x2, y2, score]
    all_boxes = [[[] for _ in range(num_data)]
                 for _ in range(test_imdb.num_classes)]
    img_id = -1
    det_file = os.path.join(args.output_dir, 'detections.pkl')
    with torch.no_grad():
        for batch_size, (im_data, im_infos) in enumerate(test_dataloader):
            if args.use_cuda:
                im_data = im_data.cuda()
                im_infos = im_infos.cuda()
            im_data_variable = Variable(im_data)
            outputs = model(im_data_variable)
            for i in range(im_data.size(0)):
                img_id += 1
                output = [item[i].data for item in outputs]
                im_info = im_infos[i]
                # NOTE(review): `eval` here is presumably the project's
                # detection post-processor imported elsewhere, shadowing the
                # builtin — confirm and consider renaming at the import site.
                detections = eval(output, im_info, args.conf_thresh,
                                  args.nms_thresh)
                if len(detections) > 0:
                    # `cls` loop index so the batch index `i` is not shadowed
                    for cls in range(cfg.CLASS_NUM):
                        idxs = torch.nonzero(detections[:, -1] == cls).view(-1)
                        if idxs.numel() > 0:
                            cls_det = torch.zeros((idxs.numel(), 5))
                            cls_det[:, :4] = detections[idxs, :4]
                            # score = objectness * class probability
                            cls_det[:, 4] = detections[idxs, 4] * \
                                detections[idxs, 5]
                            all_boxes[cls][img_id] = cls_det.cpu().numpy()
                if args.vis:
                    img = Image.open(test_imdb.image_path_at(img_id))
                    if len(detections) == 0:
                        continue
                    det_boxes = detections[:, :5].cpu().numpy()
                    det_classes = detections[:, -1].long().cpu().numpy()
                    imshow = draw_detection_boxes(
                        img,
                        det_boxes,
                        det_classes,
                        class_names=test_imdb.classes)
                    plt.figure()
                    plt.imshow(imshow)
                    plt.show()
    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)
    test_imdb.evaluate_detections(all_boxes, output_dir=args.output_dir)