def evaluate(self, model, half=False, distributed=False):
    """COCO average precision (AP) evaluation.

    Iterate inference on the test dataset; results are scored by the COCO API.

    Args:
        model: model object (possibly wrapped in apex DistributedDataParallel).
        half (bool): run inference in fp16 tensors.
        distributed (bool): gather predictions/timings across ranks.

    Returns:
        ap50_95 (float): COCO AP for IoU=0.50:0.95.
        ap50 (float): COCO AP for IoU=0.50.
        (Non-main ranks return (0, 0).)
    """
    # Unwrap DDP; being wrapped implies distributed evaluation.
    if isinstance(model, apex.parallel.DistributedDataParallel):
        model = model.module
        distributed = True

    model = model.eval()
    cuda = torch.cuda.is_available()
    if half:
        Tensor = torch.cuda.HalfTensor if cuda else torch.HalfTensor
    else:
        Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

    ids = []
    data_dict = []
    img_num = 0
    indices = list(range(self.num_images))
    # Each rank evaluates a strided shard of the dataset.
    if distributed:
        dis_indices = indices[distributed_util.get_rank()::distributed_util.get_world_size()]
    else:
        dis_indices = indices
    progress_bar = tqdm if distributed_util.is_main_process() else iter
    num_classes = 80 if not self.voc else 20

    inference_time = 0
    nms_time = 0
    # First 10 iterations are treated as warm-up and excluded from timing.
    n_samples = len(dis_indices) - 10

    for k, i in enumerate(progress_bar(dis_indices)):
        img, _, info_img, id_ = self.dataset[i]  # load a batch
        info_img = [float(info) for info in info_img]
        id_ = int(id_)
        ids.append(id_)
        with torch.no_grad():
            img = Variable(img.type(Tensor).unsqueeze(0))
            if k > 9:
                start = time.time()

            if self.vis:
                outputs, fuse_weights, fused_f = model(img)
            else:
                outputs = model(img)

            if k > 9:
                infer_end = time.time()
                inference_time += (infer_end - start)

            outputs = postprocess(
                outputs, num_classes, self.confthre, self.nmsthre)

            if k > 9:
                nms_end = time.time()
                nms_time += (nms_end - infer_end)

        if outputs[0] is None:
            continue
        outputs = outputs[0].cpu().data

        bboxes = outputs[:, 0:4]
        # Rescale from network input size back to original image size.
        bboxes[:, 0::2] *= info_img[0] / self.img_size[0]
        bboxes[:, 1::2] *= info_img[1] / self.img_size[1]
        # Convert (x1, y1, x2, y2) -> (x, y, w, h) as COCO expects.
        bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 0]
        bboxes[:, 3] = bboxes[:, 3] - bboxes[:, 1]
        cls = outputs[:, 6]
        scores = outputs[:, 4] * outputs[:, 5]
        for ind in range(bboxes.shape[0]):
            label = self.dataset.class_ids[int(cls[ind])]
            A = {"image_id": id_, "category_id": label,
                 "bbox": bboxes[ind].numpy().tolist(),
                 "score": scores[ind].numpy().item(),
                 "segmentation": []}  # COCO json format
            data_dict.append(A)

        if self.vis:
            o_img, _, _, _ = self.dataset.pull_item(i)
            make_vis('COCO', i, o_img, fuse_weights, fused_f)
            class_names = self.dataset._classes
            make_pred_vis('COCO', i, o_img, class_names, bboxes, cls, scores)

        if DEBUG and distributed_util.is_main_process():
            # BUGFIX: pull_item returns a 4-tuple (see vis branch above);
            # the original 2-value unpack raised ValueError under DEBUG.
            o_img, _, _, _ = self.dataset.pull_item(i)
            class_names = self.dataset._classes
            make_pred_vis('COCO', i, o_img, class_names, bboxes, cls, scores)

    if distributed:
        distributed_util.synchronize()
        data_dict = _accumulate_predictions_from_multiple_gpus(data_dict)
        # Sum timings/sample counts over all ranks onto rank 0.
        inference_time = torch.FloatTensor(1).type(Tensor).fill_(inference_time)
        nms_time = torch.FloatTensor(1).type(Tensor).fill_(nms_time)
        n_samples = torch.LongTensor(1).type(Tensor).fill_(n_samples)
        distributed_util.synchronize()
        torch.distributed.reduce(inference_time, dst=0)
        torch.distributed.reduce(nms_time, dst=0)
        torch.distributed.reduce(n_samples, dst=0)
        inference_time = inference_time.item()
        nms_time = nms_time.item()
        n_samples = n_samples.item()

    if not distributed_util.is_main_process():
        return 0, 0

    print('Main process Evaluating...')

    annType = ['segm', 'bbox', 'keypoints']
    a_infer_time = 1000 * inference_time / (n_samples)
    a_nms_time = 1000 * nms_time / (n_samples)

    print('Average forward time: %.2f ms, Average NMS time: %.2f ms, Average inference time: %.2f ms' %(a_infer_time, \
            a_nms_time, (a_infer_time+a_nms_time)))

    # Evaluate the Dt (detection) json comparing with the ground truth
    if len(data_dict) > 0:
        cocoGt = self.dataset.coco
        # workaround: temporarily write data to json file because pycocotools can't process dict in py36.
        if self.testset:
            # BUGFIX: close the file handle (original leaked it via bare open()).
            with open('yolov3_2017.json', 'w') as f:
                json.dump(data_dict, f)
            cocoDt = cocoGt.loadRes('yolov3_2017.json')
        else:
            _, tmp = tempfile.mkstemp()
            with open(tmp, 'w') as f:
                json.dump(data_dict, f)
            cocoDt = cocoGt.loadRes(tmp)
        cocoEval = COCOeval(self.dataset.coco, cocoDt, annType[1])
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()
        return cocoEval.stats[0], cocoEval.stats[1]
    else:
        return 0, 0
def eval():
    """YOLOv3 evaler. See README for details.

    Loads args + YAML config, builds the evaluator and model, optionally
    restores a checkpoint, then runs evaluation and exits the process.
    """
    args = parse_args()
    print("Setting Arguments.. : ", args)

    cuda = torch.cuda.is_available() and args.use_cuda

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")

    # Parse config settings
    with open(args.cfg, 'r') as f:
        cfg = yaml.safe_load(f)
    print("successfully loaded config file: ", cfg)

    test_size = (args.test_size, args.test_size)

    if args.dataset == 'COCO':
        evaluator = COCOAPIEvaluator(
            data_dir='data/COCO/',
            img_size=test_size,
            confthre=0.001,
            nmsthre=0.65,
            testset=args.testset,
            vis=args.vis)
        num_class = 80
    elif args.dataset == 'VOC':
        '''
        # COCO style evaluation, you have to convert xml annotation files into a json file.
        evaluator = COCOAPIEvaluator(
            data_dir='data/VOC/',
            img_size=test_size,
            confthre=cfg['TEST']['CONFTHRE'],
            nmsthre=cfg['TEST']['NMSTHRE'],
            testset=args.testset,
            voc = True)
        '''
        evaluator = VOCEvaluator(
            data_dir='data/VOC/',
            img_size=test_size,
            confthre=0.001,
            nmsthre=0.65,
            vis=args.vis)
        num_class = 20
    else:
        # BUGFIX: previously fell through and later hit a confusing NameError
        # on `evaluator`; fail fast with a clear message instead.
        raise ValueError('Only COCO and VOC datasets are supported!')

    # Initiate model
    if args.asff:
        from models.yolov3_asff import YOLOv3
        print('Testing YOLOv3 with ASFF!')
        model = YOLOv3(num_classes=num_class, rfb=args.rfb, vis=args.vis)
    else:
        from models.yolov3_baseline import YOLOv3
        print('Testing YOLOv3 strong baseline!')
        if args.vis:
            print('Visualization is not supported for YOLOv3 baseline model')
            args.vis = False
        model = YOLOv3(num_classes=num_class, rfb=args.rfb)

    if args.checkpoint:
        print("loading pytorch ckpt...", args.checkpoint)
        cpu_device = torch.device("cpu")
        ckpt = torch.load(args.checkpoint, map_location=cpu_device)
        # strict=False tolerates head/shape mismatches in the checkpoint.
        model.load_state_dict(ckpt, strict=False)
    if cuda:
        print("using cuda")
        torch.backends.cudnn.benchmark = True
        device = torch.device("cuda")
        model = model.to(device)

    if args.half:
        model = model.half()

    if args.ngpu > 1:
        if args.distributed:
            model = apex.parallel.DistributedDataParallel(model, delay_allreduce=True)
            #model = apex.parallel.DistributedDataParallel(model)
        else:
            model = nn.DataParallel(model)

    if args.distributed:
        distributed_util.synchronize()

    ap50_95, ap50 = evaluator.evaluate(model, args.half, args.distributed)
    if args.distributed:
        distributed_util.synchronize()
    sys.exit(0)
def main():
    """YOLOv3 trainer. See README for details.

    Parses args + YAML config, builds dataset/model/optimizer, then runs the
    epoch loop with periodic evaluation, LR scheduling, DropBlock scheduling,
    mixup disabling near the end, and multi-scale input resizing.
    """
    args = parse_args()
    print("Setting Arguments.. : ", args)
    cuda = torch.cuda.is_available() and args.use_cuda
    os.makedirs(args.log_dir, exist_ok=True)
    os.makedirs(args.save_dir, exist_ok=True)

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")

    save_prefix = 'yolov3'

    # Parse config settings
    with open(args.cfg, 'r') as f:
        cfg = yaml.safe_load(f)
    print("successfully loaded config file: ", cfg)

    lr = cfg['TRAIN']['LR']
    epochs = cfg['TRAIN']['MAXEPOCH']
    cos = cfg['TRAIN']['COS']
    sybn = cfg['TRAIN']['SYBN']
    mixup = cfg['TRAIN']['MIX']
    no_mixup_epochs = cfg['TRAIN']['NO_MIXUP_EPOCHS']
    label_smooth = cfg['TRAIN']['LABAL_SMOOTH']
    momentum = cfg['TRAIN']['MOMENTUM']
    burn_in = cfg['TRAIN']['BURN_IN']
    batch_size = cfg['TRAIN']['BATCHSIZE']
    decay = cfg['TRAIN']['DECAY']
    ignore_thre = cfg['TRAIN']['IGNORETHRE']
    random_resize = cfg['TRAIN']['RANDRESIZE']
    input_size = (cfg['TRAIN']['IMGSIZE'], cfg['TRAIN']['IMGSIZE'])
    test_size = (args.test_size, args.test_size)
    step = (180, 240)  # for no cos lr shedule training

    # Learning rate setup
    base_lr = lr

    if args.dataset == 'COCO':
        dataset = COCODataset(data_dir='data/COCO/',
                              img_size=input_size,
                              preproc=TrainTransform(rgb_means=(0.485, 0.456, 0.406),
                                                     std=(0.229, 0.224, 0.225),
                                                     max_labels=50),
                              debug=args.debug)
        num_class = 80
    elif args.dataset == 'VOC':
        train_sets = [('2007', 'trainval'), ('2012', 'trainval')]
        dataset = VOCDetection(root='data/VOC', image_sets=train_sets,
                               input_dim=input_size,
                               preproc=TrainTransform(rgb_means=(0.485, 0.456, 0.406),
                                                      std=(0.229, 0.224, 0.225),
                                                      max_labels=30))
        num_class = 20
    else:
        print('Only COCO and VOC datasets are supported!')
        return

    save_prefix += ('_' + args.dataset)
    if label_smooth:
        save_prefix += '_label_smooth'

    # Initiate model
    if args.asff:
        save_prefix += '_asff'
        from models.yolov3_asff import YOLOv3
        print('Training YOLOv3 with ASFF!')
        model = YOLOv3(num_classes=num_class, ignore_thre=ignore_thre,
                       label_smooth=label_smooth, rfb=args.rfb, vis=args.vis)
    else:
        save_prefix += '_baseline'
        from models.yolov3_baseline import YOLOv3
        print('Training YOLOv3 strong baseline!')
        if args.vis:
            print('Visualization is not supported for YOLOv3 baseline model')
            args.vis = False
        model = YOLOv3(num_classes=num_class, ignore_thre=ignore_thre,
                       label_smooth=label_smooth, rfb=args.rfb)

    save_to_disk = (not args.distributed) or distributed_util.get_rank() == 0

    def init_yolo(m):
        # Kaiming init for conv/linear weights; BN weights -> 1, biases -> 0.
        for key in m.state_dict():
            if key.split('.')[-1] == 'weight':
                if 'conv' in key:
                    init.kaiming_normal_(m.state_dict()[key], a=0.1, mode='fan_in')
                if 'linear' in key:
                    init.kaiming_normal_(m.state_dict()[key], a=0.0, mode='fan_in')
                if 'bn' in key:
                    m.state_dict()[key][...] = 1
            elif key.split('.')[-1] == 'bias':
                m.state_dict()[key][...] = 0

    model.apply(init_yolo)

    if sybn:
        model = apex.parallel.convert_syncbn_model(model)

    if args.checkpoint:
        print("loading pytorch ckpt...", args.checkpoint)
        cpu_device = torch.device("cpu")
        ckpt = torch.load(args.checkpoint, map_location=cpu_device)
        model.load_state_dict(ckpt, strict=False)
        #model.load_state_dict(ckpt)

    if cuda:
        print("using cuda")
        torch.backends.cudnn.benchmark = True
        device = torch.device("cuda")
        model = model.to(device)

    if args.half:
        model = model.half()

    if args.ngpu > 1:
        if args.distributed:
            model = apex.parallel.DistributedDataParallel(model, delay_allreduce=True)
            #model = apex.parallel.DistributedDataParallel(model)
        else:
            model = nn.DataParallel(model)

    if args.tfboard and save_to_disk:
        print("using tfboard")
        from torch.utils.tensorboard import SummaryWriter
        tblogger = SummaryWriter(args.log_dir)

    model.train()

    if mixup:
        from dataset.mixupdetection import MixupDetection
        dataset = MixupDetection(
            dataset,
            preproc=TrainTransform(rgb_means=(0.485, 0.456, 0.406),
                                   std=(0.229, 0.224, 0.225),
                                   max_labels=50),
        )
        dataset.set_mixup(np.random.beta, 1.5, 1.5)
        save_prefix += '_mixup'

    if args.distributed:
        sampler = torch.utils.data.DistributedSampler(dataset)
    else:
        sampler = torch.utils.data.RandomSampler(dataset)

    batch_sampler = YoloBatchSampler(sampler=sampler, batch_size=batch_size,
                                     drop_last=False, input_dimension=input_size)
    dataloader = DataLoader(dataset, batch_sampler=batch_sampler,
                            num_workers=args.n_cpu, pin_memory=True)
    # NOTE: removed dead `dataiterator = iter(dataloader)` — it spawned loader
    # workers and was never used; iteration happens via the epoch loop below.

    if args.dataset == 'COCO':
        evaluator = COCOAPIEvaluator(data_dir='data/COCO/',
                                     img_size=test_size,
                                     confthre=cfg['TEST']['CONFTHRE'],
                                     nmsthre=cfg['TEST']['NMSTHRE'],
                                     testset=args.testset,
                                     vis=args.vis)
    elif args.dataset == 'VOC':
        '''
        # COCO style evaluation, you have to convert xml annotation files into a json file.
        evaluator = COCOAPIEvaluator(
            data_dir='data/VOC/',
            img_size=test_size,
            confthre=cfg['TEST']['CONFTHRE'],
            nmsthre=cfg['TEST']['NMSTHRE'],
            testset=args.testset,
            voc = True)
        '''
        evaluator = VOCEvaluator(data_dir='data/VOC/',
                                 img_size=test_size,
                                 confthre=cfg['TEST']['CONFTHRE'],
                                 nmsthre=cfg['TEST']['NMSTHRE'],
                                 vis=args.vis)

    dtype = torch.float16 if args.half else torch.float32

    # optimizer setup
    # set weight decay only on conv.weight
    if args.no_wd:
        params_dict = dict(model.named_parameters())
        params = []
        for key, value in params_dict.items():
            if 'conv.weight' in key:
                params += [{'params': value, 'weight_decay': decay}]
            else:
                params += [{'params': value, 'weight_decay': 0.0}]
        save_prefix += '_no_wd'
    else:
        params = model.parameters()

    optimizer = optim.SGD(params, lr=base_lr, momentum=momentum,
                          dampening=0, weight_decay=decay)
    if args.half:
        optimizer = FP16_Optimizer(optimizer, verbose=False)

    if cos:
        save_prefix += '_cos'

    tmp_lr = base_lr

    def set_lr(tmp_lr):
        for param_group in optimizer.param_groups:
            param_group['lr'] = tmp_lr

    # start training loop
    start = time.time()
    epoch = args.start_epoch
    epoch_size = len(dataset) // (batch_size * args.ngpu)
    while epoch < epochs + 1:
        if args.distributed:
            batch_sampler.sampler.set_epoch(epoch)

        # Near the end of training: evaluate every epoch and turn mixup off,
        # rebuilding the dataloader over the un-mixed dataset.
        if epoch > epochs - no_mixup_epochs + 1:
            args.eval_interval = 1
            if mixup:
                print('Disable mix up now!')
                mixup = False
                dataset.set_mixup(None)
                if args.distributed:
                    sampler = torch.utils.data.DistributedSampler(dataset)
                else:
                    sampler = torch.utils.data.RandomSampler(dataset)
                batch_sampler = YoloBatchSampler(sampler=sampler,
                                                 batch_size=batch_size,
                                                 drop_last=False,
                                                 input_dimension=input_size)
                dataloader = DataLoader(dataset, batch_sampler=batch_sampler,
                                        num_workers=args.n_cpu, pin_memory=True)

        #### DropBlock Shedule #####
        Drop_layer = [16, 24, 33]
        if args.asff:
            Drop_layer = [16, 22, 29]
        # Grow block sizes at epochs 5 / 80 / 150 (or on resume past them).
        if (epoch == 5 or (epoch == args.start_epoch and args.start_epoch > 5)) and (args.dropblock):
            block_size = [1, 3, 5]
            keep_p = [0.9, 0.9, 0.9]
            for i in range(len(Drop_layer)):
                model.module.module_list[Drop_layer[i]].reset(block_size[i], keep_p[i])
        if (epoch == 80 or (epoch == args.start_epoch and args.start_epoch > 80)) and (args.dropblock):
            block_size = [3, 5, 7]
            keep_p = [0.9, 0.9, 0.9]
            for i in range(len(Drop_layer)):
                model.module.module_list[Drop_layer[i]].reset(block_size[i], keep_p[i])
        if (epoch == 150 or (epoch == args.start_epoch and args.start_epoch > 150)) and (args.dropblock):
            block_size = [7, 7, 7]
            keep_p = [0.9, 0.9, 0.9]
            for i in range(len(Drop_layer)):
                model.module.module_list[Drop_layer[i]].reset(block_size[i], keep_p[i])

        for iter_i, (imgs, targets, img_info, idx) in enumerate(dataloader):
            #evaluation
            if ((epoch % args.eval_interval == 0) and epoch > args.start_epoch and iter_i == 0) or args.test:
                if not args.test and save_to_disk:
                    torch.save(model.module.state_dict(),
                               os.path.join(args.save_dir,
                                            save_prefix + '_' + repr(epoch) + '.pth'))
                if args.distributed:
                    distributed_util.synchronize()
                ap50_95, ap50 = evaluator.evaluate(model, args.half)
                if args.distributed:
                    distributed_util.synchronize()
                if args.test:
                    sys.exit(0)
                model.train()
                if args.tfboard and save_to_disk:
                    tblogger.add_scalar('val/COCOAP50', ap50, epoch)
                    tblogger.add_scalar('val/COCOAP50_95', ap50_95, epoch)

            # learning rate scheduling (cos or step)
            if epoch < burn_in:
                tmp_lr = base_lr * pow((iter_i + epoch * epoch_size) * 1. / (burn_in * epoch_size), 3)
                set_lr(tmp_lr)
            elif cos:
                if epoch <= epochs - no_mixup_epochs and epoch > 20:
                    min_lr = 0.00001
                    tmp_lr = min_lr + 0.5*(base_lr-min_lr)*(1+math.cos(math.pi*(epoch-20)*1./\
                        (epochs-no_mixup_epochs-20)))
                elif epoch > epochs - no_mixup_epochs:
                    tmp_lr = 0.00001
                set_lr(tmp_lr)
            elif epoch == burn_in:
                tmp_lr = base_lr
                set_lr(tmp_lr)
            # BUGFIX: was `epoch in steps` but the tuple is named `step`
            # (defined above as (180, 240)) — raised NameError when the
            # step-decay branch was reached.
            elif epoch in step and iter_i == 0:
                tmp_lr = tmp_lr * 0.1
                set_lr(tmp_lr)

            optimizer.zero_grad()

            imgs = Variable(imgs.to(device).to(dtype))
            targets = Variable(targets.to(device).to(dtype), requires_grad=False)
            loss_dict = model(imgs, targets, epoch)
            loss_dict_reduced = reduce_loss_dict(loss_dict)
            loss = sum(loss for loss in loss_dict['losses'])
            if args.half:
                optimizer.backward(loss)
            else:
                loss.backward()
            #torch.nn.utils.clip_grad_norm_(model.parameters(), 10)
            optimizer.step()

            if iter_i % 10 == 0 and save_to_disk:
                # logging
                end = time.time()
                print(
                    '[Epoch %d/%d][Iter %d/%d][lr %.6f]'
                    '[Loss: anchor %.2f, iou %.2f, l1 %.2f, conf %.2f, cls %.2f, imgsize %d, time: %.2f]'
                    % (epoch, epochs, iter_i, epoch_size, tmp_lr,
                       sum(anchor_loss for anchor_loss in loss_dict_reduced['anchor_losses']).item(),
                       sum(iou_loss for iou_loss in loss_dict_reduced['iou_losses']).item(),
                       sum(l1_loss for l1_loss in loss_dict_reduced['l1_losses']).item(),
                       sum(conf_loss for conf_loss in loss_dict_reduced['conf_losses']).item(),
                       sum(cls_loss for cls_loss in loss_dict_reduced['cls_losses']).item(),
                       input_size[0], end - start),
                    flush=True)
                start = time.time()
                if args.tfboard and save_to_disk:
                    tblogger.add_scalar('train/total_loss',
                                        sum(loss for loss in loss_dict_reduced['losses']).item(),
                                        epoch * epoch_size + iter_i)

            # random resizing: rank 0 picks the size, broadcasts to others.
            if random_resize and iter_i % 10 == 0 and iter_i > 0:
                tensor = torch.LongTensor(1).to(device)
                if args.distributed:
                    distributed_util.synchronize()
                if save_to_disk:
                    if epoch > epochs - 10:
                        # Lock to the eval resolution for the final epochs.
                        size = 416 if args.dataset == 'VOC' else 608
                    else:
                        size = random.randint(*(10, 19))
                        size = int(32 * size)
                    tensor.fill_(size)
                if args.distributed:
                    distributed_util.synchronize()
                    dist.broadcast(tensor, 0)
                input_size = dataloader.change_input_dim(multiple=tensor.item(),
                                                         random_range=None)
                if args.distributed:
                    distributed_util.synchronize()

        epoch += 1

    if not args.test and save_to_disk:
        torch.save(model.module.state_dict(),
                   os.path.join(args.save_dir, "yolov3_" + args.dataset + '_Final.pth'))

    if args.distributed:
        distributed_util.synchronize()
    ap50_95, ap50 = evaluator.evaluate(model, args.half)

    if args.tfboard and save_to_disk:
        tblogger.close()
def evaluate(self, model, half=False, distributed=False):
    """Single-class (VOC-style) AP evaluation.

    Iterate inference on the test dataset and score the collected detections
    with the dataset's own `evaluate_detections`.

    Args:
        model: model object (possibly wrapped in torch DistributedDataParallel).
        half (bool): run inference in fp16 tensors.
        distributed (bool): kept for interface parity; every rank currently
            iterates the full index list (cross-GPU accumulation is disabled).

    Returns:
        mAP50 (float), mAP70 (float). Non-main ranks return (0, 0).
    """
    if isinstance(model, torch.nn.parallel.DistributedDataParallel):
        model = model.module
    model.eval()
    cuda = torch.cuda.is_available()
    if half:
        Tensor = torch.cuda.HalfTensor if cuda else torch.HalfTensor
    else:
        Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

    indices = list(range(self.num_images))
    dis_indices = indices[distributed_util.get_rank()::distributed_util.get_world_size()]
    progress_bar = tqdm if distributed_util.is_main_process() else iter
    num_classes = 1
    predictions = {}

    if distributed_util.is_main_process():
        inference_time = 0
        nms_time = 0
        # NOTE(review): n_samples is this rank's shard size, but the loop
        # below runs over ALL indices — with world_size > 1 the timing
        # average would be off; harmless in the single-process case.
        n_samples = len(dis_indices)

    for i in progress_bar(indices):
        img, _, info_img, id_ = self.dataset[i]  # load a batch
        info_img = [float(info) for info in info_img]
        with torch.no_grad():
            img = Variable(img.type(Tensor).unsqueeze(0))
            # First 10 iterations are warm-up and excluded from timing.
            if distributed_util.is_main_process() and i > 9:
                start = time.time()

            if self.vis:
                outputs, fuse_weights, fused_f = model(img)
            else:
                outputs = model(img)

            if distributed_util.is_main_process() and i > 9:
                infer_end = time.time()
                inference_time += (infer_end - start)

            outputs = postprocess(outputs, 1, self.confthre, self.nmsthre)

            if distributed_util.is_main_process() and i > 9:
                nms_end = time.time()
                nms_time += (nms_end - infer_end)

        if outputs[0] is None:
            predictions[i] = (None, None, None)
            continue
        outputs = outputs[0].cpu().data

        bboxes = outputs[:, 0:4]
        # Rescale (x1, y1, x2, y2) back to the original image size.
        bboxes[:, 0::2] *= info_img[0] / self.img_size[0]
        bboxes[:, 1::2] *= info_img[1] / self.img_size[1]
        cls = outputs[:, 6]
        scores = outputs[:, 4] * outputs[:, 5]
        predictions[i] = (bboxes, cls, scores)

        if self.vis:
            o_img, _, _, _ = self.dataset.pull_item(i)
            make_vis('SWIM', i, o_img, fuse_weights, fused_f)
            class_names = self.dataset._classes
            # Visualization wants (x, y, w, h); keep `bboxes` untouched.
            bbox = bboxes.clone()
            bbox[:, 2] = bbox[:, 2] - bbox[:, 0]
            bbox[:, 3] = bbox[:, 3] - bbox[:, 1]
            make_pred_vis('SWIM', i, o_img, class_names, bbox, cls, scores)

        if DEBUG and distributed_util.is_main_process():
            o_img, _, _, _ = self.dataset.pull_item(i)
            class_names = self.dataset._classes
            bbox = bboxes.clone()
            bbox[:, 2] = bbox[:, 2] - bbox[:, 0]
            bbox[:, 3] = bbox[:, 3] - bbox[:, 1]
            make_pred_vis('SWIM', i, o_img, class_names, bbox, cls, scores)

    distributed_util.synchronize()
    #predictions = _accumulate_predictions_from_multiple_gpus(predictions)
    if not distributed_util.is_main_process():
        return 0, 0

    print('Main process Evaluating...')

    a_infer_time = 1000 * inference_time / (n_samples - 10)
    a_nms_time = 1000 * nms_time / (n_samples - 10)

    print('Average forward time: %.2f ms, Average NMS time: %.2f ms, Average inference time: %.2f ms' %(a_infer_time, \
            a_nms_time, (a_infer_time+a_nms_time)))

    # Repackage per-image predictions into [class][image] -> (n, 5) arrays
    # of (x1, y1, x2, y2, score) as evaluate_detections expects.
    all_boxes = [[[] for _ in range(self.num_images)]
                 for _ in range(num_classes)]
    for img_num in range(self.num_images):
        bboxes, cls, scores = predictions[img_num]
        if bboxes is None:
            for j in range(num_classes):
                all_boxes[j][img_num] = np.empty([0, 5], dtype=np.float32)
            continue
        for j in range(num_classes):
            mask_c = (cls == j)
            if sum(mask_c) == 0:
                all_boxes[j][img_num] = np.empty([0, 5], dtype=np.float32)
                continue
            c_dets = torch.cat((bboxes, scores.unsqueeze(1)), dim=1)
            all_boxes[j][img_num] = c_dets[mask_c].numpy()
        sys.stdout.write('im_eval: {:d}/{:d} \r'.format(img_num + 1, self.num_images))
        sys.stdout.flush()

    with tempfile.TemporaryDirectory() as tempdir:
        mAP50, mAP70 = self.dataset.evaluate_detections(all_boxes, tempdir)
        return mAP50, mAP70