def evaluate(model, data_loader, device, mAP_list=None):
    """Evaluate ``model`` on ``data_loader`` with the COCO evaluator.

    Args:
        model: Detection model. It is switched to eval mode and called as
            ``model(images)`` with a list of image tensors; it must return
            one dict of tensors per image.
        data_loader: Iterable of ``(images, targets)`` batches. Its
            ``dataset`` attribute is converted to a COCO API object.
        device: Device (``torch.device`` or device string) the inputs are
            moved to.
        mAP_list: Optional list. When it is a list, ``stats[1]`` of the first
            IoU type is appended to it (named ``voc_mAP`` in this file;
            presumably AP at IoU=0.5 -- confirm against the evaluator).

    Returns:
        The ``CocoEvaluator`` after ``accumulate()`` and ``summarize()``.
    """
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    # Compare by device type so that a plain string such as "cpu" is handled
    # correctly and only CUDA devices are synchronized.
    use_cuda = torch.device(device).type == "cuda"

    model.eval()
    metric_logger = MetricLogger(delimiter=" ")
    header = "Test: "

    try:
        coco = get_coco_api_from_dataset(data_loader.dataset)
        iou_types = _get_iou_types(model)
        coco_evaluator = CocoEvaluator(coco, iou_types)

        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            for image, targets in metric_logger.log_every(data_loader, 100, header):
                image = [img.to(device) for img in image]
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

                # Skip GPU-specific calls when running on the CPU.
                if use_cuda:
                    torch.cuda.synchronize(device)

                model_time = time.time()
                outputs = model(image)
                outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
                model_time = time.time() - model_time

                res = {
                    target["image_id"].item(): output
                    for target, output in zip(targets, outputs)
                }

                evaluator_time = time.time()
                coco_evaluator.update(res)
                evaluator_time = time.time() - evaluator_time
                metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

        # gather the stats from all processes
        metric_logger.synchronize_between_processes()
        print("Averaged stats:", metric_logger)
        coco_evaluator.synchronize_between_processes()

        # accumulate predictions from all images
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
    finally:
        # Always restore the caller's thread setting, even if evaluation fails.
        torch.set_num_threads(n_threads)

    print_txt = coco_evaluator.coco_eval[iou_types[0]].stats
    voc_mAP = print_txt[1]
    if isinstance(mAP_list, list):
        mAP_list.append(voc_mAP)

    return coco_evaluator
def evaluate(model, data_loader, device):
    """Evaluate ``model`` on ``data_loader`` with the COCO evaluator.

    Args:
        model: Detection model. It is switched to eval mode and called as
            ``model(images)`` with a list of image tensors; it must return
            one dict of tensors per image.
        data_loader: Iterable of ``(images, targets)`` batches. Its
            ``dataset`` attribute is converted to a COCO API object.
        device: Device (``torch.device`` or device string) the images are
            moved to.

    Returns:
        The ``CocoEvaluator`` after ``accumulate()`` and ``summarize()``.
    """
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    # torch.cuda.synchronize() raises on CPU-only setups, so only call it
    # when the evaluation really runs on a CUDA device.
    use_cuda = torch.device(device).type == "cuda"

    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = 'Test:'

    try:
        coco = get_coco_api_from_dataset(data_loader.dataset)
        iou_types = _get_iou_types(model)
        coco_evaluator = CocoEvaluator(coco, iou_types)

        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            for images, targets in metric_logger.log_every(data_loader, 100, header):
                images = [img.to(device) for img in images]

                if use_cuda:
                    torch.cuda.synchronize()

                model_time = time.time()
                outputs = model(images)
                outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
                model_time = time.time() - model_time

                res = {target["image_id"].item(): output
                       for target, output in zip(targets, outputs)}

                evaluator_time = time.time()
                coco_evaluator.update(res)
                evaluator_time = time.time() - evaluator_time
                metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

        # gather the stats from all processes
        metric_logger.synchronize_between_processes()
        print("Averaged stats:", metric_logger)
        coco_evaluator.synchronize_between_processes()

        # accumulate predictions from all images
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
    finally:
        # Always restore the caller's thread setting, even if evaluation fails.
        torch.set_num_threads(n_threads)

    return coco_evaluator
def main(opt, hyp):
    """Train a Darknet detector with DistributedDataParallel.

    Each epoch runs ``train_util.train_one_epoch`` and, unless disabled,
    ``train_util.evaluate``. Rank -1/0 logs to TensorBoard, appends to
    ``results.txt`` and saves checkpoints under ``weights/``.

    Args:
        opt: Parsed options. Fields read here: rank, world_size, gpu, device,
            name, cfg, data, epochs, batch_size, weights, img_size,
            multi_scale, single_cls, freeze_layers, rect, cache_images,
            notest, savebest.
        hyp: Hyper-parameter dict. Keys read: lr0, lrf, momentum,
            weight_decay, cls, obj. ``lr0``, ``cls`` and ``obj`` are rescaled
            in place.

    Raises:
        EnvironmentError: If ``opt.device`` is not a CUDA device.
        KeyError: If the checkpoint in ``opt.weights`` contains keys the
            model does not have.
    """
    # Initialise the distributed state of this process.
    init_distributed_mode(opt)

    if opt.rank in [-1, 0]:
        print(opt)
        print(
            'Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/'
        )
        tb_writer = SummaryWriter(comment=opt.name)

    device = torch.device(opt.device)
    if "cuda" not in device.type:
        raise EnvironmentError("not find GPU device for training.")

    # DDP averages the gradients over all devices, so the learning rate is
    # scaled up with the effective batch size.
    hyp["lr0"] *= max(1., opt.world_size * opt.batch_size / 64)

    wdir = "weights" + os.sep  # weights dir
    best = wdir + "best.pt"
    results_file = "results.txt"

    cfg = opt.cfg
    data = opt.data
    epochs = opt.epochs
    batch_size = opt.batch_size
    # accumulate n times before optimizer update (bs 64)
    accumulate = max(round(64 / (opt.world_size * opt.batch_size)), 1)
    weights = opt.weights  # initial training weights
    imgsz_train = opt.img_size
    imgsz_test = opt.img_size  # test image sizes
    multi_scale = opt.multi_scale

    # Image sizes: must be a multiple of the grid size (32).
    gs = 32  # (pixels) grid size
    assert math.fmod(
        imgsz_test, gs) == 0, "--img-size %g must be a %g-multiple" % (imgsz_test, gs)
    grid_min, grid_max = imgsz_test // gs, imgsz_test // gs
    if multi_scale:
        imgsz_min = opt.img_size // 1.5
        imgsz_max = opt.img_size // 0.667

        # Round the min/max input sizes down to a multiple of the grid size.
        grid_min, grid_max = imgsz_min // gs, imgsz_max // gs
        imgsz_min, imgsz_max = int(grid_min * gs), int(grid_max * gs)
        imgsz_train = imgsz_max  # initialize with max size
        if opt.rank in [-1, 0]:  # only the first process prints
            print("Using multi_scale training, image range[{}, {}]".format(
                imgsz_min, imgsz_max))

    # configure run
    random.seed(0)  # fixed random seed
    data_dict = parse_data_cfg(data)
    train_path = data_dict["train"]
    test_path = data_dict["valid"]
    nc = 1 if opt.single_cls else int(data_dict["classes"])  # number of classes
    hyp["cls"] *= nc / 80  # update coco-tuned hyp['cls'] to current dataset
    hyp["obj"] *= imgsz_test / 320

    if opt.rank in [-1, 0]:
        # Remove previous results
        for f in glob.glob(results_file) + glob.glob("tmp.pk"):
            os.remove(f)
        # Checkpoints are written below this directory; make sure it exists.
        os.makedirs(wdir, exist_ok=True)

    # Initialize model
    model = Darknet(cfg).to(device)

    start_epoch = 0
    best_map = 0.0
    # Load pretrained weights when a checkpoint file is given.
    if weights.endswith(".pt"):
        ckpt = torch.load(weights, map_location=device)

        # load model
        try:
            # Build the model's state dict once instead of once per key.
            model_state = model.state_dict()
            ckpt["model"] = {
                k: v
                for k, v in ckpt["model"].items()
                if model_state[k].numel() == v.numel()
            }
            model.load_state_dict(ckpt["model"], strict=False)
        except KeyError as e:
            s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. " \
                "See https://github.com/ultralytics/yolov3/issues/657" % (opt.weights, opt.cfg, opt.weights)
            raise KeyError(s) from e

        if opt.rank in [-1, 0]:
            # load results
            if ckpt.get("training_results") is not None:
                with open(results_file, "w") as file:
                    file.write(ckpt["training_results"])  # write results.txt

        # epochs
        start_epoch = ckpt["epoch"] + 1
        if epochs < start_epoch:
            print(
                '%s has been trained for %g epochs. Fine-tuning for %g additional epochs.'
                % (opt.weights, ckpt['epoch'], epochs))
            epochs += ckpt['epoch']  # finetune additional epochs
        del ckpt

    # Optionally freeze weights and train only the predictors.
    if opt.freeze_layers:
        # index - 1 is the predictor that feeds each YOLOLayer; the YOLOLayer
        # itself is not the predictor.
        output_layer_indices = [
            idx - 1 for idx, module in enumerate(model.module_list)
            if isinstance(module, YOLOLayer)
        ]
        # Freeze every layer except the predictors and the YOLOLayers.
        freeze_layer_indeces = [
            x for x in range(len(model.module_list))
            if (x not in output_layer_indices) and (
                x - 1 not in output_layer_indices)
        ]
        # Freeze non-output layers (3x2=6 parameters remain trainable).
        for idx in freeze_layer_indeces:
            for parameter in model.module_list[idx].parameters():
                parameter.requires_grad_(False)
    else:
        # With freeze_layers off, only the part after darknet53 is trained by
        # default. Remove this branch to train all weights.
        darknet_end_layer = 74  # only yolov3spp cfg
        # Freeze darknet53 layers (21x3+3x2=69 parameters remain trainable).
        for idx in range(darknet_end_layer + 1):  # [0, 74]
            for parameter in model.module_list[idx].parameters():
                parameter.requires_grad_(False)

    # SyncBatchNorm: pointless when only the predictors (which contain no BN
    # layers) are trained.
    if opt.freeze_layers is False:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)

    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[opt.gpu])
    model.yolo_layers = model.module.yolo_layers  # move yolo layer indices to top level

    # optimizer
    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg,
                          lr=hyp["lr0"],
                          momentum=hyp["momentum"],
                          weight_decay=hyp["weight_decay"],
                          nesterov=True)

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp[
        "lrf"]) + hyp["lrf"]  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    scheduler.last_epoch = start_epoch  # epoch to resume the schedule from

    # dataset
    # Make sure only the first process in DDP process the dataset first,
    # and the following others can use the cache.
    with torch_distributed_zero_first(opt.rank):
        # The training set is created with the largest multi-scale size.
        train_dataset = LoadImagesAndLabels(
            train_path,
            imgsz_train,
            batch_size,
            augment=True,
            hyp=hyp,  # augmentation hyperparameters
            rect=opt.rect,  # rectangular training
            cache_images=opt.cache_images,
            single_cls=opt.single_cls,
            rank=opt.rank)
        # The validation set uses img_size.
        val_dataset = LoadImagesAndLabels(test_path,
                                          imgsz_test,
                                          batch_size,
                                          hyp=hyp,
                                          cache_images=opt.cache_images,
                                          single_cls=opt.single_cls,
                                          rank=opt.rank)

    # Assign the sample indices each rank works on.
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset)
    val_sampler = torch.utils.data.distributed.DistributedSampler(val_dataset)
    # Group the sampled indices into lists of batch_size elements.
    train_batch_sampler = torch.utils.data.BatchSampler(train_sampler,
                                                        batch_size,
                                                        drop_last=True)

    # dataloader
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    if opt.rank in [-1, 0]:
        print('Using %g dataloader workers' % nw)
    train_data_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_sampler=train_batch_sampler,
        num_workers=nw,
        pin_memory=True,
        collate_fn=train_dataset.collate_fn)

    val_data_loader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=batch_size,
        sampler=val_sampler,
        num_workers=nw,
        pin_memory=True,
        collate_fn=val_dataset.collate_fn)

    # Model parameters
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)

    # start training
    # caching val_data when you have plenty of memory(RAM)
    with torch_distributed_zero_first(opt.rank):
        if os.path.exists("tmp.pk") is False:
            coco = get_coco_api_from_dataset(val_dataset)
            with open("tmp.pk", "wb") as f:
                pickle.dump(coco, f)
        else:
            # NOTE: tmp.pk is a cache written by this script itself. pickle
            # must never be used to load files from an untrusted source.
            with open("tmp.pk", "rb") as f:
                coco = pickle.load(f)

    if opt.rank in [-1, 0]:
        print("starting traning for %g epochs..." % epochs)
        print('Using %g dataloader workers' % nw)

    start_time = time.time()
    for epoch in range(start_epoch, epochs):
        train_sampler.set_epoch(epoch)
        mloss, lr = train_util.train_one_epoch(
            model,
            optimizer,
            train_data_loader,
            device,
            epoch,
            accumulate=accumulate,  # batches accumulated per optimizer step (64 images)
            img_size=imgsz_train,  # input image size
            multi_scale=multi_scale,
            grid_min=grid_min,  # smallest grid size
            grid_max=grid_max,  # largest grid size
            gs=gs,  # grid step: 32
            print_freq=50,  # print every N steps
            warmup=True)
        # update scheduler
        scheduler.step()

        if opt.notest is False or epoch == epochs - 1:
            # evaluate on the test dataset
            result_info = train_util.evaluate(model,
                                              val_data_loader,
                                              coco=coco,
                                              device=device)

            # only first process in DDP process to record info and save weights
            if opt.rank in [-1, 0]:
                coco_mAP = result_info[0]
                voc_mAP = result_info[1]
                coco_mAR = result_info[8]

                # write into tensorboard
                if tb_writer:
                    tags = [
                        'train/giou_loss', 'train/obj_loss', 'train/cls_loss',
                        'train/loss', "learning_rate", "mAP@[IoU=0.50:0.95]",
                        "mAP@[IoU=0.5]", "mAR@[IoU=0.50:0.95]"
                    ]
                    for x, tag in zip(
                            mloss.tolist() + [lr, coco_mAP, voc_mAP, coco_mAR],
                            tags):
                        tb_writer.add_scalar(tag, x, epoch)

                # write into txt
                with open(results_file, "a") as f:
                    result_info = [str(round(i, 4)) for i in result_info]
                    txt = "epoch:{} {}".format(epoch, ' '.join(result_info))
                    f.write(txt + "\n")

                # update best mAP(IoU=0.50:0.95)
                if coco_mAP > best_map:
                    best_map = coco_mAP

                # Pick the checkpoint path: one file per epoch, or only the
                # best model so far when savebest is enabled.
                if opt.savebest is False:
                    save_path = "./weights/yolov3spp-{}.pt".format(epoch)
                elif best_map == coco_mAP:
                    save_path = best
                else:
                    save_path = None

                if save_path is not None:
                    with open(results_file, 'r') as f:
                        save_files = {
                            'model': model.module.state_dict(),
                            'optimizer': optimizer.state_dict(),
                            'training_results': f.read(),
                            'epoch': epoch,
                            'best_map': best_map
                        }
                        torch.save(save_files, save_path)

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    if opt.rank in [-1, 0]:
        print('Training time {}'.format(total_time_str))
def evaluate(model, data_loader, device, data_set=None, mAP_list=None):
    """Evaluate a detector that returns relative boxes, using the COCO evaluator.

    Args:
        model: Detection model. It is switched to eval mode and called as
            ``model(images, targets)``; it must return one
            ``(bboxes_out, labels_out, scores_out)`` tuple per image.
        data_loader: Iterable of ``(images, targets)`` batches. ``images``
            are stacked into a single batch tensor; each target dict must
            contain ``"height_width"`` and ``"image_id"``.
        device: Device (``torch.device`` or device string) the inputs are
            moved to.
        data_set: Optional pre-built COCO API object. When ``None`` it is
            built from ``data_loader.dataset``.
        mAP_list: Optional list. When it is a list, ``stats[1]`` of the first
            IoU type is appended to it (named ``voc_mAP`` in this file;
            presumably AP at IoU=0.5 -- confirm against the evaluator).

    Returns:
        The summary stats of the first IoU type as a list of floats.
    """
    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    cpu_device = torch.device("cpu")
    # Compare by device type so that a plain string such as "cpu" is handled
    # correctly and only CUDA devices are synchronized.
    use_cuda = torch.device(device).type == "cuda"

    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = "Test: "

    try:
        if data_set is None:
            data_set = get_coco_api_from_dataset(data_loader.dataset)
        iou_types = _get_iou_types(model)
        coco_evaluator = CocoEvaluator(data_set, iou_types)

        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            for images, targets in metric_logger.log_every(data_loader, 100, header):
                images = torch.stack(images, dim=0)
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
                images = images.to(device)

                if use_cuda:
                    torch.cuda.synchronize(device)

                model_time = time.time()
                # list((bboxes_out, labels_out, scores_out), ...)
                results = model(images, targets)

                outputs = []
                for target, (bboxes_out, labels_out, scores_out) in zip(targets, results):
                    # Convert relative box coordinates (0-1) to absolute
                    # (xmin, ymin, xmax, ymax) using the stored image size.
                    height_width = target["height_width"]
                    bboxes_out[:, [0, 2]] = bboxes_out[:, [0, 2]] * height_width[1]
                    bboxes_out[:, [1, 3]] = bboxes_out[:, [1, 3]] * height_width[0]

                    outputs.append({"boxes": bboxes_out.to(cpu_device),
                                    "labels": labels_out.to(cpu_device),
                                    "scores": scores_out.to(cpu_device)})
                model_time = time.time() - model_time

                # zip stops at the shorter sequence, like the index loop over
                # ``outputs`` it replaces.
                res = {target["image_id"].item(): output
                       for target, output in zip(targets, outputs)}

                evaluator_time = time.time()
                coco_evaluator.update(res)
                evaluator_time = time.time() - evaluator_time
                metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

        # gather the stats from all processes
        metric_logger.synchronize_between_processes()
        print("Averaged stats:", metric_logger)
        coco_evaluator.synchronize_between_processes()

        # accumulate predictions from all images
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
    finally:
        # Always restore the caller's thread setting, even if evaluation fails.
        torch.set_num_threads(n_threads)

    print_txt = coco_evaluator.coco_eval[iou_types[0]].stats
    voc_mAP = print_txt[1]
    if isinstance(mAP_list, list):
        mAP_list.append(voc_mAP)

    # Return plain floats so callers can index and concatenate the stats.
    return [float(value) for value in print_txt]
def train(hyp):
    """Train a Darknet detector on a single device and evaluate it each epoch.

    Reads its configuration from the module-level ``opt`` and logs to the
    module-level ``tb_writer``. Results are appended to ``results.txt`` and
    checkpoints are written under ``weights/``.

    Args:
        hyp: Hyper-parameter dict. Keys read: lr0, lrf, momentum,
            weight_decay, cls, obj. ``cls`` and ``obj`` are rescaled in place.

    Raises:
        KeyError: If the checkpoint in ``opt.weights`` contains keys the
            model does not have.
    """
    device = torch.device(opt.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    wdir = "weights" + os.sep  # weights dir
    best = wdir + "best.pt"
    results_file = "results.txt"
    # Checkpoints are written below this directory; make sure it exists.
    os.makedirs(wdir, exist_ok=True)

    cfg = opt.cfg
    data = opt.data
    epochs = opt.epochs
    batch_size = opt.batch_size
    accumulate = max(round(64 / batch_size), 1)  # accumulate n times before optimizer update (bs 64)
    weights = opt.weights  # initial training weights
    imgsz_train = opt.img_size
    imgsz_test = opt.img_size  # test image sizes
    multi_scale = opt.multi_scale

    # Image sizes: must be a multiple of the grid size (32).
    gs = 32  # (pixels) grid size
    assert math.fmod(imgsz_test, gs) == 0, "--img-size %g must be a %g-multiple" % (imgsz_test, gs)
    grid_min, grid_max = imgsz_test // gs, imgsz_test // gs
    if multi_scale:
        imgsz_min = opt.img_size // 1.5
        imgsz_max = opt.img_size // 0.667

        # Round the min/max input sizes down to a multiple of the grid size.
        grid_min, grid_max = imgsz_min // gs, imgsz_max // gs
        imgsz_min, imgsz_max = int(grid_min * gs), int(grid_max * gs)
        imgsz_train = imgsz_max  # initialize with max size
        print("Using multi_scale training, image range[{}, {}]".format(imgsz_min, imgsz_max))

    # configure run
    data_dict = parse_data_cfg(data)
    train_path = data_dict["train"]
    test_path = data_dict["valid"]
    nc = 1 if opt.single_cls else int(data_dict["classes"])  # number of classes
    hyp["cls"] *= nc / 80  # update coco-tuned hyp['cls'] to current dataset
    hyp["obj"] *= imgsz_test / 320

    # Remove previous results
    for f in glob.glob(results_file):
        os.remove(f)

    # Initialize model
    model = Darknet(cfg).to(device)

    # Optionally freeze weights and train only the predictors.
    if opt.freeze_layers:
        # index - 1 is the predictor that feeds each YOLOLayer; the YOLOLayer
        # itself is not the predictor.
        output_layer_indices = [idx - 1 for idx, module in enumerate(model.module_list)
                                if isinstance(module, YOLOLayer)]
        # Freeze every layer except the predictors and the YOLOLayers.
        freeze_layer_indeces = [x for x in range(len(model.module_list))
                                if (x not in output_layer_indices) and
                                (x - 1 not in output_layer_indices)]
        # Freeze non-output layers (3x2=6 parameters remain trainable).
        for idx in freeze_layer_indeces:
            for parameter in model.module_list[idx].parameters():
                parameter.requires_grad_(False)
    else:
        # With freeze_layers off, only the part after darknet53 is trained by
        # default. Remove this branch to train all weights.
        darknet_end_layer = 74  # only yolov3spp cfg
        # Freeze darknet53 layers (21x3+3x2=69 parameters remain trainable).
        for idx in range(darknet_end_layer + 1):  # [0, 74]
            for parameter in model.module_list[idx].parameters():
                parameter.requires_grad_(False)

    # optimizer
    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=hyp["lr0"], momentum=hyp["momentum"],
                          weight_decay=hyp["weight_decay"], nesterov=True)

    start_epoch = 0
    best_map = 0.0
    if weights.endswith((".pt", ".pth")):
        ckpt = torch.load(weights, map_location=device)

        # load model
        try:
            # Build the model's state dict once instead of once per key.
            model_state = model.state_dict()
            ckpt["model"] = {k: v for k, v in ckpt["model"].items()
                             if model_state[k].numel() == v.numel()}
            model.load_state_dict(ckpt["model"], strict=False)
        except KeyError as e:
            s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. " \
                "See https://github.com/ultralytics/yolov3/issues/657" % (opt.weights, opt.cfg, opt.weights)
            raise KeyError(s) from e

        # load optimizer; weights-only checkpoints may not carry the key at all
        if ckpt.get("optimizer") is not None:
            optimizer.load_state_dict(ckpt["optimizer"])
            if "best_map" in ckpt:
                best_map = ckpt["best_map"]

        # load results
        if ckpt.get("training_results") is not None:
            with open(results_file, "w") as file:
                file.write(ckpt["training_results"])  # write results.txt

        # epochs
        start_epoch = ckpt["epoch"] + 1
        if epochs < start_epoch:
            print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
                  (opt.weights, ckpt['epoch'], epochs))
            epochs += ckpt['epoch']  # finetune additional epochs
        del ckpt

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp["lrf"]) + hyp["lrf"]  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    scheduler.last_epoch = start_epoch  # epoch to resume the schedule from

    # dataset
    # The training set is created with the largest multi-scale size.
    train_dataset = LoadImageAndLabels(train_path, imgsz_train, batch_size,
                                       augment=True,
                                       hyp=hyp,  # augmentation hyperparameters
                                       rect=opt.rect,  # rectangular training
                                       cache_images=opt.cache_images,
                                       single_cls=opt.single_cls)

    # The validation set uses img_size.
    val_dataset = LoadImageAndLabels(test_path, imgsz_test, batch_size,
                                     hyp=hyp,
                                     # rect resizes each batch to a fitting (non-square)
                                     # size, which reduces computation
                                     rect=True,
                                     cache_images=opt.cache_images,
                                     single_cls=opt.single_cls)

    # dataloader
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   num_workers=nw,
                                                   # Shuffle=True unless rectangular training is used
                                                   shuffle=not opt.rect,
                                                   pin_memory=True,
                                                   collate_fn=train_dataset.collate_fn)

    val_datasetloader = torch.utils.data.DataLoader(val_dataset,
                                                    batch_size=batch_size,
                                                    num_workers=nw,
                                                    pin_memory=True,
                                                    collate_fn=val_dataset.collate_fn)

    # Model parameters
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)

    # start training
    # caching val_data when you have plenty of memory(RAM)
    coco = get_coco_api_from_dataset(val_dataset)

    print("starting traning for %g epochs..." % epochs)
    print('Using %g dataloader workers' % nw)
    for epoch in range(start_epoch, epochs):
        mloss, lr = train_util.train_one_epoch(model, optimizer, train_dataloader,
                                               device, epoch,
                                               accumulate=accumulate,  # batches accumulated per optimizer step (64 images)
                                               img_size=imgsz_train,  # input image size
                                               multi_scale=multi_scale,
                                               grid_min=grid_min,  # smallest grid size
                                               grid_max=grid_max,  # largest grid size
                                               gs=gs,  # grid step: 32
                                               print_freq=50,  # print every N steps
                                               warmup=True)
        # update scheduler
        scheduler.step()

        if opt.notest is False or epoch == epochs - 1:
            # evaluate on the test dataset
            result_info = train_util.evaluate(model, val_datasetloader,
                                              coco=coco, device=device)

            coco_mAP = result_info[0]
            voc_mAP = result_info[1]
            coco_mAR = result_info[8]

            # write into tensorboard
            if tb_writer:
                tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss', 'train/loss',
                        "learning_rate", "mAP@[IoU=0.50:0.95]", "mAP@[IoU=0.5]",
                        "mAR@[IoU=0.50:0.95]"]
                for x, tag in zip(mloss.tolist() + [lr, coco_mAP, voc_mAP, coco_mAR], tags):
                    tb_writer.add_scalar(tag, x, epoch)

            # write into txt
            with open(results_file, "a") as f:
                result_info = [str(round(i, 4)) for i in result_info]
                txt = "epoch:{} {}".format(epoch, ' '.join(result_info))
                f.write(txt + "\n")

            # update best mAP(IoU=0.50:0.95)
            if coco_mAP > best_map:
                best_map = coco_mAP

            # Pick the checkpoint path: one file per epoch, or only the best
            # model so far when savebest is enabled.
            if opt.savebest is False:
                save_path = "./weights/yolov3spp-{}.pt".format(epoch)
            elif best_map == coco_mAP:
                save_path = best
            else:
                save_path = None

            if save_path is not None:
                with open(results_file, 'r') as f:
                    save_files = {
                        'model': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'training_results': f.read(),
                        'epoch': epoch,
                        'best_map': best_map}
                    torch.save(save_files, save_path)
def train(hyp):
    """Train a Darknet detector on a single device, evaluating and saving every epoch.

    Reads its configuration from the module-level ``opt``, logs to the
    module-level ``tb_writer`` and uses ``results_file`` from the enclosing
    scope. A checkpoint is written to ``weights/`` after every epoch.

    Args:
        hyp: Hyper-parameter dict. Keys read: lr0, momentum, weight_decay,
            cls. ``cls`` is rescaled in place.

    Raises:
        KeyError: If the checkpoint in ``opt.weights`` contains keys the
            model does not have.
    """
    device = torch.device(opt.device if torch.cuda.is_available() else "cpu")

    cfg = opt.cfg
    data = opt.data
    epochs = opt.epochs
    batch_size = opt.batch_size
    accumulate = max(round(64 / batch_size), 1)  # accumulate n times before optimizer update (bs 64)
    weights = opt.weights  # initial training weights
    imgsz_train = opt.img_size
    imgsz_test = opt.img_size  # test image sizes
    multi_scale = opt.multi_scale

    # Image sizes: must be a multiple of the grid size (32).
    gs = 32  # (pixels) grid size
    assert math.fmod(
        imgsz_test, gs) == 0, "--img-size %g must be a %g-multiple" % (imgsz_test, gs)
    grid_min, grid_max = imgsz_test // gs, imgsz_test // gs
    if multi_scale:
        imgsz_min = opt.img_size // 1.5
        imgsz_max = opt.img_size // 0.667

        # Round the min/max input sizes down to a multiple of the grid size.
        grid_min, grid_max = imgsz_min // gs, imgsz_max // gs
        imgsz_min, imgsz_max = int(grid_min * gs), int(grid_max * gs)
        imgsz_train = imgsz_max  # initialize with max size
        print("Using multi_scale training, image range[{}, {}]".format(
            imgsz_min, imgsz_max))

    # configure run
    data_dict = parse_data_cfg(data)
    train_path = data_dict["train"]
    test_path = data_dict["valid"]
    nc = 1 if opt.single_cls else int(data_dict["classes"])  # number of classes
    hyp["cls"] *= nc / 80  # update coco-tuned hyp['cls'] to current dataset

    # remove previous results
    for f in glob.glob("*_batch*.jpg") + glob.glob(results_file):
        os.remove(f)

    # Checkpoints are written below this directory; make sure it exists.
    os.makedirs("./weights", exist_ok=True)

    # Initialize model
    model = Darknet(cfg).to(device)

    # optimizer
    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in dict(model.named_parameters()).items():
        if ".bias" in k:
            pg2 += [v]  # biases (bn biases and conv2d biases)
        elif "Conv2d.weight" in k:
            pg1 += [v]  # apply weight_decay
        else:
            pg0 += [v]  # all else, (bn weight)

    if opt.adam:
        optimizer = optim.Adam(pg0, lr=hyp["lr0"])
    else:
        optimizer = optim.SGD(pg0,
                              lr=hyp["lr0"],
                              momentum=hyp["momentum"],
                              nesterov=True)
    optimizer.add_param_group({
        "params": pg1,
        "weight_decay": hyp["weight_decay"]
    })  # add pg1 with weight_decay
    optimizer.add_param_group({"params": pg2})  # add pg2 (biases)
    print('Optimizer groups: %g .bias, %g Conv2d.weight, %g other' %
          (len(pg2), len(pg1), len(pg0)))
    del pg0, pg1, pg2

    start_epoch = 0
    best_fitness = 0.0
    if weights.endswith(".pt"):
        ckpt = torch.load(weights, map_location=device)

        # load model
        try:
            # Build the model's state dict once instead of once per key.
            model_state = model.state_dict()
            ckpt["model"] = {
                k: v
                for k, v in ckpt["model"].items()
                if model_state[k].numel() == v.numel()
            }
            model.load_state_dict(ckpt["model"], strict=False)
        except KeyError as e:
            s = "%s is not compatible with %s. Specify --weights '' or specify a --cfg compatible with %s. " \
                "See https://github.com/ultralytics/yolov3/issues/657" % (opt.weights, opt.cfg, opt.weights)
            raise KeyError(s) from e

        # load optimizer
        if ckpt["optimizer"] is not None:
            optimizer.load_state_dict(ckpt["optimizer"])
            # Checkpoints written by this function carry no "best_fitness",
            # so fall back to the default instead of raising KeyError.
            best_fitness = ckpt.get("best_fitness", best_fitness)

        # load results
        if ckpt.get("training_results") is not None:
            with open(results_file, "w") as file:
                file.write(ckpt["training_results"])  # write results.txt

        # epochs
        start_epoch = ckpt["epoch"] + 1
        if epochs < start_epoch:
            print(
                '%s has been trained for %g epochs. Fine-tuning for %g additional epochs.'
                % (opt.weights, ckpt['epoch'], epochs))
            epochs += ckpt['epoch']  # finetune additional epochs
        del ckpt

    if opt.freeze_layers:
        # index - 1 is the predictor that feeds each YOLOLayer; the YOLOLayer
        # itself is not the predictor.
        output_layer_indices = [
            idx - 1 for idx, module in enumerate(model.module_list)
            if isinstance(module, YOLOLayer)
        ]
        # Freeze every layer except the predictors and the YOLOLayers.
        freeze_layer_indeces = [
            x for x in range(len(model.module_list))
            if (x not in output_layer_indices) and (
                x - 1 not in output_layer_indices)
        ]
        # Freeze non-output layers
        for idx in freeze_layer_indeces:
            for parameter in model.module_list[idx].parameters():
                parameter.requires_grad_(False)

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: (
        (1 + math.cos(x * math.pi / epochs)) / 2) * 0.99 + 0.01  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
    scheduler.last_epoch = start_epoch  # epoch to resume the schedule from

    # dataset
    # The training set is created with the largest multi-scale size.
    train_dataset = LoadImageAndLabels(
        train_path,
        imgsz_train,
        batch_size,
        augment=True,
        hyp=hyp,  # augmentation hyperparameters
        rect=opt.rect,  # rectangular training
        cache_images=opt.cache_images,
        single_cls=opt.single_cls)

    # The validation set uses img_size.
    val_dataset = LoadImageAndLabels(
        test_path,
        imgsz_test,
        batch_size,
        hyp=hyp,
        # rect resizes each batch to a fitting (non-square) size, which
        # reduces computation
        rect=True,
        cache_images=opt.cache_images,
        single_cls=opt.single_cls)

    # dataloader
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        num_workers=nw,
        # Shuffle=True unless rectangular training is used
        shuffle=not opt.rect,
        pin_memory=True,
        collate_fn=train_dataset.collate_fn)

    val_datasetloader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=batch_size,
        num_workers=nw,
        pin_memory=True,
        collate_fn=val_dataset.collate_fn)

    # Model parameters
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)
    # Attach class weights computed from the training labels.
    model.class_weights = labels_to_class_weights(train_dataset.labels, nc).to(
        device)

    # start training
    # caching val_data when you have plenty of memory(RAM)
    print("caching val_data for evaluation.")
    coco = get_coco_api_from_dataset(val_dataset)

    print("starting traning for %g epochs..." % epochs)
    print('Using %g dataloader workers' % nw)
    for epoch in range(start_epoch, epochs):
        mloss, lr = train_util.train_one_epoch(
            model,
            optimizer,
            train_dataloader,
            device,
            epoch,
            accumulate=accumulate,  # batches accumulated per optimizer step (64 images)
            img_size=imgsz_train,  # input image size
            batch_size=batch_size,
            multi_scale=multi_scale,
            grid_min=grid_min,  # smallest grid size
            grid_max=grid_max,  # largest grid size
            gs=gs,  # grid step: 32
            print_freq=50,  # print every N steps
            warmup=True)
        # update scheduler
        scheduler.step()

        # evaluate on the test dataset
        result_info = train_util.evaluate(model,
                                          val_datasetloader,
                                          coco=coco,
                                          device=device)

        # write into tensorboard
        if tb_writer:
            tags = [
                'train/giou_loss', 'train/obj_loss', 'train/cls_loss',
                'train/loss', "learning_rate", "mAP@[IoU=0.50:0.95]",
                "mAP@[IoU=0.5]", "mAR@[IoU=0.50:0.95]"
            ]
            coco_mAP = result_info[0]
            voc_mAP = result_info[1]
            coco_mAR = result_info[8]
            for x, tag in zip(
                    mloss.tolist() + [lr, coco_mAP, voc_mAP, coco_mAR], tags):
                tb_writer.add_scalar(tag, x, epoch)

        # save weights
        save_files = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'epoch': epoch
        }
        torch.save(save_files, "./weights/yolov3spp-{}.pth".format(epoch))
def main(parser_data):
    """Train the SSD300 model on VOC2012, evaluating and checkpointing every epoch.

    Per-epoch results are appended to a timestamped ``results*.txt`` file,
    checkpoints are written to ``save_weights/`` and the loss, learning-rate
    and mAP curves are plotted after training.

    Args:
        parser_data: Parsed options. Fields read here: device, data_path,
            batch_size, resume, start_epoch, epochs. ``start_epoch`` is
            overwritten when a checkpoint is resumed.

    Raises:
        FileNotFoundError: If ``VOCdevkit`` is not found under ``data_path``.
    """
    device = torch.device(parser_data.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    os.makedirs("save_weights", exist_ok=True)

    results_file = "results{}.txt".format(datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

    data_transform = {
        "train": transform.Compose([transform.SSDCropping(),
                                    transform.Resize(),
                                    transform.ColorJitter(),
                                    transform.ToTensor(),
                                    transform.RandomHorizontalFlip(),
                                    transform.Normalization(),
                                    transform.AssignGTtoDefaultBox()]),
        "val": transform.Compose([transform.Resize(),
                                  transform.ToTensor(),
                                  transform.Normalization()])
    }

    VOC_root = parser_data.data_path
    # check voc root
    if os.path.exists(os.path.join(VOC_root, "VOCdevkit")) is False:
        raise FileNotFoundError("VOCdevkit dose not in path:'{}'.".format(VOC_root))

    train_dataset = VOC2012DataSet(VOC_root, data_transform['train'], train_set='train.txt')
    # The batch size must be greater than 1 during training.
    batch_size = parser_data.batch_size
    assert batch_size > 1, "batch size must be greater than 1"
    # Drop the last batch when it would contain exactly one sample.
    drop_last = len(train_dataset) % batch_size == 1
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using %g dataloader workers' % nw)
    train_data_loader = torch.utils.data.DataLoader(train_dataset,
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    num_workers=nw,
                                                    collate_fn=train_dataset.collate_fn,
                                                    drop_last=drop_last)

    val_dataset = VOC2012DataSet(VOC_root, data_transform['val'], train_set='val.txt')
    # Collate validation batches with the validation dataset's own collate_fn.
    val_data_loader = torch.utils.data.DataLoader(val_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=nw,
                                                  collate_fn=val_dataset.collate_fn)

    model = create_model(num_classes=21, device=device)
    model.to(device)

    # define optimizer
    params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.SGD(params, lr=0.0005,
                                momentum=0.9, weight_decay=0.0005)
    # learning rate scheduler
    lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                   step_size=5,
                                                   gamma=0.3)

    # Resume from the given checkpoint, if any.
    if parser_data.resume != "":
        # map_location lets a checkpoint saved on GPU be resumed on the
        # current device (e.g. CPU).
        checkpoint = torch.load(parser_data.resume, map_location=device)
        model.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        lr_scheduler.load_state_dict(checkpoint['lr_scheduler'])
        parser_data.start_epoch = checkpoint['epoch'] + 1
        print("the training process from epoch{}...".format(parser_data.start_epoch))

    train_loss = []
    learning_rate = []
    val_map = []

    # Build the validation COCO object once so it is not rebuilt every epoch.
    val_data = get_coco_api_from_dataset(val_data_loader.dataset)
    for epoch in range(parser_data.start_epoch, parser_data.epochs):
        mean_loss, lr = utils.train_one_epoch(model=model, optimizer=optimizer,
                                              data_loader=train_data_loader,
                                              device=device, epoch=epoch,
                                              print_freq=50)
        train_loss.append(mean_loss.item())
        learning_rate.append(lr)

        # update learning rate
        lr_scheduler.step()

        coco_info = utils.evaluate(model=model, data_loader=val_data_loader,
                                   device=device, data_set=val_data)

        # write into txt
        with open(results_file, "a") as f:
            result_info = [str(round(i, 4)) for i in coco_info + [mean_loss.item(), lr]]
            txt = "epoch:{} {}".format(epoch, ' '.join(result_info))
            f.write(txt + "\n")

        val_map.append(coco_info[1])  # pascal mAP

        # save weights
        save_files = {
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'lr_scheduler': lr_scheduler.state_dict(),
            'epoch': epoch}
        torch.save(save_files, "./save_weights/ssd300-{}.pth".format(epoch))

    # plot loss and lr curve
    if train_loss and learning_rate:
        from plot_curve import plot_loss_and_lr
        plot_loss_and_lr(train_loss, learning_rate)

    # plot mAP curve
    if val_map:
        from plot_curve import plot_map
        plot_map(val_map)
def main(parser_data):
    """Evaluate trained Faster R-CNN weights on the ``val.txt`` split.

    Loads the class-name mapping from ``./pascal_voc_classes.json``, builds a
    ResNet50-FPN Faster R-CNN, restores the weights given on the command line,
    runs inference over the validation loader and prints the COCO summary plus
    the per-category result at IoU=0.5. Both reports are also written to
    ``record_mAP.txt``.

    Args:
        parser_data: parsed command-line namespace. The attributes read here
            are ``device``, ``data_path``, ``batch_size``, ``num_classes``
            and ``weights``.

    Raises:
        AssertionError: if the class JSON file or the weights file is missing.
        FileNotFoundError: if ``VOCdevkit`` is not found under ``data_path``.
    """
    device = torch.device(
        parser_data.device if torch.cuda.is_available() else "cpu")
    print("Using {} device training.".format(device.type))

    data_transform = {"val": transforms.Compose([transforms.ToTensor()])}

    # read class_indict
    label_json_path = './pascal_voc_classes.json'
    assert os.path.exists(
        label_json_path), "json file {} dose not exist.".format(
            label_json_path)
    # Use a context manager so the file handle is closed even if parsing fails.
    with open(label_json_path, 'r') as json_file:
        class_dict = json.load(json_file)
    # Invert the mapping read from the JSON file: value -> key.
    category_index = {v: k for k, v in class_dict.items()}

    VOC_root = parser_data.data_path
    # check voc root
    if os.path.exists(os.path.join(VOC_root, "VOCdevkit")) is False:
        raise FileNotFoundError(
            "VOCdevkit dose not in path:'{}'.".format(VOC_root))

    # NOTE: collate_fn is custom because every sample carries both an image
    # and its targets, which the default batching cannot merge.
    batch_size = parser_data.batch_size
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0,
              8])  # number of workers
    print('Using %g dataloader workers' % nw)

    # load validation data set
    val_data_set = VOC2007DataSet(VOC_root, data_transform["val"], "val.txt")
    val_data_set_loader = torch.utils.data.DataLoader(
        val_data_set,
        batch_size=batch_size,
        shuffle=False,
        num_workers=nw,
        collate_fn=val_data_set.collate_fn)

    # create model num_classes equal background + 20 classes
    backbone = resnet50_fpn_backbone()
    model = FasterRCNN(backbone=backbone,
                       num_classes=parser_data.num_classes + 1)

    # Load the weights you trained yourself.
    weights_path = parser_data.weights
    assert os.path.exists(weights_path), "not found {} file.".format(
        weights_path)
    weights_dict = torch.load(weights_path, map_location=device)
    model.load_state_dict(weights_dict['model'])
    # print(model)

    model.to(device)

    # evaluate on the test dataset
    coco = get_coco_api_from_dataset(val_data_set)
    iou_types = ["bbox"]
    coco_evaluator = CocoEvaluator(coco, iou_types)
    cpu_device = torch.device("cpu")

    model.eval()
    with torch.no_grad():
        for image, targets in tqdm(val_data_set_loader, desc="validation..."):
            # Move the images to the selected device.
            image = list(img.to(device) for img in image)

            # inference
            outputs = model(image)

            outputs = [{k: v.to(cpu_device) for k, v in t.items()}
                       for t in outputs]
            res = {
                target["image_id"].item(): output
                for target, output in zip(targets, outputs)
            }
            coco_evaluator.update(res)

    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()

    coco_eval = coco_evaluator.coco_eval["bbox"]
    # calculate COCO info for all classes
    coco_stats, print_coco = summarize(coco_eval)

    # calculate voc info for every classes(IoU=0.5)
    voc_map_info_list = []
    for i in range(len(category_index)):
        stats, _ = summarize(coco_eval, catId=i)
        # category_index is looked up with i + 1 while summarize gets i.
        voc_map_info_list.append(" {:15}: {}".format(category_index[i + 1],
                                                     stats[1]))

    print_voc = "\n".join(voc_map_info_list)
    print(print_voc)

    # Save the validation results to a txt file.
    with open("record_mAP.txt", "w") as f:
        record_lines = [
            "COCO results:", print_coco, "",
            "mAP(IoU=0.5) for each category:", print_voc
        ]
        f.write("\n".join(record_lines))
def evaluate(model, data_loader, coco=None, device=None):
    """Run COCO-style evaluation of ``model`` over ``data_loader``.

    Args:
        model: detection model; ``model(imgs)[0]`` is passed to
            ``non_max_suppression``.
        data_loader: loader whose batches unpack into five items
            ``(imgs, targets, paths, _, img_index)``.
        coco: pre-built COCO API object for the dataset. Built from
            ``data_loader.dataset`` when ``None``.
        device: ``torch.device`` or device string to run inference on.
            Defaults to the CPU when ``None``.

    Returns:
        The ``stats`` attribute of the COCO evaluator for the first IoU type.
    """
    # Normalise the argument: default to the CPU and accept plain strings, so
    # the CUDA check below is a reliable comparison on the device type.
    device = torch.device("cpu") if device is None else torch.device(device)
    cpu_device = torch.device("cpu")

    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    try:
        model.eval()
        metric_logger = utils.MetricLogger(delimiter="  ")
        header = "Test: "

        if coco is None:
            coco = get_coco_api_from_dataset(data_loader.dataset)
        iou_types = _get_iou_types(model)
        coco_evaluator = CocoEvaluator(coco, iou_types)

        for imgs, targets, paths, _, img_index in metric_logger.log_every(data_loader, 100, header):
            imgs = imgs.to(device).float() / 255.0  # uint8 to float32, 0 - 255 to 0.0 - 1.0
            # targets = targets.to(device)

            # Only synchronise when actually running on a CUDA device.
            if device.type == "cuda":
                torch.cuda.synchronize(device)

            model_time = time.time()
            pred = model(imgs)[0]  # only get inference result
            pred = non_max_suppression(pred, conf_thres=0.001, iou_thres=0.6, multi_label=False)
            outputs = []
            for p in pred:
                if p is None:
                    # No detections for this image: use empty placeholders.
                    p = torch.empty((0, 6), device=cpu_device)
                    boxes = torch.empty((0, 4), device=cpu_device)
                else:
                    # xmin, ymin, xmax, ymax
                    boxes = p[:, :4]

                # NOTE: boxes passed on here must be xmin, ymin, xmax, ymax
                # in absolute coordinates.
                info = {"boxes": boxes.to(cpu_device),
                        "labels": p[:, 5].to(device=cpu_device, dtype=torch.int64),
                        "scores": p[:, 4].to(cpu_device)}
                outputs.append(info)
            model_time = time.time() - model_time

            res = {img_id: output for img_id, output in zip(img_index, outputs)}

            evaluator_time = time.time()
            coco_evaluator.update(res)
            evaluator_time = time.time() - evaluator_time
            metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)

        # gather the stats from all processes
        metric_logger.synchronize_between_processes()
        print("Averaged stats:", metric_logger)
        coco_evaluator.synchronize_between_processes()

        # accumulate predictions from all images
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
    finally:
        # Restore the caller's thread count even if evaluation raised.
        torch.set_num_threads(n_threads)

    result_info = coco_evaluator.coco_eval[iou_types[0]].stats

    return result_info
def evaluate(self, data_loader, coco=None, device=None):
    """Run COCO-style evaluation of ``self.model`` over ``data_loader``.

    Args:
        data_loader: loader whose batches unpack into five items
            ``(images, targets, paths, shapes, img_index)``.
        coco: pre-built COCO API object for the dataset. Built from
            ``data_loader.dataset`` when ``None``.
        device: ``torch.device`` or device string to run inference on.
            Defaults to the CPU when falsy.

    Returns:
        list: the COCO evaluator ``stats`` for the first IoU type, converted
        from a numpy array to a plain list.
    """
    # Normalise the argument: default to the CPU and accept plain strings, so
    # the CUDA check below is a reliable comparison on the device type.
    device = torch.device(device) if device else torch.device("cpu")

    n_threads = torch.get_num_threads()
    # FIXME remove this and make paste_masks_in_image run on the GPU
    torch.set_num_threads(1)
    try:
        self.model.eval()
        metric_logger = utils.MetricLogger(delimiter="  ")
        header = "Test: "

        if coco is None:
            coco = get_coco_api_from_dataset(data_loader.dataset)
        iou_types = _get_iou_types(self.model)
        coco_evaluator = CocoEvaluator(coco, iou_types)

        log_every = metric_logger.log_every(data_loader, 100, header)
        for images, targets, paths, shapes, img_index in log_every:
            # uint8 to float32, 0 - 255 to 0.0 - 1.0
            images = images.to(device).float() / 255.0

            # Only synchronise when actually running on a CUDA device.
            if device.type == "cuda":
                torch.cuda.synchronize(device)

            model_time = time.time()
            pred = self.model(images)[0]  # only get inference result
            pred = non_max_suppression(pred,
                                       conf_thres=0.001,
                                       iou_thres=0.6,
                                       multi_label=False)
            outputs = []
            for index, pred_i in enumerate(pred):
                if pred_i is None:
                    # No detections for this image: use empty placeholders.
                    pred_i = torch.empty((0, 6), device=device)
                    boxes = torch.empty((0, 4), device=device)
                else:
                    boxes = pred_i[:, :4]  # l, t, r, b
                    # shapes: (h0, w0), ((h / h0, w / w0), pad)
                    # Map the boxes back to the original image scale so that
                    # the computed mAP is accurate.
                    boxes = scale_coordinates(boxes, images[index].shape[1:],
                                              shapes[index]).round()
                    # NOTE(review): the display call is assumed to belong to
                    # this branch (images with detections only) - confirm.
                    image = images[index]
                    self.img_show(image, boxes)

                # NOTE: boxes passed on here must be l_abs, t_abs, r_abs,
                # b_abs, i.e. absolute coordinates.
                info = {
                    "boxes": boxes.to(device),
                    "labels": pred_i[:, 5].to(device=device, dtype=torch.int64),
                    "scores": pred_i[:, 4].to(device)
                }
                outputs.append(info)
            model_time = time.time() - model_time

            res = {
                img_id: output
                for img_id, output in zip(img_index, outputs)
            }

            evaluator_time = time.time()
            coco_evaluator.update(res)
            evaluator_time = time.time() - evaluator_time
            metric_logger.update(model_time=model_time,
                                 evaluator_time=evaluator_time)

        # gather the stats from all processes
        metric_logger.synchronize_between_processes()
        print("Averaged stats:", metric_logger)
        coco_evaluator.synchronize_between_processes()

        # accumulate predictions from all images
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
    finally:
        # Restore the caller's thread count even if evaluation raised.
        torch.set_num_threads(n_threads)

    result_info = coco_evaluator.coco_eval[
        iou_types[0]].stats.tolist()  # numpy to list

    return result_info