def train(hyp, opt, device, tb_writer=None, wandb=None): logger.info(f'Hyperparameters {hyp}') log_dir = Path(tb_writer.log_dir) if tb_writer else Path( opt.logdir) / 'evolve' # logging directory log_dir = Path("/project/train/log") wdir = Path("/project/train/models") # weights directory os.makedirs(wdir / 'last', exist_ok=True) os.makedirs(wdir / 'final', exist_ok=True) last = wdir / 'last' / 'last.pt' best = wdir / 'final' / 'best.pt' results_file = str(log_dir / 'log.txt') epochs, batch_size, total_batch_size, weights, rank = \ opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank # Save run settings with open(log_dir / 'hyp.yaml', 'w') as f: yaml.dump(hyp, f, sort_keys=False) with open(log_dir / 'opt.yaml', 'w') as f: yaml.dump(vars(opt), f, sort_keys=False) # Configure cuda = device.type != 'cpu' init_seeds(2 + rank) with open(opt.data) as f: data_dict = yaml.load(f, Loader=yaml.FullLoader) # data dict with torch_distributed_zero_first(rank): print(data_dict) check_dataset(data_dict) # check train_path = data_dict['train'] test_path = data_dict['val'] nc, names = (1, ['item']) if opt.single_cls else (int( data_dict['nc']), data_dict['names']) # number classes, names assert len(names) == nc, '%g names found for nc=%g dataset in %s' % ( len(names), nc, opt.data) # check # Model pretrained = weights.endswith('.pt') if pretrained: with torch_distributed_zero_first(rank): attempt_download(weights) # download if not found locally ckpt = torch.load(weights, map_location=device) # load checkpoint if hyp.get('anchors'): ckpt['model'].yaml['anchors'] = round( hyp['anchors']) # force autoanchor model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device) # create exclude = ['anchor'] if opt.cfg or hyp.get('anchors') else [ ] # exclude keys state_dict = ckpt['model'].float().state_dict() # to FP32 state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude) # intersect model.load_state_dict(state_dict, strict=False) # load logger.info( 'Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights)) # report else: model = Model(opt.cfg, ch=3, nc=nc).to(device) # create # Freeze freeze = [] # parameter names to freeze (full or partial) for k, v in model.named_parameters(): v.requires_grad = True # train all layers if any(x in k for x in freeze): print('freezing %s' % k) v.requires_grad = False # Optimizer nbs = 64 # nominal batch size accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay pg0, pg1, pg2 = [], [], [] # optimizer parameter groups for k, v in model.named_modules(): if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter): pg2.append(v.bias) # biases if isinstance(v, nn.BatchNorm2d): pg0.append(v.weight) # no decay elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter): pg1.append(v.weight) # apply decay if opt.adam: optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum else: optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) optimizer.add_param_group({ 'params': pg1, 'weight_decay': hyp['weight_decay'] }) # add pg1 with weight_decay optimizer.add_param_group({'params': pg2}) # add pg2 (biases) logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) del pg0, pg1, pg2 # Scheduler https://arxiv.org/pdf/1812.01187.pdf # 
https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp[ 'lrf']) + hyp['lrf'] # cosine scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) # Logging if wandb and wandb.run is None: id = ckpt.get('wandb_id') if 'ckpt' in locals() else None wandb_run = wandb.init(config=opt, resume="allow", project="YOLOv5", name=os.path.basename(log_dir), id=id) # Resume start_epoch, best_fitness = 0, 0.0 if pretrained: # Optimizer if ckpt['optimizer'] is not None: optimizer.load_state_dict(ckpt['optimizer']) best_fitness = ckpt['best_fitness'] # Results if ckpt.get('training_results') is not None: with open(results_file, 'w') as file: file.write(ckpt['training_results']) # write results.txt # Epochs start_epoch = ckpt['epoch'] + 1 if opt.resume: assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % ( weights, epochs) shutil.copytree(wdir, wdir.parent / f'weights_backup_epoch{start_epoch - 1}' ) # save previous weights if epochs < start_epoch: logger.info( '%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' % (weights, ckpt['epoch'], epochs)) epochs += ckpt['epoch'] # finetune additional epochs del ckpt, state_dict # Image sizes gs = int(max(model.stride)) # grid size (max stride) imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size ] # verify imgsz are gs-multiples # DP mode if cuda and rank == -1 and torch.cuda.device_count() > 1: model = torch.nn.DataParallel(model) # SyncBatchNorm if opt.sync_bn and cuda and rank != -1: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) logger.info('Using SyncBatchNorm()') # Exponential moving average ema = ModelEMA(model) if rank in [-1, 0] else None # DDP mode if cuda and rank != -1: model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank) # Trainloader dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt, hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank, world_size=opt.world_size, workers=opt.workers) mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class nb = len(dataloader) # number of batches assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % ( mlc, nc, opt.data, nc - 1) # Process 0 if rank in [-1, 0]: ema.updates = start_epoch * nb // accumulate # set EMA updates testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt, hyp=hyp, augment=False, cache=opt.cache_images and not opt.notest, rect=True, rank=-1, world_size=opt.world_size, workers=opt.workers)[0] # testloader if not opt.resume: labels = np.concatenate(dataset.labels, 0) c = torch.tensor(labels[:, 0]) # classes # cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency # model._initialize_biases(cf.to(device)) plot_labels(labels, save_dir=log_dir) if tb_writer: # tb_writer.add_hparams(hyp, {}) # causes duplicate https://github.com/ultralytics/yolov5/pull/384 tb_writer.add_histogram('classes', c, 0) # Anchors if not opt.noautoanchor: check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) # Model parameters hyp['cls'] *= nc / 80. 
# scale coco-tuned hyp['cls'] to current dataset model.nc = nc # attach number of classes to model model.hyp = hyp # attach hyperparameters to model model.gr = 1.0 # iou loss ratio (obj_loss = 1.0 or iou) model.class_weights = labels_to_class_weights(dataset.labels, nc).to( device) # attach class weights model.names = names # Start training t0 = time.time() nw = max(round(hyp['warmup_epochs'] * nb), 1e3) # number of warmup iterations, max(3 epochs, 1k iterations) # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training maps = np.zeros(nc) # mAP per class results = (0, 0, 0, 0, 0, 0, 0 ) # P, R, [email protected], [email protected], val_loss(box, obj, cls) scheduler.last_epoch = start_epoch - 1 # do not move scaler = amp.GradScaler(enabled=cuda) logger.info('Image sizes %g train, %g test\n' 'Using %g dataloader workers\nLogging results to %s\n' 'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, log_dir, epochs)) for epoch in range( start_epoch, epochs ): # epoch ------------------------------------------------------------------ model.train() # Update image weights (optional) if opt.image_weights: # Generate indices if rank in [-1, 0]: cw = model.class_weights.cpu().numpy() * ( 1 - maps)**2 # class weights iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights dataset.indices = random.choices( range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx # Broadcast if DDP if rank != -1: indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int() dist.broadcast(indices, 0) if rank != 0: dataset.indices = indices.cpu().numpy() # Update mosaic border # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) # dataset.mosaic_border = [b - imgsz, -b] # height, width borders mloss = torch.zeros(4, device=device) # mean losses if rank != -1: dataloader.sampler.set_epoch(epoch) pbar = enumerate(dataloader) logger.info( ('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'total', 'targets', 'img_size')) if rank in [-1, 0]: pbar = tqdm(pbar, total=nb) # progress bar optimizer.zero_grad() for i, ( imgs, targets, paths, _ ) in pbar: # batch ------------------------------------------------------------- ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float( ) / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 # Warmup if ni <= nw: xi = [0, nw] # x interp # model.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) accumulate = max( 1, np.interp(ni, xi, [1, nbs / total_batch_size]).round()) for j, x in enumerate(optimizer.param_groups): # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 x['lr'] = np.interp(ni, xi, [ hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch) ]) if 'momentum' in x: x['momentum'] = np.interp( ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) # Multi-scale if opt.multi_scale: sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size sf = sz / max(imgs.shape[2:]) # scale factor if sf != 1: ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:] ] # new shape (stretched to gs-multiple) imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) # Forward with amp.autocast(enabled=cuda): pred = model(imgs) # forward loss, loss_items = compute_loss( pred, targets.to(device), model) # loss scaled by batch_size if rank != -1: loss *= opt.world_size # gradient averaged between devices in DDP mode # Backward 
scaler.scale(loss).backward() # Optimize if ni % accumulate == 0: scaler.step(optimizer) # optimizer.step scaler.update() optimizer.zero_grad() if ema: ema.update(model) # Print # if rank in [-1, 0]: if i % 500 == 0: mloss = (mloss * i + loss_items) / (i + 1 ) # update mean losses mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB) s = ('%10s' * 2 + '%10.4g' * 6) % ('%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]) pbar.set_description(s) # Plot if ni < 3: f = str(log_dir / f'train_batch{ni}.jpg') # filename result = plot_images(images=imgs, targets=targets, paths=paths, fname=f) # if tb_writer and result is not None: # tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch) # tb_writer.add_graph(model, imgs) # add model to tensorboard # end batch ------------------------------------------------------------------------------------------------ # Scheduler lr = [x['lr'] for x in optimizer.param_groups] # for tensorboard scheduler.step() # DDP process 0 or single-GPU if rank in [-1, 0]: # mAP if ema: ema.update_attr( model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride']) final_epoch = epoch + 1 == epochs if not opt.notest or final_epoch: # Calculate mAP results, maps, times = test.test( opt.data, batch_size=total_batch_size, imgsz=imgsz_test, model=ema.ema, single_cls=opt.single_cls, dataloader=testloader, save_dir=log_dir, plots=epoch == 0 or final_epoch, # plot first and last log_imgs=opt.log_imgs) # Write with open(results_file, 'a') as f: f.write( s + '%10.4g' * 7 % results + '\n') # P, R, [email protected], [email protected], val_loss(box, obj, cls) if len(opt.name) and opt.bucket: os.system('gsutil cp %s gs://%s/results/log%s.txt' % (results_file, opt.bucket, opt.name)) # Log tags = [ 'train/giou_loss', 'train/obj_loss', 'train/cls_loss', # train loss 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', 'val/giou_loss', 'val/obj_loss', 'val/cls_loss', # val loss 'x/lr0', 'x/lr1', 'x/lr2' ] # params # logger.info(results) for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags): if tb_writer: tb_writer.add_scalar(tag, x, epoch) # tensorboard if wandb: wandb.log({tag: x}) # W&B # Update best mAP fi = fitness(np.array(results).reshape( 1, -1)) # weighted combination of [P, R, [email protected], [email protected]] if fi > best_fitness: best_fitness = fi # Save model save = (not opt.nosave) or (final_epoch and not opt.evolve) if save: with open(results_file, 'r') as f: # create checkpoint ckpt = { 'epoch': epoch, 'best_fitness': best_fitness, 'training_results': f.read(), 'model': ema.ema, 'optimizer': None if final_epoch else optimizer.state_dict(), 'wandb_id': wandb_run.id if wandb else None } ckpt = {'model': ema.ema} # Save last, best and delete torch.save(ckpt, last) if best_fitness == fi: torch.save(ckpt, best) del ckpt # end epoch ---------------------------------------------------------------------------------------------------- # end training if rank in [-1, 0]: # Strip optimizers n = opt.name if opt.name.isnumeric() else '' fresults, flast, fbest = log_dir / f'log{n}.txt', wdir / f'last{n}.pt', wdir / f'best{n}.pt' for f1, f2 in zip([wdir / 'last.pt', wdir / 'best.pt', results_file], [flast, fbest, fresults]): if os.path.exists(f1): os.rename(f1, f2) # rename if str(f2).endswith('.pt'): # is *.pt strip_optimizer(f2) # strip optimizer os.system( 'gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket else None # upload # Finish if not opt.evolve: 
            plot_results(save_dir=log_dir)  # save as results.png
        logger.info('%g epochs completed in %.3f hours.\n' %
                    (epoch - start_epoch + 1, (time.time() - t0) / 3600))

    dist.destroy_process_group() if rank not in [-1, 0] else None
    torch.cuda.empty_cache()
    return results
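# ---------------------------------------------------------------------------------------------
# Illustrative sketch (not part of the original file): how the cosine LR lambda and the linear
# warmup used in train() interact. hyp_lr0 / hyp_lrf / warmup_bias_lr and nw_demo below are
# example values standing in for the hyp dict; only the formulas mirror the code above.
import math
import numpy as np

epochs_demo, nw_demo = 300, 1000                       # total epochs, warmup iterations (assumed)
hyp_lr0, hyp_lrf, warmup_bias_lr = 0.01, 0.2, 0.1      # example hyperparameters

lf_demo = lambda x: ((1 + math.cos(x * math.pi / epochs_demo)) / 2) * (1 - hyp_lrf) + hyp_lrf  # 1.0 -> lrf

def warmup_lr(ni, epoch, j, initial_lr):
    # During the first nw iterations the bias group (j == 2) falls from warmup_bias_lr to
    # lr0 * lf(epoch); every other group rises from 0.0 to lr0 * lf(epoch).
    return np.interp(ni, [0, nw_demo], [warmup_bias_lr if j == 2 else 0.0, initial_lr * lf_demo(epoch)])

print(lf_demo(0), lf_demo(epochs_demo))                                # 1.0 at epoch 0, lrf at the last epoch
print(warmup_lr(0, 0, 2, hyp_lr0), warmup_lr(nw_demo, 0, 2, hyp_lr0))  # bias LR: 0.1 -> lr0
# ---------------------------------------------------------------------------------------------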
def train(hyp, opt, device, tb_writer=None): logger.info(f'Hyperparameters {hyp}') # 获取记录训练日志的路径 # 如果设置进化算法则不会传入tb_writer(则为None),设置一个evolve文件夹作为日志目录 log_dir = Path(tb_writer.log_dir) if tb_writer else Path( opt.logdir) / 'evolve' # logging directory # 设置保存权重的路径 wdir = log_dir / 'weights' # weights directory os.makedirs(wdir, exist_ok=True) last = wdir / 'last.pt' best = wdir / 'best.pt' # 设置保存results的路径 results_file = str(log_dir / 'results.txt') # 获取轮次、批次、总批次(涉及到分布式训练)、权重、进程序号(主要用于分布式训练) epochs, batch_size, total_batch_size, weights, rank = \ opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank # rank = -1 # Save run settings # 保存hyp和opt with open(log_dir / 'hyp.yaml', 'w') as f: yaml.dump(hyp, f, sort_keys=False) with open(log_dir / 'opt.yaml', 'w') as f: yaml.dump(vars(opt), f, sort_keys=False) # Configure cuda = (device.type != 'cpu') # 设置随机种子 init_seeds(2 + rank) with open(opt.data) as f: # 加载数据配置信息 data_dict = yaml.load(f, Loader=yaml.FullLoader) # data dict with torch_distributed_zero_first( rank): # torch_distributed_zero_first同步所有进程 check_dataset( data_dict ) # check_dataset检查数据集,如果没找到数据集则下载数据集(仅适用于项目中自带的yaml文件数据集) # 获取训练集、测试集图片路径 train_path = data_dict['train'] test_path = data_dict['val'] # 获取类别数量和类别名字, 如果设置了opt.single_cls则为一类 nc, names = (1, ['item']) if opt.single_cls else (int( data_dict['nc']), data_dict['names']) # number classes, names assert len(names) == nc, '%g names found for nc=%g dataset in %s' % ( len(names), nc, opt.data) # check # Model pretrained = weights.endswith('.pt') if pretrained: # 如果采用预训练 # 加载模型,从google云盘中自动下载模型 # 但通常会下载失败,建议提前下载下来放进weights目录 with torch_distributed_zero_first(rank): attempt_download(weights) # download if not found locally # 加载检查点 ckpt = torch.load(weights, map_location=device) # load checkpoint if hyp.get('anchors'): ckpt['model'].yaml['anchors'] = round( hyp['anchors']) # force autoanchor """ 这里模型创建,可通过opt.cfg,也可通过ckpt['model'].yaml 这里的区别在于是否是resume,resume时会将opt.cfg设为空,则按照ckpt['model'].yaml创建模型; 这也影响着下面是否除去anchor的key(也就是不加载anchor),如果resume则不加载anchor 主要是因为保存的模型会保存anchors,有时候用户自定义了anchor之后,再resume,则原来基于coco数据集的anchor就会覆盖自己设定的anchor, 参考https://github.com/ultralytics/yolov5/issues/459 所以下面设置了intersect_dicts,该函数就是忽略掉exclude """ model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device) # create exclude = ['anchor'] if opt.cfg or hyp.get('anchors') else [ ] # exclude keys state_dict = ckpt['model'].float().state_dict() # to FP32 state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude) # intersect model.load_state_dict(state_dict, strict=False) # load # 显示加载预训练权重的的键值对和创建模型的键值对 # 如果设置了resume,则会少加载两个键值对(anchors,anchor_grid) logger.info( 'Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights)) # report else: # 创建模型, ch为输入图片通道 model = Model(opt.cfg, ch=3, nc=nc).to(device) # create # Freeze """ 冻结模型层,设置冻结层名字即可 具体可以查看https://github.com/ultralytics/yolov5/issues/679 但作者不鼓励冻结层,因为他的实验当中显示冻结层不能获得更好的性能,参照:https://github.com/ultralytics/yolov5/pull/707 并且作者为了使得优化参数分组可以正常进行,在下面将所有参数的requires_grad设为了True 其实这里只是给一个freeze的示例 """ freeze = [ '', ] # parameter names to freeze (full or partial) if any(freeze): for k, v in model.named_parameters(): # print(k,v) if any(x in k for x in freeze): print('freezing %s' % k) v.requires_grad = False # Optimizer """ nbs为模拟的batch_size; 就比如默认的话上面设置的opt.batch_size为16,这个nbs就为64, 也就是模型梯度累积了64/16=4(accumulate)次之后 再更新一次模型,变相的扩大了batch_size """ nbs = 64 # nominal batch size accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss 
before optimizing accumulate = 4 # 根据accumulate设置权重衰减系数 hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay # 将模型分成三组(weight、bn, bias, 其他所有参数)优化 pg0, pg1, pg2 = [], [], [] # optimizer parameter groups for k, v in model.named_parameters(): # print(k) v.requires_grad = True if '.bias' in k: pg2.append(v) # biases elif '.weight' in k and '.bn' not in k: pg1.append(v) # apply weight decay else: pg0.append(v) # all else # 选用优化器,并设置pg0组的优化方式 if opt.adam: optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum else: optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) # 设置weight、bn的优化方式 optimizer.add_param_group({ 'params': pg1, 'weight_decay': hyp['weight_decay'] }) # add pg1 with weight_decay # 设置biases的优化方式 optimizer.add_param_group({'params': pg2}) # add pg2 (biases) # 打印优化信息 logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) del pg0, pg1, pg2 # 设置学习率衰减,这里为余弦退火方式进行衰减 # 就是根据以下公式lf,epoch和超参数hyp['lrf']进行衰减 # Scheduler https://arxiv.org/pdf/1812.01187.pdf # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp[ 'lrf']) + hyp['lrf'] # cosine scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) # Resume # 初始化开始训练的epoch和最好的结果 # best_fitness是以[0.0, 0.0, 0.1, 0.9]为系数并乘以[精确度, 召回率, [email protected], [email protected]:0.95]再求和所得 # 根据best_fitness来保存best.pt start_epoch, best_fitness = 0, 0.0 if pretrained: # Optimizer # 加载优化器与 best_fitness if ckpt['optimizer'] is not None: optimizer.load_state_dict(ckpt['optimizer']) best_fitness = ckpt['best_fitness'] # Results # 加载训练结果result.txt if ckpt.get('training_results') is not None: with open(results_file, 'w') as file: file.write(ckpt['training_results']) # write results.txt # Epochs # 加载训练的轮次 # print(ckpt['epoch']) start_epoch = ckpt['epoch'] + 1 # ckpt['epoch'] = -1 """ 如果resume,则备份权重 尽管目前resume能够近似100%成功的起作用了,参照:https://github.com/ultralytics/yolov5/pull/756 但为了防止resume时出现其他问题,把之前的权重覆盖了,所以这里进行备份,参照:https://github.com/ultralytics/yolov5/pull/765 """ if opt.resume: assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % ( weights, epochs) shutil.copytree(wdir, wdir.parent / f'weights_backup_epoch{start_epoch - 1}' ) # save previous weights """ 如果新设置epochs小于加载的epoch, 则视新设置的epochs为需要再训练的轮次数而不再是总的轮次数 """ if epochs < start_epoch: logger.info( '%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' 
% (weights, ckpt['epoch'], epochs)) epochs += ckpt['epoch'] # finetune additional epochs del ckpt, state_dict # Image sizes # 获取模型总步长和模型输入图片分辨率 gs = int(max(model.stride)) # grid size (max stride) # 检查输入图片分辨率确保能够整除总步长gs imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size ] # verify imgsz are gs-multiples # imgsz, imgsz_test 都是640 # DP mode # 分布式训练,参照:https://github.com/ultralytics/yolov5/issues/475 # DataParallel模式,仅支持单机多卡 # rank为进程编号, 这里应该设置为rank=-1则使用DataParallel模式 # rank=-1且gpu数量=1时,不会进行分布式 if cuda and rank == -1 and torch.cuda.device_count() > 1: model = torch.nn.DataParallel(model) # 执行了 # SyncBatchNorm # 使用跨卡同步BN if opt.sync_bn and cuda and rank != -1: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) logger.info('Using SyncBatchNorm()') # Exponential moving average 指数滑动平均,或指数加权平均 # 为模型创建EMA指数滑动平均,如果GPU进程数大于1,则不创建 ema = ModelEMA(model) if rank in [-1, 0] else None # DDP mode # 如果rank不等于-1,则使用DistributedDataParallel模式 # local_rank为gpu编号,rank为进程,例如rank=3,local_rank=0 表示第 3 个进程内的第 1 块 GPU。 if cuda and rank != -1: model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank) # Trainloader # 创建训练集dataloader dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt, hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank, world_size=opt.world_size, workers=opt.workers) """ 获取标签中最大的类别值,并于类别数作比较 如果小于类别数则表示有问题 """ mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class nb = len(dataloader) # number of batches assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % ( mlc, nc, opt.data, nc - 1) # Process 0 if rank in [-1, 0]: # 更新ema模型的updates参数,保持ema的平滑性 ema.updates = start_epoch * nb // accumulate # set EMA updates # 创建测试集dataloader testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt, hyp=hyp, augment=False, cache=opt.cache_images and not opt.notest, rect=True, rank=-1, world_size=opt.world_size, workers=opt.workers)[0] # testloader if not opt.resume: # 将所有样本的标签拼接到一起shape为(total, 5),统计后做可视化 labels = np.concatenate(dataset.labels, 0) # 获得所有样本的类别 c = torch.tensor(labels[:, 0]) # classes # cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency # model._initialize_biases(cf.to(device)) # 根据上面的统计对所有样本的类别,中心点xy位置,长宽wh做可视化 plot_labels(labels, save_dir=log_dir) if tb_writer: # tb_writer.add_hparams(hyp, {}) # causes duplicate https://github.com/ultralytics/yolov5/pull/384 tb_writer.add_histogram('classes', c, 0) # Anchors """ 计算默认锚点anchor与数据集标签框的长宽比值 标签的长h宽w与anchor的长h_a宽w_a的比值, 即h/h_a, w/w_a都要在(1/hyp['anchor_t'], hyp['anchor_t'])是可以接受的 如果标签框满足上面条件的数量小于总数的99%,则根据k-mean算法聚类新的锚点anchor """ if not opt.noautoanchor: check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) # Model parameters # 根据自己数据集的类别数设置分类损失的系数 hyp['cls'] *= nc / 80. 
# scale coco-tuned hyp['cls'] to current dataset # 设置类别数,超参数 model.nc = nc # attach number of classes to model model.hyp = hyp # attach hyperparameters to model """ 设置giou的值在objectness loss中做标签的系数, 使用代码如下 tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * giou.detach().clamp(0).type(tobj.dtype) 这里model.gr=1,也就是说完全使用标签框与预测框的giou值来作为该预测框的objectness标签 """ model.gr = 1.0 # giou loss ratio (obj_loss = 1.0 or giou) # 根据labels初始化图片采样权重 model.class_weights = labels_to_class_weights(dataset.labels, nc).to( device) # attach class weights # 获取类别的名字 model.names = names # Start training t0 = time.time() # 获取热身训练的迭代次数 nw = max(round(hyp['warmup_epochs'] * nb), 1e3) # number of warmup iterations, max(3 epochs, 1k iterations) # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training # 初始化mAP和results maps = np.zeros(nc) # mAP per class results = ( 0, 0, 0, 0, 0, 0, 0 ) # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification' """ 设置学习率衰减所进行到的轮次, 目的是打断训练后,--resume接着训练也能正常的衔接之前的训练进行学习率衰减 """ scheduler.last_epoch = start_epoch - 1 # do not move # 通过torch1.6自带的api设置混合精度训练 scaler = amp.GradScaler(enabled=cuda) """ 打印训练和测试输入图片分辨率 加载图片时调用的cpu进程数 从哪个epoch开始训练 """ logger.info( 'Image sizes %g train, %g test\nUsing %g dataloader workers\nLogging results to %s\n' 'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, log_dir, epochs)) for epoch in range( start_epoch, epochs ): # epoch ------------------------------------------------------------------ model.train() # Update image weights (optional) if opt.image_weights: # Generate indices """ 如果设置进行图片采样策略, 则根据前面初始化的图片采样权重model.class_weights以及maps配合每张图片包含的类别数 通过random.choices生成图片索引indices从而进行采样 """ if rank in [-1, 0]: cw = model.class_weights.cpu().numpy() * ( 1 - maps)**2 # class weights iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights dataset.indices = random.choices( range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx # Broadcast if DDP # 如果是DDP模式,则广播采样策略 if rank != -1: indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int() # 广播索引到其他group dist.broadcast(indices, 0) if rank != 0: dataset.indices = indices.cpu().numpy() # Update mosaic border # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) # dataset.mosaic_border = [b - imgsz, -b] # height, width borders # 初始化训练时打印的平均损失信息 mloss = torch.zeros(4, device=device) # mean losses if rank != -1: # DDP模式下打乱数据, ddp.sampler的随机采样数据是基于epoch+seed作为随机种子, # 每次epoch不同,随机种子就不同 dataloader.sampler.set_epoch(epoch) pbar = enumerate(dataloader) logger.info( ('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size')) if rank in [-1, 0]: # tqdm 创建进度条,方便训练时 信息的展示 pbar = tqdm(pbar, total=nb) # progress bar optimizer.zero_grad() for i, ( imgs, targets, paths, _ ) in pbar: # batch ------------------------------------------------------------- # 计算迭代的次数iteration ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float( ) / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 # Warmup """ 热身训练(前nw次迭代) 在前nw次迭代中,根据以下方式选取accumulate和学习率 """ if ni <= nw: xi = [0, nw] # x interp # model.gr = np.interp(ni, xi, [0.0, 1.0]) # giou loss ratio (obj_loss = 1.0 or giou) accumulate = max( 1, np.interp(ni, xi, [1, nbs / total_batch_size]).round()) for j, x in enumerate(optimizer.param_groups): # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 """ bias的学习率从0.1下降到基准学习率lr*lf(epoch), 
其他的参数学习率从0增加到lr*lf(epoch). lf为上面设置的余弦退火的衰减函数 """ x['lr'] = np.interp(ni, xi, [ hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch) ]) # 动量momentum也从0.9慢慢变到hyp['momentum'](default=0.937) if 'momentum' in x: x['momentum'] = np.interp( ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) # Multi-scale # 设置多尺度训练,从imgsz * 0.5, imgsz * 1.5 + gs随机选取尺寸 if opt.multi_scale: sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size sf = sz / max(imgs.shape[2:]) # scale factor if sf != 1: ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:] ] # new shape (stretched to gs-multiple) imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) # Forward # 混合精度 with amp.autocast(enabled=cuda): pred = model(imgs) # forward 前向传播 # Loss # 计算损失,包括分类损失,objectness损失,框的回归损失 # loss为总损失值,loss_items为一个元组,包含分类损失,objectness损失,框的回归损失和总损失 loss, loss_items = compute_loss( pred, targets.to(device), model) # loss scaled by batch_size if rank != -1: # 平均不同gpu之间的梯度 loss *= opt.world_size # gradient averaged between devices in DDP mode # Backward # 反向传播 scaler.scale(loss).backward() # Optimize # 模型反向传播accumulate次之后再根据累积的梯度更新一次参数 if ni % accumulate == 0: scaler.step(optimizer) # optimizer.step scaler.update() optimizer.zero_grad() if ema: ema.update(model) # Print if rank in [-1, 0]: # 打印显存,进行的轮次,损失,target的数量和图片的size等信息 mloss = (mloss * i + loss_items) / (i + 1 ) # update mean losses mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB) s = ('%10s' * 2 + '%10.4g' * 6) % ('%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]) # 进度条显示以上信息 pbar.set_description(s) # Plot # 将前三次迭代batch的标签框在图片上画出来并保存 if ni < 3: f = str(log_dir / ('train_batch%g.jpg' % ni)) # filename result = plot_images(images=imgs, targets=targets, paths=paths, fname=f) if tb_writer and result is not None: tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch) # tb_writer.add_graph(model, imgs) # add model to tensorboard # end batch ------------------------------------------------------------------------------------------------ # Scheduler # 进行学习率衰减 lr = [x['lr'] for x in optimizer.param_groups] # for tensorboard scheduler.step() # DDP process 0 or single-GPU if rank in [-1, 0]: # mAP if ema: # 更新EMA的属性 # 添加include的属性 ema.update_attr( model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride']) # 判断该epoch是否为最后一轮 final_epoch = epoch + 1 == epochs # 对测试集进行测试,计算mAP等指标 # 测试时使用的是EMA模型 if not opt.notest or final_epoch: # Calculate mAP if final_epoch: # replot predictions [ os.remove(x) for x in glob.glob( str(log_dir / 'test_batch*_pred.jpg')) if os.path.exists(x) ] results, maps, times = test.test(opt.data, batch_size=total_batch_size, imgsz=imgsz_test, model=ema.ema, single_cls=opt.single_cls, dataloader=testloader, save_dir=log_dir) # Write # 将指标写入result.txt with open(results_file, 'a') as f: f.write(s + '%10.4g' * 7 % results + '\n') # P, R, mAP, F1, test_losses=(GIoU, obj, cls) # 如果设置opt.bucket, 上传results.txt到谷歌云盘 if len(opt.name) and opt.bucket: os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name)) # Tensorboard # 添加指标,损失等信息到tensorboard显示 if tb_writer: tags = [ 'train/giou_loss', 'train/obj_loss', 'train/cls_loss', # train loss 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', 'val/giou_loss', 'val/obj_loss', 'val/cls_loss', # val loss 'x/lr0', 'x/lr1', 'x/lr2' ] # params for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags): tb_writer.add_scalar(tag, x, 
epoch)

            # Update best mAP
            fi = fitness(np.array(results).reshape(1, -1))  # fitness_i = weighted combination of [P, R, mAP, F1]
            if fi > best_fitness:
                best_fitness = fi

            # Save model
            """
            Save the model together with epoch, results, optimizer state, etc.
            The optimizer is not saved after the final epoch; the saved model is the EMA model.
            """
            save = (not opt.nosave) or (final_epoch and not opt.evolve)
            if save:
                with open(results_file, 'r') as f:  # create checkpoint
                    ckpt = {
                        'epoch': epoch,
                        'best_fitness': best_fitness,
                        'training_results': f.read(),
                        'model': ema.ema,
                        'optimizer': None if final_epoch else optimizer.state_dict()
                    }

                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fi:
                    torch.save(ckpt, best)
                del ckpt
        # end epoch ----------------------------------------------------------------------------------------------------
    # end training

    if rank in [-1, 0]:
        # Strip optimizers
        """
        After training, strip_optimizer() removes the optimizer from the checkpoint and
        calls model.half() to convert the FP32 model to FP16, which shrinks the file and
        speeds up inference.
        """
        n = opt.name if opt.name.isnumeric() else ''
        fresults, flast, fbest = log_dir / f'results{n}.txt', wdir / f'last{n}.pt', wdir / f'best{n}.pt'
        for f1, f2 in zip([wdir / 'last.pt', wdir / 'best.pt', results_file], [flast, fbest, fresults]):
            if os.path.exists(f1):
                os.rename(f1, f2)  # rename
                if str(f2).endswith('.pt'):  # is *.pt
                    strip_optimizer(f2)  # strip optimizer
                    # Upload results to the Google Cloud bucket, if one is set
                    os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket else None  # upload

        # Finish
        # Plot results.txt
        if not opt.evolve:
            plot_results(save_dir=log_dir)  # save as results.png
        logger.info('%g epochs completed in %.3f hours.\n' %
                    (epoch - start_epoch + 1, (time.time() - t0) / 3600))

    # Free GPU memory
    dist.destroy_process_group() if rank not in [-1, 0] else None
    torch.cuda.empty_cache()
    return results
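# ---------------------------------------------------------------------------------------------
# Illustrative sketch (assumption, mirroring the fitness() helper referenced above): best.pt is
# kept when a weighted sum of [P, R, mAP@0.5, mAP@0.5:0.95] improves, using the coefficients
# [0.0, 0.0, 0.1, 0.9] described in the comments. The metric values below are made up.
import numpy as np

def fitness_demo(x):
    # x: shape (1, 7) = [P, R, mAP@0.5, mAP@0.5:0.95, val_box, val_obj, val_cls]
    w = [0.0, 0.0, 0.1, 0.9]                 # mAP@0.5:0.95 dominates the score
    return (x[:, :4] * w).sum(1)

results_demo = (0.62, 0.58, 0.55, 0.33, 0.04, 0.02, 0.01)
fi = fitness_demo(np.array(results_demo).reshape(1, -1))
print(fi)                                    # 0.1 * 0.55 + 0.9 * 0.33 = 0.352
# ---------------------------------------------------------------------------------------------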
def train(hyp, opt, device, tb_writer=None): logger.info(f'Hyperparameters {hyp}') log_dir = Path(tb_writer.log_dir) if tb_writer else Path( opt.logdir) / 'evolve' # logging directory wdir = log_dir / 'weights' # weights directory os.makedirs(wdir, exist_ok=True) last = wdir / 'last.pt' best = wdir / 'best.pt' results_file = str(log_dir / 'results.txt') epochs, batch_size, total_batch_size, weights, rank = \ opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank # Save run settings with open(log_dir / 'hyp.yaml', 'w') as f: yaml.dump(hyp, f, sort_keys=False) with open(log_dir / 'opt.yaml', 'w') as f: yaml.dump(vars(opt), f, sort_keys=False) # Configure cuda = device.type != 'cpu' init_seeds(2 + rank) with open(opt.data) as f: data_dict = yaml.load(f, Loader=yaml.FullLoader) # data dict with torch_distributed_zero_first(rank): check_dataset(data_dict) # check train_path = data_dict['train'] test_path = data_dict['val'] nc, names = (1, ['item']) if opt.single_cls else (int( data_dict['nc']), data_dict['names']) # number classes, names assert len(names) == nc, '%g names found for nc=%g dataset in %s' % ( len(names), nc, opt.data) # check # Model model = Model(opt.cfg, ch=3, nc=nc).to(device) # create # Freeze freeze = [ '', ] # parameter names to freeze (full or partial) if any(freeze): for k, v in model.named_parameters(): if any(x in k for x in freeze): print('freezing %s' % k) v.requires_grad = False # Optimizer nbs = 64 # nominal batch size accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay pg0, pg1, pg2 = [], [], [] # optimizer parameter groups for k, v in model.named_parameters(): v.requires_grad = True if '.bias' in k: pg2.append(v) # biases elif '.weight' in k and '.bn' not in k: pg1.append(v) # apply weight decay else: pg0.append(v) # all else if opt.adam: optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum else: optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) optimizer.add_param_group({ 'params': pg1, 'weight_decay': hyp['weight_decay'] }) # add pg1 with weight_decay optimizer.add_param_group({'params': pg2}) # add pg2 (biases) logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) del pg0, pg1, pg2 lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp[ 'lrf']) + hyp['lrf'] # cosine scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # Resume start_epoch, best_fitness = 0, 0.0 # Image sizes gs = int(max(model.stride)) # grid size (max stride) imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size ] # verify imgsz are gs-multiples # DP mode if cuda and rank == -1 and torch.cuda.device_count() > 1: model = torch.nn.DataParallel(model) # SyncBatchNorm if opt.sync_bn and cuda and rank != -1: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) logger.info('Using SyncBatchNorm()') # Exponential moving average ema = ModelEMA(model) if rank in [-1, 0] else None # DDP mode if cuda and rank != -1: model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank) # Trainloader dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt, hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank, world_size=opt.world_size, workers=opt.workers) mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class nb = len(dataloader) # 
number of batches assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % ( mlc, nc, opt.data, nc - 1) # Process 0 if rank in [-1, 0]: ema.updates = start_epoch * nb // accumulate # set EMA updates testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt, hyp=hyp, augment=False, cache=opt.cache_images and not opt.notest, rect=True, rank=-1, world_size=opt.world_size, workers=opt.workers)[0] # testloader if not opt.resume: labels = np.concatenate(dataset.labels, 0) c = torch.tensor(labels[:, 0]) # classes plot_labels(labels, save_dir=log_dir) if tb_writer: tb_writer.add_histogram('classes', c, 0) # Anchors if not opt.noautoanchor: check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) # Model parameters hyp['cls'] *= nc / 80. # scale coco-tuned hyp['cls'] to current dataset model.nc = nc # attach number of classes to model model.hyp = hyp # attach hyperparameters to model model.gr = 1.0 # giou loss ratio (obj_loss = 1.0 or giou) model.class_weights = labels_to_class_weights(dataset.labels, nc).to( device) # attach class weights model.names = names # Start training t0 = time.time() nw = max(round(hyp['warmup_epochs'] * nb), 1e3) # number of warmup iterations, max(3 epochs, 1k iterations) # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training maps = np.zeros(nc) # mAP per class results = ( 0, 0, 0, 0, 0, 0, 0 ) # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification' scheduler.last_epoch = start_epoch - 1 # do not move scaler = amp.GradScaler(enabled=cuda) logger.info( 'Image sizes %g train, %g test\nUsing %g dataloader workers\nLogging results to %s\n' 'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, log_dir, epochs)) for epoch in range( start_epoch, epochs ): # epoch ------------------------------------------------------------------ model.train() # Update image weights (optional) if opt.image_weights: # Generate indices if rank in [-1, 0]: cw = model.class_weights.cpu().numpy() * ( 1 - maps)**2 # class weights iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights dataset.indices = random.choices( range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx # Broadcast if DDP if rank != -1: indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int() dist.broadcast(indices, 0) if rank != 0: dataset.indices = indices.cpu().numpy() mloss = torch.zeros(4, device=device) # mean losses if rank != -1: dataloader.sampler.set_epoch(epoch) pbar = enumerate(dataloader) logger.info( ('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size')) if rank in [-1, 0]: pbar = tqdm(pbar, total=nb) # progress bar optimizer.zero_grad() for i, ( imgs, targets, paths, _ ) in pbar: # batch ------------------------------------------------------------- ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float( ) / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 # Warmup if ni <= nw: xi = [0, nw] # x interp accumulate = max( 1, np.interp(ni, xi, [1, nbs / total_batch_size]).round()) for j, x in enumerate(optimizer.param_groups): x['lr'] = np.interp(ni, xi, [ hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch) ]) if 'momentum' in x: x['momentum'] = np.interp( ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) # Multi-scale if opt.multi_scale: sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * 
gs # size sf = sz / max(imgs.shape[2:]) # scale factor if sf != 1: ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:] ] # new shape (stretched to gs-multiple) imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) # Forward with amp.autocast(enabled=cuda): pred = model(imgs) # forward loss, loss_items = compute_loss( pred, targets.to(device), model) # loss scaled by batch_size if rank != -1: loss *= opt.world_size # gradient averaged between devices in DDP mode # Backward scaler.scale(loss).backward() # Optimize if ni % accumulate == 0: scaler.step(optimizer) # optimizer.step scaler.update() optimizer.zero_grad() if ema: ema.update(model) # Print if rank in [-1, 0]: mloss = (mloss * i + loss_items) / (i + 1 ) # update mean losses mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB) s = ('%10s' * 2 + '%10.4g' * 6) % ('%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]) pbar.set_description(s) # Plot if ni < 3: f = str(log_dir / ('train_batch%g.jpg' % ni)) # filename result = plot_images(images=imgs, targets=targets, paths=paths, fname=f) if tb_writer and result is not None: tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch) # tb_writer.add_graph(model, imgs) # add model to tensorboard # end batch ------------------------------------------------------------------------------------------------ # Scheduler lr = [x['lr'] for x in optimizer.param_groups] # for tensorboard scheduler.step() # DDP process 0 or single-GPU if rank in [-1, 0]: # mAP if ema: ema.update_attr( model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride']) final_epoch = epoch + 1 == epochs if not opt.notest or final_epoch: # Calculate mAP if final_epoch: # replot predictions [ os.remove(x) for x in glob.glob( str(log_dir / 'test_batch*_pred.jpg')) if os.path.exists(x) ] results, maps, times = test.test(opt.data, batch_size=total_batch_size, imgsz=imgsz_test, model=ema.ema, single_cls=opt.single_cls, dataloader=testloader, save_dir=log_dir) # Write with open(results_file, 'a') as f: f.write(s + '%10.4g' * 7 % results + '\n') # P, R, mAP, F1, test_losses=(GIoU, obj, cls) if len(opt.name) and opt.bucket: os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name)) # Tensorboard if tb_writer: tags = [ 'train/giou_loss', 'train/obj_loss', 'train/cls_loss', # train loss 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', 'val/giou_loss', 'val/obj_loss', 'val/cls_loss', # val loss 'x/lr0', 'x/lr1', 'x/lr2' ] # params for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags): tb_writer.add_scalar(tag, x, epoch) # Update best mAP fi = fitness(np.array(results).reshape( 1, -1)) # fitness_i = weighted combination of [P, R, mAP, F1] if fi > best_fitness: best_fitness = fi # Save model save = (not opt.nosave) or (final_epoch and not opt.evolve) if save: with open(results_file, 'r') as f: # create checkpoint ckpt = { 'epoch': epoch, 'best_fitness': best_fitness, 'training_results': f.read(), 'model': ema.ema, 'optimizer': None if final_epoch else optimizer.state_dict() } # Save last, best and delete torch.save(ckpt, last) if best_fitness == fi: torch.save(ckpt, best) del ckpt # end epoch ---------------------------------------------------------------------------------------------------- # end training if rank in [-1, 0]: # Strip optimizers n = opt.name if opt.name.isnumeric() else '' fresults, flast, fbest = log_dir / f'results{n}.txt', wdir / f'last{n}.pt', 
wdir / f'best{n}.pt'
        for f1, f2 in zip([wdir / 'last.pt', wdir / 'best.pt', results_file], [flast, fbest, fresults]):
            if os.path.exists(f1):
                os.rename(f1, f2)  # rename
                if str(f2).endswith('.pt'):  # is *.pt
                    strip_optimizer(f2)  # strip optimizer
                    os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket else None  # upload

        # Finish
        if not opt.evolve:
            plot_results(save_dir=log_dir)  # save as results.png
        logger.info('%g epochs completed in %.3f hours.\n' %
                    (epoch - start_epoch + 1, (time.time() - t0) / 3600))

    dist.destroy_process_group() if rank not in [-1, 0] else None
    torch.cuda.empty_cache()
    return results
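# ---------------------------------------------------------------------------------------------
# Illustrative sketch (toy model and data, not the project's code): the nominal-batch-size trick
# used above. With nbs = 64 and batch_size = 16, gradients are accumulated over 4 batches before
# each optimizer step, and weight decay is rescaled so the effective regularization is unchanged.
import torch
import torch.nn as nn

nbs, total_batch_size = 64, 16
accumulate = max(round(nbs / total_batch_size), 1)            # 4
weight_decay = 0.0005 * total_batch_size * accumulate / nbs   # scale weight_decay as train() does

model = nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, weight_decay=weight_decay)

for ni in range(8):                                           # ni = integrated batch counter
    x, y = torch.randn(total_batch_size, 10), torch.randn(total_batch_size, 1)
    loss = nn.functional.mse_loss(model(x), y)
    loss.backward()                                           # gradients accumulate across batches
    if ni % accumulate == 0:                                  # step every `accumulate` batches (same quirk as train(): also steps at ni == 0)
        optimizer.step()
        optimizer.zero_grad()
# ---------------------------------------------------------------------------------------------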
def test(data, weights=None, batch_size=16, imgsz=640, conf_thres=0.001, iou_thres=0.6, # for NMS save_json=False, single_cls=False, augment=False, verbose=False, model=None, dataloader=None, save_dir=Path(''), # for saving images save_txt=False, # for auto-labelling plots=True): # Initialize/load model and set device # 判断是否在训练时调用test,如果是则获取训练时的设备 training = model is not None if training: # called by train.py device = next(model.parameters()).device # get model device else: # called directly set_logging() device = select_device(opt.device, batch_size=batch_size) save_txt = opt.save_txt # save *.txt labels if save_txt: out = Path('inference/output') if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder # Remove previous # 删除之前的test_batch0_gt.jpg和test_batch0_pred.jpg for f in glob.glob(str(save_dir / 'test_batch*.jpg')): os.remove(f) # Load model model = attempt_load(weights, map_location=device) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99 # if device.type != 'cpu' and torch.cuda.device_count() > 1: # model = nn.DataParallel(model) # Half # 如果设备不是cpu,则将模型由Float32转为Float16,提高前向传播的速度 half = device.type != 'cpu' # half precision only supported on CUDA if half: model.half() # Configure # 将模型字符串转变为函数 model.eval() with open(data) as f: data = yaml.load(f, Loader=yaml.FullLoader) # model dict check_dataset(data) # check nc = 1 if single_cls else int(data['nc']) # number of classes # 设置iou阈值,从0.5~0.95,每间隔0.05取一次 iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for [email protected]:0.95 # iou个数 niou = iouv.numel() # Dataloader if not training: # 创建一个全0数组测试一下前向传播是否正常运行 img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img) if device.type != 'cpu' else None # run once # 获取图片路径 path = data['test'] if opt.task == 'test' else data['val'] # path to val/test images # 创建dataloader # 注意这里rect参数为True,yolov5的测试评估是基于矩形推理的 dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt, hyp=None, augment=False, cache=False, pad=0.5, rect=True)[0] # 初始化测试的图片数量 seen = 0 # 获取类别的名字 names = model.names if hasattr(model, 'names') else model.module.names """ 获取coco数据集的类别索引 这里要说明一下,coco数据集有80个类别(索引范围应该为0~79), 但是他的索引却属于0~90(笔者是通过查看coco数据测试集的json文件发现的,具体原因不知) coco80_to_coco91_class()就是为了与上述索引对应起来,返回一个范围在0~90的索引数组 """ coco91class = coco80_to_coco91_class() # 设置tqdm进度条的显示信息 s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', '[email protected]', '[email protected]:.95') # 初始化指标,时间 p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0. 
# 初始化测试集的损失 loss = torch.zeros(4, device=device) # 初始化json文件的字典,统计信息,ap jdict, stats, ap, ap_class = [], [], [], [] for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): ''' i: batch_index, 第i个batch imgs : torch.Size([batch_size, 3, weights, heights]) targets : torch.Size = (该batch中的目标数量, [该image属于该batch的第几个图片, class, xywh, Θ]) paths : List['img1_path','img2_path',......,'img-1_path'] len(paths)=batch_size shape : ''' img = img.to(device, non_blocking=True) # 图片也由Float32->Float16 img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 targets = targets.to(device) nb, _, height, width = img.shape # batch size, channels, height, width whwh = torch.Tensor([width, height, width, height]).to(device) # Disable gradients with torch.no_grad(): # Run model t = time_synchronized() ''' Detect层在的输出:(z,x) if training : x list: [small_forward, medium_forward, large_forward] eg:small_forward.size=( batch_size, 3种scale框, size1, size2, no) else : (z,x) z tensor: [small+medium+large_inference] size=(batch_size, 3 * (small_size1*small_size2 + medium_size1*medium_size2 + large_size1*large_size2), no) 真实坐标 x list: [small_forward, medium_forward, large_forward] eg:small_forward.size=( batch_size, 3种scale框, size1, size2, no) ''' inf_out, train_out = model(img, augment=augment) # inference and training outputs t0 += time_synchronized() - t # Compute loss if training: # if model has loss hyperparameters loss += compute_loss([x.float() for x in train_out], targets, model)[1][:4] # box, obj, cls, angle # Run NMS t = time_synchronized() # output : size = (batch_size, num_conf_nms, [xywhθ,conf,classid]) θ∈[0,179] #output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres) output = rotate_non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres) t1 += time_synchronized() - t # Statistics per image for si, pred in enumerate(output): ''' targets : torch.Size = (该batch中的目标数量, [该image属于该batch的第几个图片, class, xywh, θ]) θ∈[0,179] pred : shape=(num_conf_nms, [xywhθ,conf,classid]) θ∈[0,179] si : 该batch中的第几张图 ''' # labels: shape= (num, [class, xywh, θ]) labels = targets[targets[:, 0] == si, 1:] nl = len(labels) tcls = labels[:, 0].tolist() if nl else [] # target class seen += 1 if pred is None: if nl: stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)) continue # # Append to text file # if save_txt: # gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]] # normalization gain whwh # x = pred.clone() # x[:, :4] = scale_coords(img[si].shape[1:], x[:, :4], shapes[si][0], shapes[si][1]) # to original # for *xyxy, conf, cls in x: # xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh # with open(str(out / Path(paths[si]).stem) + '.txt', 'a') as f: # f.write(('%g ' * 5 + '\n') % (cls, *xywh)) # label format # Clip boxes to image bounds # clip_coords(pred, (height, width)) # Append to pycocotools JSON dictionary # if save_json: # # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... 
# image_id = Path(paths[si]).stem # box = pred[:, :4].clone() # xyxy # scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1]) # to original shape # box = xyxy2xywh(box) # xywh # box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner # for p, b in zip(pred.tolist(), box.tolist()): # jdict.append({'image_id': int(image_id) if image_id.isnumeric() else image_id, # 'category_id': coco91class[int(p[5])], # 'bbox': [round(x, 3) for x in b], # 'score': round(p[4], 5)}) # Assign all predictions as incorrect correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device) # pred : shape=(num_conf_nms, [xywhθ,conf,classid]) θ∈[0,179] # labels: shape= (num, [class, xywh, θ]) if nl: detected = [] # target indices tcls_tensor = labels[:, 0] # torch.size(num) # target boxes -> orignal shape tbox = labels[:, 1:5] * whwh # torch.size(num,[xywh]) 1024*1024 无所谓顺序 #ttheta = labels[:, 5] # torch.size(num,[Θ]) # Per target class for cls in torch.unique(tcls_tensor): # unique函数去除其中重复的元素,并按元素(类别)由大到小返回一个新的无元素重复的元组或者列表 ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1) # target indices pi = (cls == pred[:, 6]).nonzero(as_tuple=False).view(-1) # prediction indices # Search for detections if pi.shape[0]: # Prediction to target ious ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1) # best ious, indices #rious, i = rbox_iou(pred[:, :4], pred[:, 4].unsqueeze(1), tbox, ttheta.unsqueeze(1)).max(1) # best rious, indices # Append detections detected_set = set() for j in (ious > iouv[0]).nonzero(as_tuple=False): d = ti[i[j]] # detected target if d.item() not in detected_set: detected_set.add(d.item()) detected.append(d) correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn if len(detected) == nl: # all targets already located in image break # Append statistics (correct, conf, pcls, tcls) stats.append((correct.cpu(), pred[:, 5].cpu(), pred[:, 6].cpu(), tcls)) # Plot images if plots and batch_i < 1: f = save_dir / ('test_batch%g_gt.jpg' % batch_i) # filename plot_images(img, targets, paths, str(f), names) # ground truth f = save_dir / ('test_batch%g_pred.jpg' % batch_i) plot_images(img, output_to_target(output, width, height), paths, str(f), names) # predictions # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, fname=save_dir / 'precision-recall_curve.png') p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1) # [P, R, [email protected], [email protected]:0.95] mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class else: nt = torch.zeros(1) # Print results pf = '%20s' + '%12.3g' * 6 # print format print(pf % ('all', seen, nt.sum(), mp, mr, map50, map)) # Print results per class if verbose and nc > 1 and len(stats): for i, c in enumerate(ap_class): print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])) # Print speeds t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size) # tuple if not training: print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t) # Save JSON if save_json and len(jdict): f = 'detections_val2017_%s_results.json' % \ (weights.split(os.sep)[-1].replace('.pt', '') if isinstance(weights, str) else '') # filename print('\nCOCO mAP with pycocotools... saving %s...' 
% f)
        with open(f, 'w') as file:
            json.dump(jdict, file)

        try:  # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb
            from pycocotools.coco import COCO
            from pycocotools.cocoeval import COCOeval

            imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files]
            cocoGt = COCO(glob.glob('../coco/annotations/instances_val*.json')[0])  # initialize COCO ground truth api
            cocoDt = cocoGt.loadRes(f)  # initialize COCO pred api
            cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
            cocoEval.params.imgIds = imgIds  # image IDs to evaluate
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()
            map, map50 = cocoEval.stats[:2]  # update results (mAP@0.5:0.95, mAP@0.5)
        except Exception as e:
            print('ERROR: pycocotools unable to run: %s' % e)

    # Return results
    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
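# ---------------------------------------------------------------------------------------------
# Illustrative sketch (assumption): how test() marks each prediction as correct at the ten COCO
# IoU thresholds before ap_per_class() turns the matches into mAP@0.5:0.95. box_iou_xyxy here is
# a plain axis-aligned IoU for demonstration; the rotated-box pipeline above does its own matching.
import torch

iouv = torch.linspace(0.5, 0.95, 10)          # IoU thresholds for mAP@0.5:0.95
niou = iouv.numel()

def box_iou_xyxy(a, b):
    # a: (N, 4), b: (M, 4) boxes in xyxy format; returns an (N, M) IoU matrix
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    lt = torch.max(a[:, None, :2], b[None, :, :2])
    rb = torch.min(a[:, None, 2:], b[None, :, 2:])
    inter = (rb - lt).clamp(min=0).prod(2)
    return inter / (area_a[:, None] + area_b[None, :] - inter)

pred_boxes = torch.tensor([[0., 0., 10., 10.], [20., 20., 30., 30.]])   # two predictions
gt_boxes = torch.tensor([[1., 1., 10., 10.]])                           # one ground-truth box
correct = torch.zeros(pred_boxes.shape[0], niou, dtype=torch.bool)
ious, idx = box_iou_xyxy(pred_boxes, gt_boxes).max(1)                   # best GT for each prediction
for j in (ious > iouv[0]).nonzero(as_tuple=False):
    correct[j] = ious[j] > iouv                                         # True at every threshold the match clears
print(correct)   # row 0 is True up to its IoU level (0.81), row 1 is all False
# ---------------------------------------------------------------------------------------------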
def train(hyp, opt, device, tb_writer=None): logger.info(f'Hyperparameters {hyp}') """ 获取记录训练日志的路径: 训练日志包括:权重、tensorboard文件、超参数hyp、设置的训练参数opt(也就是epochs,batch_size等),result.txt result.txt包括: 占GPU内存、训练集的GIOU loss, objectness loss, classification loss, 总loss, targets的数量, 输入图片分辨率, 准确率TP/(TP+FP),召回率TP/P ; 测试集的mAP50, [email protected]:0.95, GIOU loss, objectness loss, classification loss. 还会保存batch<3的ground truth """ # 如果设置进化算法则不会传入tb_writer(则为None),设置一个evolve文件夹作为日志目录 log_dir = Path(tb_writer.log_dir) if tb_writer else Path( opt.logdir) / 'evolve' # logging directory # 设置生成文件的保存路径 wdir = log_dir / 'weights' # weights directory os.makedirs(wdir, exist_ok=True) last = wdir / 'last.pt' best = wdir / 'best.pt' results_file = str(log_dir / 'results.txt') # 获取轮次、批次、总批次(涉及到分布式训练)、权重、进程序号(主要用于分布式训练) epochs, batch_size, total_batch_size, weights, rank = \ opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank # Save run settings # 保存hyp和opt with open(log_dir / 'hyp.yaml', 'w') as f: yaml.dump(hyp, f, sort_keys=False) with open(log_dir / 'opt.yaml', 'w') as f: yaml.dump(vars(opt), f, sort_keys=False) # Configure # 获取数据路径 cuda = device.type != 'cpu' # 设置随机种子 # 需要在每一个进程设置相同的随机种子,以便所有模型权重都初始化为相同的值,即确保神经网络每次初始化都相同 init_seeds(2 + rank) # 加载数据配置信息 with open(opt.data) as f: data_dict = yaml.load(f, Loader=yaml.FullLoader) # data dict # torch_distributed_zero_first同步所有进程 # check_dataset检查数据集,如果没找到数据集则下载数据集(仅适用于项目中自带的yaml文件数据集) with torch_distributed_zero_first(rank): check_dataset(data_dict) # check # 获取训练集、测试集图片路径 train_path = data_dict['train'] test_path = data_dict['val'] # 获取类别数量和类别名字 # 如果设置了opt.single_cls则为一类 nc, names = (1, ['item']) if opt.single_cls else ( int(data_dict['nc']), data_dict['names']) # 保存data.yaml中的number classes, names assert len(names) == nc, '%g names found for nc=%g dataset in %s' % ( len(names), nc, opt.data) # check # Model # 判断weights字符串是否以'.pt'为结尾。若是,则说明本次训练需要预训练模型 pretrained = weights.endswith('.pt') if pretrained: # 加载模型,从google云盘中自动下载模型 # 但通常会下载失败,建议提前下载下来放进weights目录 with torch_distributed_zero_first(rank): attempt_download(weights) # download if not found locally ckpt = torch.load(weights, map_location=device) # load checkpoint 导入权重文件 """ 这里模型创建,可通过opt.cfg,也可通过ckpt['model'].yaml 这里的区别在于是否是resume,resume时会将opt.cfg设为空, 则按照ckpt['model'].yaml创建模型; 这也影响着下面是否除去anchor的key(也就是不加载anchor), 如果resume,则加载权重中保存的anchor来继续训练; 主要是预训练权重里面保存了默认coco数据集对应的anchor, 如果用户自定义了anchor,再加载预训练权重进行训练,会覆盖掉用户自定义的anchor; 所以这里主要是设定一个,如果加载预训练权重进行训练的话,就去除掉权重中的anchor,采用用户自定义的; 如果是resume的话,就是不去除anchor,就权重和anchor一起加载, 接着训练; 参考https://github.com/ultralytics/yolov5/issues/459 所以下面设置了intersect_dicts,该函数就是忽略掉exclude中的键对应的值 """ ''' ckpt: {'epoch': -1, 'best_fitness': array([ 0.49124]), 'training_results': None, 'model': Model( ... ) 'optimizer': None } ''' if hyp.get('anchors'): # 用户自定义的anchors优先级大于权重文件中自带的anchors ckpt['model'].yaml['anchors'] = round( hyp['anchors']) # force autoanchor # 创建并初始化yolo模型 model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device) # create ''' model = Model( (model): Sequential( (0): Focus(...) ... (24): Detect(...) 
) ) ''' # 如果opt.cfg存在,或重新设置了'anchors',则将预训练权重文件中的'anchors'参数清除,使用用户自定义的‘anchors’信息 exclude = ['anchor'] if opt.cfg or hyp.get('anchors') else [ ] # exclude keys # state_dict变量存放训练过程中需要学习的权重和偏执系数,state_dict 是一个python的字典格式,以字典的格式存储,然后以字典的格式被加载,而且只加载key匹配的项 # 将ckpt中的‘model’中的”可训练“的每一层的参数建立映射关系(如 'conv1.weight': 数值...)存在state_dict中 state_dict = ckpt['model'].float().state_dict() # to FP32 # 加载除了与exclude以外,所有与key匹配的项的参数 即将权重文件中的参数导入对应层中 state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude) # intersect # 将最终模型参数导入yolo模型 model.load_state_dict(state_dict, strict=False) # load logger.info( 'Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights)) # report else: # 不进行预训练,则直接创建并初始化yolo模型 model = Model(opt.cfg, ch=3, nc=nc).to(device) # create # Freeze #freeze = ['', ] # parameter names to freeze (full or partial) freeze = ['model.%s.' % x for x in range(10) ] # 冻结带有'model.0.'-'model.9.'的所有参数 即冻结0-9层的backbone if any(freeze): for k, v in model.named_parameters(): if any(x in k for x in freeze): print('freezing %s' % k) v.requires_grad = False # Optimizer """ nbs人为模拟的batch_size; 就比如默认的话上面设置的opt.batch_size为16,这个nbs就为64, 也就是模型梯度累积了64/16=4(accumulate)次之后 再更新一次模型,变相的扩大了batch_size """ nbs = 64 # nominal batch size accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing # 根据accumulate设置权重衰减系数 hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay pg0, pg1, pg2 = [], [], [] # optimizer parameter groups # 将模型分成三组(w权重参数(非bn层), bias, 其他所有参数)优化 for k, v in model.named_parameters(): # named_parameters:网络层的名字和参数的迭代器 ''' (0): Focus( (conv): Conv( (conv): Conv2d(12, 80, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False) (bn): BatchNorm2d(80, eps=0.001, momentum=0.03, affine=True, track_running_stats=True) (act): Hardswish() ) ) k: 网络层可训练参数的名字所属 如: model.0.conv.conv.weight 或 model.0.conv.bn.weight 或 model.0.conv.bn.bias (Focus层举例) v: 对应网络层的具体参数 如:对应model.0.conv.conv.weight的 size为(80,12,3,3)的参数数据 即 卷积核的数量为80,深度为12,size为3×3 ''' v.requires_grad = True # 设置当前参数在训练时保留梯度信息 if '.bias' in k: pg2.append(v) # biases (所有的偏置参数) elif '.weight' in k and '.bn' not in k: pg1.append(v) # apply weight decay (非bn层的权重参数w) else: pg0.append(v) # all else (网络层的其他参数) # 选用优化器,并设置pg0组的优化方式 if opt.adam: optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum else: optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) # 设置权重参数weights(非bn层)的优化方式 optimizer.add_param_group({ 'params': pg1, 'weight_decay': hyp['weight_decay'] }) # add pg1 with weight_decay # 设置偏置参数bias的优化方式 optimizer.add_param_group({'params': pg2}) # add pg2 (biases) logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) del pg0, pg1, pg2 # 设置学习率衰减,这里为余弦退火方式进行衰减 # 就是根据以下公式lf,epoch和超参数hyp['lrf']进行衰减 # Scheduler https://arxiv.org/pdf/1812.01187.pdf # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp[ 'lrf']) + hyp['lrf'] # cosine 匿名余弦退火函数 scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) # Resume # 初始化开始训练的epoch和最好的结果 # best_fitness是以[0.0, 0.0, 0.1, 0.9]为系数并乘以[精确度, 召回率, [email protected], [email protected]:0.95]再求和所得 # 根据best_fitness来保存best.pt start_epoch, best_fitness = 0, 0.0 if pretrained: # Optimizer # 加载优化器与best_fitness if ckpt['optimizer'] is not None: 
            optimizer.load_state_dict(ckpt['optimizer'])
            best_fitness = ckpt['best_fitness']

        # Results
        # restore the previous training log (results.txt)
        if ckpt.get('training_results') is not None:
            with open(results_file, 'w') as file:
                file.write(ckpt['training_results'])  # write results.txt

        # Epochs
        # resume from the epoch stored in the checkpoint
        start_epoch = ckpt['epoch'] + 1
        # if --resume is set but start_epoch == 0, the previous run already finished and there is nothing to resume
        if opt.resume:
            assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weights, epochs)
            shutil.copytree(wdir, wdir.parent / f'weights_backup_epoch{start_epoch - 1}')  # save previous weights
        # if the requested epochs is smaller than the checkpoint epoch, treat it as the number of additional fine-tuning epochs
        if epochs < start_epoch:
            logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
                        (weights, ckpt['epoch'], epochs))
            epochs += ckpt['epoch']  # finetune additional epochs

        del ckpt, state_dict

    # Image sizes
    # grid size = largest model stride; input resolutions must be multiples of it
    gs = int(max(model.stride))  # grid size (max stride)
    imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size]  # verify imgsz are gs-multiples

    # DP mode
    # multi-GPU notes: https://github.com/ultralytics/yolov5/issues/475
    # DataParallel: single machine, multiple GPUs, no mixed-precision support;
    # only used when rank == -1 (no DDP) and more than one GPU is visible
    if cuda and rank == -1 and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # SyncBatchNorm
    # synchronize BatchNorm statistics across GPUs in DDP mode
    if opt.sync_bn and cuda and rank != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        logger.info('Using SyncBatchNorm()')

    # Exponential moving average
    '''
    EMA is one of the YOLOv5 training tricks: EMA + SGD improves model robustness.
    A smoothed (exponential moving average) copy of the weights is kept; it is only
    created on the main process (rank -1 or 0).
    '''
    ema = ModelEMA(model) if rank in [-1, 0] else None

    # DDP mode
    # rank != -1 means DistributedDataParallel is used;
    # local_rank is the GPU index inside a process, rank is the process index
    # (e.g. rank=3, local_rank=0 is the first GPU of the fourth process)
    if cuda and rank != -1:
        model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank)

    # Trainloader
    dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt,
                                            hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect,
                                            rank=rank, world_size=opt.world_size, workers=opt.workers)
    # the largest class id in the labels must be smaller than nc, otherwise the dataset labels are wrong
    mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
    nb = len(dataloader)  # number of batches
    assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1)
    '''
    dataloader vs. testloader:
    1. testloader: no augmentation, rect=True (test images keep their aspect ratio)
    2. dataloader: augmentation enabled, rectangular training optional (opt.rect)
    '''
    # Process 0
    if rank in [-1, 0]:
        # local_rank is set to -1 because only the first process is expected to do evaluation
        ema.updates = start_epoch * nb // accumulate  # set EMA updates
        # testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt,
        #                                hyp=hyp, augment=False, cache=opt.cache_images and not opt.notest, rect=True,
        #                                rank=-1, world_size=opt.world_size, workers=opt.workers)[0]  # testloader

        if not opt.resume:
            labels = np.concatenate(dataset.labels, 0)
            c = torch.tensor(labels[:, 0])  # classes
            # cf = torch.bincount(c.long(), minlength=nc) + 1.  # frequency
            # model._initialize_biases(cf.to(device))
            plot_labels(labels, save_dir=log_dir)
            if tb_writer:
                # tb_writer.add_hparams(hyp, {})  # causes duplicate https://github.com/ultralytics/yolov5/pull/384
                tb_writer.add_histogram('classes', c, 0)

            # Anchors
            if not opt.noautoanchor:
                check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)

    # Model parameters
    # scale the classification loss gain to the number of classes in the current dataset
    hyp['cls'] *= nc / 80.  # scale coco-tuned hyp['cls'] to current dataset
    # attach class count and hyperparameters to the model
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    """
    model.gr controls how much the IoU between target and prediction is used as the objectness label:
        tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * giou.detach().clamp(0).type(tobj.dtype)
    with model.gr = 1.0 the objectness target is exactly the IoU of the matched prediction box
    """
    model.gr = 1.0  # iou loss ratio (obj_loss = 1.0 or iou)
    # per-class sampling weights derived from the labels (over-represented classes are sampled less often)
    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)  # attach class weights
    # class names
    model.names = names

    # Start training
    t0 = time.time()
    # number of warm-up iterations: at least hyp['warmup_epochs'] epochs and at least 1k iterations
    nw = max(round(hyp['warmup_epochs'] * nb), 1e3)  # number of warmup iterations
    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
    # initialize per-class mAP and the results tuple
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@0.5, mAP@0.5:0.95, val_loss(box, obj, cls, angle)
    """
    set the scheduler's last epoch so that, after an interrupted run,
    --resume continues the learning-rate decay exactly where it left off
    """
    scheduler.last_epoch = start_epoch - 1  # do not move
    # mixed-precision training via the AMP API shipped with torch >= 1.6
    scaler = amp.GradScaler(enabled=cuda)
    # log train/test image sizes, the number of dataloader workers and the starting epoch
    logger.info(
        'Image sizes %g train, %g test\nUsing %g dataloader workers\nLogging results to %s\n'
        'Starting training for %g epochs...'
        % (imgsz, imgsz_test, dataloader.num_workers, log_dir, epochs))

    # Train
    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
        # switch to train mode: the `training` flag changes the behaviour of BatchNorm and Dropout
        # between the training and evaluation forward passes
        model.train()

        # Update image weights (optional)
        if opt.image_weights:
            # Generate indices
            """
            if image-weight sampling is enabled, combine the class weights model.class_weights
            with the current per-class mAP (maps) to compute per-image weights, then resample
            the image indices with random.choices
            """
            if rank in [-1, 0]:
                cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2  # class weights
                iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw)  # image weights
                dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n)  # rand weighted idx
            # Broadcast if DDP: share the sampled indices with all processes
            if rank != -1:
                indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int()
                dist.broadcast(indices, 0)
                if rank != 0:
                    dataset.indices = indices.cpu().numpy()

        # Update mosaic border
        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders

        # running mean of the losses printed during training
        mloss = torch.zeros(5, device=device)  # mean losses
        if rank != -1:
            # shuffle under DDP: the sampler seeds its shuffling with epoch + seed,
            # so every epoch gets a different ordering
            dataloader.sampler.set_epoch(epoch)
        pbar = enumerate(dataloader)
        logger.info(('\n' + '%10s' * 9) %
                    ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'angle', 'total', 'targets', 'img_size'))
        if rank in [-1, 0]:
            # tqdm progress bar for convenient display of the training status
            pbar = tqdm(pbar, total=nb)  # progress bar
        optimizer.zero_grad()
        for i, (imgs, targets, paths, _) in pbar:  # batch ------------------------------------------------------------
            '''
            i:       batch index
            imgs:    torch.Size([batch_size, 3, resized_height, resized_width])
            targets: torch.Size = (number of objects in the batch, [image index within batch, class, xywh, theta])
            paths:   list of image paths, len(paths) == batch_size
            shapes:  len == batch_size, only filled for rectangular (non-mosaic) training
            '''
            # ni is the iteration counter since training started
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device, non_blocking=True).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0

            # Warmup
            """
            warm-up (the first nw iterations): accumulate and the learning rates
            are interpolated as follows
            """
            if ni <= nw:
                xi = [0, nw]  # x interp
                # model.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
                accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round())
                for j, x in enumerate(optimizer.param_groups):
                    """
                    the bias lr falls from warmup_bias_lr to lr * lf(epoch);
                    all other parameter lrs rise from 0 to lr * lf(epoch);
                    lf is the cosine-annealing decay function defined above
                    """
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0,
                                                 x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])

            # Multi-scale
            # multi-scale training: pick a random size between imgsz * 0.5 and imgsz * 1.5 + gs
            if opt.multi_scale:
                sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
                    # resize the batch with F.interpolate using bilinear interpolation
                    imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Forward
            with amp.autocast(enabled=cuda):
                '''
                during training the model returns a list [small_forward, medium_forward, large_forward],
                e.g. small_forward.size = (batch_size, 3 anchors per scale, size1, size2, no)
                '''
                pred = model(imgs)  # forward
                # Loss: box regression, objectness, classification and angle;
                # loss is the total, loss_items is the tuple (lbox, lobj, lcls, langle, loss)
                loss, loss_items = compute_loss(pred, targets.to(device), model,
                                                csl_label_flag=True)  # loss scaled by batch_size
                if rank != -1:
                    # average gradients between GPUs
                    loss *= opt.world_size  # gradient averaged between devices in DDP mode

            # Backward
            scaler.scale(loss).backward()

            # Optimize
            # step the optimizer only after `accumulate` backward passes have been accumulated
            if ni % accumulate == 0:
                scaler.step(optimizer)  # optimizer.step
                scaler.update()
                optimizer.zero_grad()
                if ema:
                    ema.update(model)

            # Print
            if rank in [-1, 0]:
                # mloss is (lbox, lobj, lcls, langle, loss);
                # log GPU memory, epoch, losses, number of targets and image size
                mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
                mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
                s = ('%10s' * 2 + '%10.4g' * 7) % (
                    '%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1])
                # show the line above on the progress bar
                pbar.set_description(s)

                # Plot
                # draw the target boxes of the first three batches onto the images and save them
                if ni < 3:
                    f = str(log_dir / ('train_batch%g.jpg' % ni))  # filename
                    result = plot_images(images=imgs, targets=targets, paths=paths, fname=f)
                    if tb_writer and result is not None:
                        tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)  # stored as [H, W, C]
                        # tb_writer.add_graph(model, imgs)  # add model to tensorboard
            # end batch ------------------------------------------------------------------------------------------------

        # Scheduler
        lr = [x['lr'] for x in optimizer.param_groups]  # for tensorboard
        scheduler.step()

        # DDP process 0 or single-GPU
        if rank in [-1, 0]:
            # mAP
            if ema:
                # copy the listed attributes onto the EMA model
                ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride'])
            final_epoch = epoch + 1 == epochs  # is this the last epoch?
            # if not opt.notest or final_epoch:  # Calculate mAP
            #     # evaluate on the test set (mAP etc.) using the EMA model
            #     results, maps, times = test.test(opt.data,
            #                                      batch_size=total_batch_size,
            #                                      imgsz=imgsz_test,
            #                                      model=ema.ema,
            #                                      single_cls=opt.single_cls,
            #                                      dataloader=testloader,
            #                                      save_dir=log_dir,
            #                                      plots=epoch == 0 or final_epoch)  # plot first and last

            # Write
            # append the metrics to the results file
            with open(results_file, 'a') as f:
                f.write(s + '%10.4g' * 8 % results + '\n')  # P, R, mAP@0.5, mAP@0.5:0.95, val_loss(box, obj, cls, angle)
            if len(opt.name) and opt.bucket:
                os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name))

            # Tensorboard
            # log metrics and losses to tensorboard
            if tb_writer:
                tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss', 'train/angle_loss',  # train loss
                        'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
                        'val/box_loss', 'val/obj_loss', 'val/cls_loss', 'val/angle_loss',  # val loss
                        'x/lr0', 'x/lr1', 'x/lr2']  # params
                for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
                    tb_writer.add_scalar(tag, x, epoch)

            # Update best mAP
            fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@0.5, mAP@0.5:0.95]
            if fi > best_fitness:
                best_fitness = fi

            # Save model
            """
            the checkpoint stores epoch, results, best_fitness and the EMA model;
            the optimizer state is saved only while training is unfinished and is
            dropped on the final epoch
            """
            save = (not opt.nosave) or (final_epoch and not opt.evolve)
            if save:
                with open(results_file, 'r') as f:  # create checkpoint
                    ckpt = {'epoch': epoch,
                            'best_fitness': best_fitness,
                            'training_results': f.read(),
                            'model': ema.ema,
                            'optimizer': None if final_epoch else optimizer.state_dict()}
                # Save last, best and delete
                torch.save(ckpt, last)
                if best_fitness == fi:
                    torch.save(ckpt, best)
                del ckpt
        # end epoch ----------------------------------------------------------------------------------------------------
    # end training

    if rank in [-1, 0]:
        # Strip optimizers
        """
        after training, strip_optimizer removes the optimizer state from the checkpoint
        and converts the model to FP16 via model.half(), which shrinks the file and
        speeds up inference
        """
        n = opt.name if opt.name.isnumeric() else ''
        fresults, flast, fbest = log_dir / f'results{n}.txt', wdir / f'last{n}.pt', wdir / f'best{n}.pt'
        for f1, f2 in zip([wdir / 'last.pt', wdir / 'best.pt', results_file], [flast, fbest, fresults]):
            if os.path.exists(f1):
                os.rename(f1, f2)  # rename
                if str(f2).endswith('.pt'):  # is *.pt
                    strip_optimizer(f2)  # strip optimizer
                    # upload the results to the configured GCS bucket
                    os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket else None  # upload

        # Finish
        # visualize results.txt as results.png
        if not opt.evolve:
            plot_results(save_dir=log_dir)  # save as results.png
        logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))

    # release GPU memory
    dist.destroy_process_group() if rank not in [-1, 0] else None
    torch.cuda.empty_cache()
    return results
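# --- Note on the cosine learning-rate schedule used above --------------------------------
# The lf lambda passed to LambdaLR decays the base learning rate from lr0 down to lr0 * lrf
# over the full run. A minimal, self-contained sketch (lr0, lrf and total_epochs below are
# assumed example values, not this project's actual hyperparameters):
import math

def _cosine_lr_demo():
    lr0, lrf, total_epochs = 0.01, 0.2, 100  # assumed values for illustration only
    lf = lambda x: ((1 + math.cos(x * math.pi / total_epochs)) / 2) * (1 - lrf) + lrf  # cosine factor
    for epoch in (0, 25, 50, 75, 99):
        print('epoch %3d  lr = %.5f' % (epoch, lr0 * lf(epoch)))
    # epoch 0 yields lr0 itself; the final epoch yields roughly lr0 * lrf (about 0.002 here)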
def process_image(img, processing_model): (model, names) = processing_model # Disable gradients with torch.no_grad(): # Run model t = time_synchronized() inf_out, train_out = model( img, augment=augment) # inference and training outputs t0 += time_synchronized() - t # Compute loss if training: # if model has loss hyperparameters loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3] # GIoU, obj, cls # Run NMS t = time_synchronized() output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, merge=merge) t1 += time_synchronized() - t # Statistics per image for si, pred in enumerate(output): labels = targets[targets[:, 0] == si, 1:] nl = len(labels) tcls = labels[:, 0].tolist() if nl else [] # target class seen += 1 if pred is None: if nl: stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)) continue # Append to text file if save_txt: gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]] # normalization gain whwh txt_path = str(out / Path(paths[si]).stem) pred[:, :4] = scale_coords(img[si].shape[1:], pred[:, :4], shapes[si][0], shapes[si][1]) # to original for *xyxy, conf, cls in pred: xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh with open(txt_path + '.txt', 'a') as f: f.write(('%g ' * 5 + '\n') % (cls, *xywh)) # label format # Clip boxes to image bounds clip_coords(pred, (height, width)) # Append to pycocotools JSON dictionary if save_json: # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... image_id = Path(paths[si]).stem box = pred[:, :4].clone() # xyxy scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1]) # to original shape box = xyxy2xywh(box) # xywh box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner for p, b in zip(pred.tolist(), box.tolist()): jdict.append({ 'image_id': int(image_id) if image_id.isnumeric() else image_id, 'category_id': coco91class[int(p[5])], 'bbox': [round(x, 3) for x in b], 'score': round(p[4], 5) }) # Assign all predictions as incorrect correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device) if nl: detected = [] # target indices tcls_tensor = labels[:, 0] # target boxes tbox = xywh2xyxy(labels[:, 1:5]) * whwh # Per target class for cls in torch.unique(tcls_tensor): ti = (cls == tcls_tensor).nonzero(as_tuple=False).view( -1) # prediction indices pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view( -1) # target indices # Search for detections if pi.shape[0]: # Prediction to target ious ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1) # best ious, indices # Append detections for j in (ious > iouv[0]).nonzero(as_tuple=False): d = ti[i[j]] # detected target if d not in detected: detected.append(d) correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn if len( detected ) == nl: # all targets already located in image break # Append statistics (correct, conf, pcls, tcls) stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) # Plot images if batch_i < 1: f = Path(save_dir) / ('test_batch%g_gt.jpg' % batch_i) # filename plot_images(img, targets, paths, str(f), names) # ground truth f = Path(save_dir) / ('test_batch%g_pred.jpg' % batch_i) plot_images(img, output_to_target(output, width, height), paths, str(f), names) # predictions return img
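# --- Note on the normalized-xywh label format written by the save_txt branch above ------
# Detections are stored as "class x_center y_center width height", all normalized by the
# original image size via the gain vector gn. A minimal sketch of that conversion (the
# helper below is local to this note, not the project's xyxy2xywh):
import torch

def _xyxy_to_normalized_xywh(xyxy, img_w, img_h):
    # xyxy: tensor of shape (n, 4) in pixel coordinates
    gn = torch.tensor([img_w, img_h, img_w, img_h], dtype=torch.float)  # normalization gain whwh
    xywh = torch.empty_like(xyxy)
    xywh[:, 0] = (xyxy[:, 0] + xyxy[:, 2]) / 2  # x center
    xywh[:, 1] = (xyxy[:, 1] + xyxy[:, 3]) / 2  # y center
    xywh[:, 2] = xyxy[:, 2] - xyxy[:, 0]        # width
    xywh[:, 3] = xyxy[:, 3] - xyxy[:, 1]        # height
    return xywh / gn

# example: a 200x200 box centred at (200, 300) in a 1280x720 image
# -> [[0.15625, 0.41667, 0.15625, 0.27778]]
print(_xyxy_to_normalized_xywh(torch.tensor([[100., 200., 300., 400.]]), 1280, 720))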
def test(self, img_raw): # number of logged images try: #found objects found = np.zeros((4, 3)) try: img_resize = cv2.resize( img_raw, (self.detection_width, self.detection_height)) img = cv2.cvtColor(img_resize, cv2.COLOR_BGR2RGB) img = img.transpose(2, 0, 1) img = torch.tensor( img.reshape( (1, 3, self.detection_height, self.detection_width))).float().to(self.device) except: traceback.print_exc() img /= 255.0 # 0 - 255 to 0.0 - 1.0 nb, _, height, width = img.shape # batch size, channels, height, width # Run model inf_out, train_out = self.model( img) # inference and training outputs # Run NMS output = non_max_suppression(inf_out, conf_thres=0.8) if output[0] == None: return img_raw out = output[0].cpu().numpy() #sort last column out[out[:, -1].argsort()] out_filter = [] for i in range(len(out)): obj = out[i] #filter out extremely small detections # if abs(obj[3]-obj[1])<2 or abs(obj[2]-obj[0])<2 or obj[0]<0 or obj[1]<0 or obj[2]<0 or obj[3]<0: # out_filter.append(i) # continue #filter out obj not in region if obj[-1] == 0 and (obj[0] < .45 * width or obj[1] > 0.3 * height): out_filter.append(i) continue if obj[-1] == 1 and (obj[0] > .45 * width or obj[1] > 0.3 * height): out_filter.append(i) continue if obj[-1] == 2 and (obj[0] < .5 * width or obj[1] < 0.6 * height): out_filter.append(i) continue if obj[-1] == 3 and (obj[0] > .5 * width or obj[1] < 0.6 * height): out_filter.append(i) continue #if object already found if found[int(obj[-1])][0]: continue #check orientation except for bottle if obj[-1] != 3: img_crop = img_resize[int(obj[1]):int(obj[3]), int(obj[0]):int(obj[2]), :] try: angle = self.orientation( self.gt_dict[obj[-1]], self.square( cv2.cvtColor(img_crop, cv2.COLOR_BGR2GRAY), self.max_size)) - 90. except: continue else: angle = 0 found[int(obj[-1])] = np.array([(obj[0] + obj[2]) / 2, (obj[1] + obj[3]) / 2, angle]) out = np.delete(out, out_filter, axis=0) img_out = cv2.cvtColor( plot_images(img, output_to_target([torch.tensor(out)], width, height), fname=None, names=self.names), cv2.COLOR_RGB2BGR) for i in range(4): if found[i][0]: img_out = cv2.putText( img_out, str(int(found[i][-1])), (int(found[i][0]), int(found[i][1])), fontFace=cv2.FONT_HERSHEY_COMPLEX_SMALL, fontScale=.5, color=(0, 0, 0)) #my type kinect_cood_c = ( (found[i][0] * (self.detection_end_c - self.detection_start_c) / self.detection_width + self.detection_start_c) - self.center_c) / self.f kinect_cood_r = -( (found[i][1] * (self.detection_end_r - self.detection_start_r) / self.detection_height + self.detection_start_r) - self.center_r) / self.f trans = np.dot( self.H, np.array([[kinect_cood_c], [kinect_cood_r], [1]])) list(self.detection_objects.values())[i].x = trans[0][0] list(self.detection_objects.values())[i].y = trans[1][0] list(self.detection_objects.values() )[i].angle = found[i][-1] list(self.detection_objects.values())[i].detected = True else: list(self.detection_objects.values())[i].detected = False found[:, 0] *= (self.detection_end_c - self.detection_start_c) / self.detection_width found[:, 1] *= (self.detection_end_r - self.detection_start_r) / self.detection_height found += self.offset #pass to RR wire self.detection_wire.OutValue = self.detection_objects except AttributeError: pass return img_out
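# --- Note on the pixel-to-workspace mapping used in test() above ------------------------
# Each detection centre is first converted to normalized camera coordinates using the
# principal point and focal length, then mapped onto the working plane with the 3x3
# homography H. A minimal sketch with made-up calibration values (f, the principal point
# and H below are assumptions, not the calibration used above):
import numpy as np

def _pixel_to_plane(col, row, f=525.0, center_c=320.0, center_r=240.0, H=np.eye(3)):
    x = (col - center_c) / f                  # normalized camera x
    y = -(row - center_r) / f                 # normalized camera y (image rows grow downward)
    trans = H @ np.array([[x], [y], [1.0]])   # project onto the working plane
    return trans[0][0], trans[1][0]

print(_pixel_to_plane(400.0, 200.0))  # plane (x, y) under the assumed identity homography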
def test( data, weights=None, batch_size=16, imgsz=640, conf_thres=0.3, iou_thres=0.5, # for NMS save_json=False, single_cls=False, augment=False, verbose=False, model=None, dataloader=None, save_dir='', merge=False, emb_dim=256, save_txt=False): # Initialize/load model and set device training = model is not None if training: # called by train.py device = next(model.parameters()).device # get model device else: # called directly device = select_device(opt.device, batch_size=batch_size) merge, save_txt = opt.merge, opt.save_txt # use Merge NMS, save *.txt labels if save_txt: out = Path('inference/output') if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder # Remove previous for f in glob.glob(str(Path(save_dir) / 'test_batch*.jpg')): os.remove(f) # Load model model = attempt_load(weights, map_location=device) # load FP32 model imgsz = [check_img_size(x, model.stride.max()) for x in imgsz] # Half half = device.type != 'cpu' # half precision only supported on CUDA if half: model.half() # Configure model.eval() with open(data) as f: data = yaml.load(f, Loader=yaml.FullLoader) # model dict nc = 1 if single_cls else int(data['nc']) # number of classes iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for [email protected]:0.95 niou = iouv.numel() # Dataloader if not training: if len(imgsz) == 1: img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img else: img = torch.zeros((1, 3, imgsz[1], imgsz[0]), device=device) _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once root = data['root'] path = data['test'] if opt.task == 'test' else data[ 'test_emb'] # path to val/test images dataloader = create_dataloader(root, path, imgsz, batch_size, model.stride.max(), opt, hyp=None, augment=False, cache=False, pad=0.5, rect=False)[0] seen = 0 names = model.names if hasattr(model, 'names') else model.module.names s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', '[email protected]', '[email protected]:.95') p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0. 
jdict, stats, ap, ap_class = [], [], [], [] loss = torch.zeros(4, device=device) for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): img = img.to(device, non_blocking=True) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 targets = targets.to(device) nb, _, height, width = img.shape # batch size, channels, height, width whwh = torch.Tensor([width, height, width, height]).to(device) # Disable gradients with torch.no_grad(): # Run model t = time_synchronized() inf_out, train_out_p, train_out_pemb = model( img, augment=augment) # inference and training outputs t0 += time_synchronized() - t # Compute loss if training: # if model has loss hyperparameters loss += compute_loss([x.float() for x in train_out_p], [x.float() for x in train_out_pemb], targets, model)[1][:4] # GIoU, obj, cls, lid # Run NMS t = time_synchronized() output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, merge=merge, emb_dim=emb_dim) t1 += time_synchronized() - t ''' images = letterbox(cv2.imread(paths[1]), [608,1088], auto=False, scaleup=False)[0] d = output[1] if d is None: continue for i in range(len(d)): cv2.rectangle(images, (int(d[i][0]), int(d[i][1])), (int(d[i][2]), int(d[i][3])), (0, 0, 255), 2) cv2.imshow("image", images) cv2.waitKey(0) ''' # Statistics per image for si, pred in enumerate(output): labels = targets[targets[:, 0] == si, 1:] nl = len(labels) tcls = labels[:, 0].tolist() if nl else [] # target class seen += 1 if pred is None: if nl: stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)) continue # Clip boxes to image bounds clip_coords(pred, (height, width)) # Assign all predictions as incorrect correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device) if nl: detected = [] # target indices tcls_tensor = labels[:, 0] # target boxes tbox = xywh2xyxy(labels[:, 2:6]) * whwh # Per target class for cls in torch.unique(tcls_tensor): ti = (cls == tcls_tensor).nonzero(as_tuple=False).view( -1) # prediction indices pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view( -1) # target indices # Search for detections if pi.shape[0]: # Prediction to target ious ious, i = box_iou(pred[pi, :4], tbox[ti]).max( 1) # best ious, indices # Append detections for j in (ious > iouv[0]).nonzero(as_tuple=False): d = ti[i[j]] # detected target if d not in detected: detected.append(d) correct[ pi[j]] = ious[j] > iouv # iou_thres is 1xn if len( detected ) == nl: # all targets already located in image break # Append statistics (correct, conf, pcls, tcls) stats.append( (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) # Plot images if batch_i < 1: f = Path(save_dir) / ('test_batch%g_gt.jpg' % batch_i) # filename plot_images(img, targets, paths, str(f), names) # ground truth f = Path(save_dir) / ('test_batch%g_pred.jpg' % batch_i) plot_test_images(img, output_to_target(output, width, height), paths, str(f), names) # predictions # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): p, r, ap, f1, ap_class = ap_per_class(*stats) p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean( 1) # [P, R, [email protected], [email protected]:0.95] mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class else: nt = torch.zeros(1) # Print results pf = '%20s' + '%12.3g' * 6 # print format print(pf % ('all', seen, nt.sum(), mp, 
mr, map50, map)) # Print results per class if verbose and nc > 1 and len(stats): for i, c in enumerate(ap_class): print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])) # Print speeds t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz[0], imgsz[1], batch_size) if not training: print( 'Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t) # Return results model.float() # for training maps = np.zeros(nc) + map for i, c in enumerate(ap_class): maps[c] = ap[i] return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
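# --- Note on the per-threshold "correct" matrix built in the test loops above -----------
# Each prediction is matched to the ground-truth box with the highest IoU and marked
# correct at every threshold in iouv (0.50:0.95) that the IoU exceeds; class matching and
# the one-target-per-detection bookkeeping are omitted in this sketch. torchvision's
# box_iou stands in for the project's own helper; the boxes below are illustrative:
import torch
from torchvision.ops import box_iou

def _correct_matrix(pred_xyxy, target_xyxy):
    iouv = torch.linspace(0.5, 0.95, 10)                # IoU thresholds for mAP@0.5:0.95
    ious, _ = box_iou(pred_xyxy, target_xyxy).max(1)    # best target IoU per prediction
    return ious.unsqueeze(1) > iouv                     # bool matrix of shape (num_pred, 10)

preds = torch.tensor([[10., 10., 50., 50.], [60., 60., 90., 90.]])
targets = torch.tensor([[12., 12., 48., 48.]])
print(_correct_matrix(preds, targets))
# first row: IoU ~0.81, True up to the 0.80 threshold; second row: no overlap, all False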
def train(hyp, opt, device, tb_writer=None): print(f'Hyperparameters {hyp}') log_dir = Path(tb_writer.log_dir) if tb_writer else Path( opt.logdir) / 'evolve' # logging directory wdir = str(log_dir / 'weights') + os.sep # weights directory os.makedirs(wdir, exist_ok=True) last = wdir + 'last.pt' best = wdir + 'best.pt' results_file = str(log_dir / 'results.txt') epochs, batch_size, total_batch_size, weights, rank = \ opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank # TODO: Use DDP logging. Only the first process is allowed to log. # Save run settings with open(log_dir / 'hyp.yaml', 'w') as f: yaml.dump(hyp, f, sort_keys=False) with open(log_dir / 'opt.yaml', 'w') as f: yaml.dump(vars(opt), f, sort_keys=False) # Configure cuda = device.type != 'cpu' init_seeds(2 + rank) with open(opt.data) as f: data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict root_path = data_dict['root'] train_path = data_dict['train'] test_emb_path = data_dict['test_emb'] test_path = data_dict['test'] nc, names = (1, ['item']) if opt.single_cls else (int( data_dict['nc']), data_dict['names']) # number classes, names assert len(names) == nc, '%g names found for nc=%g dataset in %s' % ( len(names), nc, opt.data) # check # Remove previous results if rank in [-1, 0]: for f in glob.glob('*_batch*.jpg') + glob.glob(results_file): os.remove(f) # Create model model = Model(opt.cfg, nc=nc).to(device) # Image sizes gs = int(max(model.stride)) # grid size (max stride) imgsz = imgsz_test = [check_img_size(x, gs) for x in opt.img_size ] # verify imgsz are gs-multiples # Optimizer nbs = 64 # nominal batch size # default DDP implementation is slow for accumulation according to: https://pytorch.org/docs/stable/notes/ddp.html # all-reduce operation is carried out during loss.backward(). # Thus, there would be redundant all-reduce communications in a accumulation procedure, # which means, the result is still right but the training speed gets slower. 
# TODO: If acceleration is needed, there is an implementation of allreduce_post_accumulation # in https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/LanguageModeling/BERT/run_pretraining.py accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay pg0, pg1, pg2 = [], [], [] # optimizer parameter groups for k, v in model.named_parameters(): if v.requires_grad: if '.bias' in k: pg2.append(v) # biases elif '.weight' in k and '.bn' not in k: pg1.append(v) # apply weight decay else: pg0.append(v) # all else if opt.adam: optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum else: optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) optimizer.add_param_group({ 'params': pg1, 'weight_decay': hyp['weight_decay'] }) # add pg1 with weight_decay optimizer.add_param_group({'params': pg2}) # add pg2 (biases) print('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) del pg0, pg1, pg2 # Scheduler https://arxiv.org/pdf/1812.01187.pdf # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR lf = lambda x: (( (1 + math.cos(x * math.pi / epochs)) / 2)**1.0) * 0.8 + 0.2 # cosine scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # Load Model with torch_distributed_zero_first(rank): attempt_download(weights) start_epoch, best_fitness = 0, 0.0 if weights.endswith('.pt'): # pytorch format ckpt = torch.load(weights, map_location=device) # load checkpoint # load model try: exclude = ['anchor'] # exclude keys ckpt['model'] = { k: v for k, v in ckpt['model'].float().state_dict().items() if k in model.state_dict() and not any(x in k for x in exclude) and model.state_dict()[k].shape == v.shape } model.load_state_dict(ckpt['model'], strict=False) print('Transferred %g/%g items from %s' % (len(ckpt['model']), len(model.state_dict()), weights)) except KeyError as e: s = "%s is not compatible with %s. This may be due to model differences or %s may be out of date. " \ "Please delete or update %s and try again, or use --weights '' to train from scratch." \ % (weights, opt.cfg, weights, weights) raise KeyError(s) from e # load optimizer if ckpt['optimizer'] is not None: optimizer.load_state_dict(ckpt['optimizer']) best_fitness = ckpt['best_fitness'] # load results if ckpt.get('training_results') is not None: with open(results_file, 'w') as file: file.write(ckpt['training_results']) # write results.txt # epochs start_epoch = ckpt['epoch'] + 1 if epochs < start_epoch: print( '%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' 
% (weights, ckpt['epoch'], epochs)) epochs += ckpt['epoch'] # finetune additional epochs del ckpt # DP mode if cuda and rank == -1 and torch.cuda.device_count() > 1: model = torch.nn.DataParallel(model) # SyncBatchNorm if opt.sync_bn and cuda and rank != -1: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) print('Using SyncBatchNorm()') # Exponential moving average ema = ModelEMA(model) if rank in [-1, 0] else None # DDP mode if cuda and rank != -1: model = DDP(model, device_ids=[opt.local_rank], output_device=(opt.local_rank)) # Trainloader dataloader, dataset = create_dataloader(root_path, train_path, imgsz, batch_size, gs, opt, hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, local_rank=rank, world_size=opt.world_size) # Testloader testloader = create_dataloader(root_path, test_path, imgsz_test, total_batch_size, gs, opt, hyp=hyp, augment=False, cache=opt.cache_images, rect=True, local_rank=-1, world_size=opt.world_size)[0] mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class nb = len(dataloader) # number of batches assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % ( mlc, nc, opt.data, nc - 1) # Model parameters hyp['cls'] *= nc / 80. # scale coco-tuned hyp['cls'] to current dataset model.nc = nc # attach number of classes to model model.hyp = hyp # attach hyperparameters to model model.gr = 1.0 # giou loss ratio (obj_loss = 1.0 or giou) model.class_weights = labels_to_class_weights(dataset.labels, nc).to( device) # attach class weights model.names = names # model.nID = dataset.nID # Class frequency if rank in [-1, 0]: labels = np.concatenate(dataset.labels, 0) c = torch.tensor(labels[:, 0]) # classes plot_labels(labels, save_dir=log_dir) if tb_writer: # tb_writer.add_hparams(hyp, {}) # causes duplicate https://github.com/ultralytics/yolov5/pull/384 tb_writer.add_histogram('classes', c, 0) # Check anchors if not opt.noautoanchor: check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) # Start training t0 = time.time() nw = max(3 * nb, 1e3) # number of warmup iterations, max(3 epochs, 1k iterations) # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training maps = np.zeros(nc) # mAP per class results = ( 0, 0, 0, 0, 0, 0, 0 ) # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification' scheduler.last_epoch = start_epoch - 1 # do not move scaler = amp.GradScaler(enabled=cuda) if rank in [0, -1]: print('Image sizes {} train, {} test'.format(str(imgsz), str(imgsz_test))) print('Using %g dataloader workers' % dataloader.num_workers) print('Starting training for %g epochs...' 
% epochs) # torch.autograd.set_detect_anomaly(True) for epoch in range( start_epoch, epochs ): # epoch ------------------------------------------------------------------ model.train() mloss = torch.zeros(5, device=device) # mean losses if rank != -1: dataloader.sampler.set_epoch(epoch) pbar = enumerate(dataloader) if rank in [-1, 0]: print(('\n' + '%10s' * 8 + '%13s') % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'lid', 'total', 'targets', 'img_size')) pbar = tqdm(pbar, total=nb) # progress bar optimizer.zero_grad() for i, ( imgs, targets, paths, _ ) in pbar: # batch ------------------------------------------------------------- ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float( ) / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 # Warmup if ni <= nw: xi = [0, nw] # x interp # model.gr = np.interp(ni, xi, [0.0, 1.0]) # giou loss ratio (obj_loss = 1.0 or giou) accumulate = max( 1, np.interp(ni, xi, [1, nbs / total_batch_size]).round()) for j, x in enumerate(optimizer.param_groups): # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 x['lr'] = np.interp( ni, xi, [0.1 if j == 2 else 0.0, x['initial_lr'] * lf(epoch)]) if 'momentum' in x: x['momentum'] = np.interp(ni, xi, [0.9, hyp['momentum']]) # Multi-scale if opt.multi_scale and random.random() < 0.5: candidate_shapes = [[608, 1088], [480, 864], [320, 576], [512, 960], [384, 640]] curr_shapes = candidate_shapes[random.randint(0, 4)] imgs = F.interpolate(imgs, size=curr_shapes, mode='bilinear', align_corners=False) # Autocast with amp.autocast(enabled=cuda): # Forward pred_detect, pred_emb = model(imgs) # Loss loss, loss_items = compute_loss(pred_detect, pred_emb, targets.to(device), model) # scaled by batch_size if rank != -1: loss *= opt.world_size # gradient averaged between devices in DDP mode # Backward scaler.scale(loss).backward() # Optimize if ni % accumulate == 0: scaler.step(optimizer) # optimizer.step scaler.update() optimizer.zero_grad() if ema is not None: ema.update(model) # Print if rank in [-1, 0]: mloss = (mloss * i + loss_items) / (i + 1 ) # update mean losses mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB) s = ('%10s' * 2 + '%10.4g' * 6 + "%10s") % ('%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], ' [%g,%g]' % (imgs.shape[-1], imgs.shape[-2])) pbar.set_description(s) # Plot if ni < 3: f = str(log_dir / ('train_batch%g.jpg' % ni)) # filename result = plot_images(images=imgs, targets=targets, paths=paths, fname=f) if tb_writer and result is not None: tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch) # tb_writer.add_graph(model, imgs) # add model to tensorboard # end batch ------------------------------------------------------------------------------------------------ # Scheduler scheduler.step() # DDP process 0 or single-GPU if rank in [-1, 0]: # mAP if ema is not None: ema.update_attr( model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride']) final_epoch = epoch + 1 == epochs if not opt.notest or final_epoch: # Calculate mAP results, maps, times = test.test( opt.data, batch_size=total_batch_size, imgsz=imgsz_test, save_json=final_epoch and opt.data.endswith(os.sep + 'coco.yaml'), model=ema.ema.module if hasattr(ema.ema, 'module') else ema.ema, single_cls=opt.single_cls, dataloader=testloader, save_dir=log_dir, emb_dim=model.module.emb_dim) # Write with open(results_file, 'a') as f: f.write(s + '%10.4g' * 8 % results + '\n') # P, R, mAP, F1, test_losses=(GIoU, obj, 
cls) if len(opt.name) and opt.bucket: os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name)) # Tensorboard if tb_writer: tags = [ 'train/giou_loss', 'train/obj_loss', 'train/cls_loss', 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', 'val/giou_loss', 'val/obj_loss', 'val/cls_loss' ] for x, tag in zip(list(mloss[:-1]) + list(results), tags): tb_writer.add_scalar(tag, x, epoch) # Update best mAP fi = fitness(np.array(results).reshape( 1, -1)) # fitness_i = weighted combination of [P, R, mAP, F1] if fi > best_fitness: best_fitness = fi # Save model save = (not opt.nosave) or (final_epoch and not opt.evolve) if save: with open(results_file, 'r') as f: # create checkpoint ckpt = { 'epoch': epoch, 'best_fitness': best_fitness, 'training_results': f.read(), 'model': ema.ema.module if hasattr(ema, 'module') else ema.ema, 'optimizer': None if final_epoch else optimizer.state_dict() } # Save last, best and delete torch.save(ckpt, last) if best_fitness == fi: torch.save(ckpt, best) del ckpt # end epoch ---------------------------------------------------------------------------------------------------- # end training if rank in [-1, 0]: # Strip optimizers n = ('_' if len(opt.name) and not opt.name.isnumeric() else '') + opt.name fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', 'results.txt'], [flast, fbest, fresults]): if os.path.exists(f1): os.rename(f1, f2) # rename ispt = f2.endswith('.pt') # is *.pt strip_optimizer(f2) if ispt else None # strip optimizer os.system('gsutil cp %s gs://%s/weights' % ( f2, opt.bucket)) if opt.bucket and ispt else None # upload # Finish if not opt.evolve: plot_results(save_dir=log_dir) # save as results.png print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) dist.destroy_process_group() if rank not in [-1, 0] else None torch.cuda.empty_cache() return results
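# --- Note on the fitness() criterion that drives best.pt selection above ----------------
# best_fitness is updated from fitness(), a weighted sum of the first four result columns.
# A minimal sketch, assuming the commonly used weights [0.0, 0.0, 0.1, 0.9] for
# [P, R, mAP@0.5, mAP@0.5:0.95] (check this repo's fitness() for the exact weights):
import numpy as np

def _fitness_sketch(results_row):
    w = np.array([0.0, 0.0, 0.1, 0.9])               # assumed metric weights
    return float((np.array(results_row[:4]) * w).sum())

print(_fitness_sketch([0.62, 0.58, 0.55, 0.33]))      # 0.1*0.55 + 0.9*0.33 = 0.352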
def train(hyp, opt, device, tb_writer=None): logger.info(f'Hyperparameters {hyp}') log_dir = Path(tb_writer.log_dir) if tb_writer else Path( opt.logdir) / 'evolve' # logging directory wdir = log_dir / 'weights' # weights directory os.makedirs(wdir, exist_ok=True) last = wdir / 'last.pt' best = wdir / 'best.pt' results_file = str(log_dir / 'results.txt') epochs, batch_size, total_batch_size, weights, rank = \ opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank # Save run settings with open(log_dir / 'hyp.yaml', 'w') as f: yaml.dump(hyp, f, sort_keys=False) with open(log_dir / 'opt.yaml', 'w') as f: yaml.dump(vars(opt), f, sort_keys=False) # Configure cuda = device.type != 'cpu' init_seeds(2 + rank) with open(opt.data) as f: data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict with torch_distributed_zero_first(rank): check_dataset(data_dict) # check train_path = data_dict['train'] test_path = data_dict['val'] nc, names = (1, ['item']) if opt.single_cls else (int( data_dict['nc']), data_dict['names']) # number classes, names assert len(names) == nc, '%g names found for nc=%g dataset in %s' % ( len(names), nc, opt.data) # check # Model pretrained = weights.endswith('.pt') if pretrained: with torch_distributed_zero_first(rank): attempt_download(weights) # download if not found locally ckpt = torch.load(weights, map_location=device) # load checkpoint # added by jiangrong if not opt.resume: ckpt['epoch'] = -1 if opt.nas: model = NasModel(opt.cfg, ch=3, nc=nc, nas=opt.nas, nas_stage=opt.nas_stage).to(device) # create else: model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device) # create exclude = ['anchor'] if opt.cfg else [] # exclude keys state_dict = ckpt['model'].float().state_dict() # to FP32 state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude) # intersect model.load_state_dict(state_dict, strict=False) # load logger.info( 'Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights)) # report else: if opt.nas: model = NasModel(opt.cfg, ch=3, nc=nc, nas=opt.nas, nas_stage=opt.nas_stage).to(device) # create if opt.nas_stage == 3: # TODO, Remapping with BN Statistics on Width-level model.re_organize_middle_weights() else: model = Model(opt.cfg, ch=3, nc=nc).to(device) # create if opt.nas and opt.nas_stage > 0: from models.experimental import attempt_load """ P R [email protected] 0.535 0.835 0.742 python test.py \ --weights /workspace/yolov5-v3/yolov5/runs/exp122/weights/best.pt \ --data ./data/baiguang.yaml \ --device 1 \ --conf-thres 0.2 """ teacher_model = attempt_load( "/workspace/yolov5-v3/yolov5/runs/exp259/weights/best.pt", map_location='cuda:1') teacher_model.eval() # Freeze freeze = [ '', ] # parameter names to freeze (full or partial) if any(freeze): for k, v in model.named_parameters(): if any(x in k for x in freeze): print('freezing %s' % k) v.requires_grad = False # Optimizer nbs = 64 # nominal batch size accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay pg0, pg1, pg2 = [], [], [] # optimizer parameter groups for k, v in model.named_parameters(): v.requires_grad = True if '.bias' in k: pg2.append(v) # biases elif '.weight' in k and '.bn' not in k: pg1.append(v) # apply weight decay else: pg0.append(v) # all else if opt.adam: optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum else: optimizer = optim.SGD(pg0, 
lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) optimizer.add_param_group({ 'params': pg1, 'weight_decay': hyp['weight_decay'] }) # add pg1 with weight_decay optimizer.add_param_group({'params': pg2}) # add pg2 (biases) logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) del pg0, pg1, pg2 # Scheduler https://arxiv.org/pdf/1812.01187.pdf # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR lf = lambda x: (( (1 + math.cos(x * math.pi / epochs)) / 2)**1.0) * 0.8 + 0.2 # cosine scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) # Resume start_epoch, best_fitness = 0, 0.0 if pretrained: # Optimizer if ckpt['optimizer'] is not None and not opt.nas > 0: optimizer.load_state_dict(ckpt['optimizer']) best_fitness = ckpt['best_fitness'] # Results if ckpt.get('training_results') is not None: with open(results_file, 'w') as file: file.write(ckpt['training_results']) # write results.txt # Epochs start_epoch = ckpt['epoch'] + 1 if opt.resume: assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % ( weights, epochs) shutil.copytree(wdir, wdir.parent / f'weights_backup_epoch{start_epoch - 1}' ) # save previous weights if epochs < start_epoch: logger.info( '%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' % (weights, ckpt['epoch'], epochs)) epochs += ckpt['epoch'] # finetune additional epochs del ckpt, state_dict # Image sizes gs = int(max(model.stride)) # grid size (max stride) imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size ] # verify imgsz are gs-multiples # DP mode # TheModel = model if cuda and rank == -1 and torch.cuda.device_count() > 1 and not ( opt.nas and opt.nas_stage > 0): # https://pytorch.org/docs/stable/generated/torch.nn.DataParallel.html # >>> net = torch.nn.DataParallel(model, device_ids=[0, 1, 2]) # >>> output = net(input_var) # input_var can be on any device, including CPU model = torch.nn.DataParallel(model) # SyncBatchNorm if opt.sync_bn and cuda and rank != -1: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) logger.info('Using SyncBatchNorm()') # Exponential moving average ema = ModelEMA(model) if rank in [-1, 0] else None # DDP mode if cuda and rank != -1: model = DDP(model, device_ids=[opt.local_rank], output_device=(opt.local_rank)) # Trainloader dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt, hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank, world_size=opt.world_size, workers=opt.workers) mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class nb = len(dataloader) # number of batches assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % ( mlc, nc, opt.data, nc - 1) # Testloader if rank in [-1, 0]: ema.updates = start_epoch * nb // accumulate # set EMA updates testloader = create_dataloader( test_path, imgsz_test, total_batch_size, gs, opt, hyp=hyp, augment=False, cache=opt.cache_images, rect=True, rank=-1, world_size=opt.world_size, workers=opt.workers)[0] # only runs on process 0 # Model parameters hyp['cls'] *= nc / 80. 
# scale coco-tuned hyp['cls'] to current dataset model.nc = nc # attach number of classes to model model.hyp = hyp # attach hyperparameters to model model.gr = 1.0 # giou loss ratio (obj_loss = 1.0 or giou) model.class_weights = labels_to_class_weights(dataset.labels, nc).to( device) # attach class weights model.names = names # Class frequency if rank in [-1, 0]: labels = np.concatenate(dataset.labels, 0) c = torch.tensor(labels[:, 0]) # classes # cf = torch.bincount(c.long(), minlength=nc) + 1. # model._initialize_biases(cf.to(device)) plot_labels(labels, save_dir=log_dir) if tb_writer: # tb_writer.add_hparams(hyp, {}) # causes duplicate https://github.com/ultralytics/yolov5/pull/384 tb_writer.add_histogram('classes', c, 0) # Check anchors if not opt.noautoanchor: check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) # Start training t0 = time.time() nw = max(3 * nb, 1e3) # number of warmup iterations, max(3 epochs, 1k iterations) # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training maps = np.zeros(nc) # mAP per class results = ( 0, 0, 0, 0, 0, 0, 0 ) # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification' scheduler.last_epoch = start_epoch - 1 # do not move # scaler = amp.GradScaler(enabled=cuda) logger.info('Image sizes %g train, %g test' % (imgsz, imgsz_test)) logger.info('Using %g dataloader workers' % dataloader.num_workers) logger.info('Starting training for %g epochs...' % epochs) # torch.autograd.set_detect_anomaly(True) plot_csum = 0 for epoch in range( start_epoch, epochs ): # epoch ------------------------------------------------------------------ model.train() # Update image weights (optional) if dataset.image_weights: # Generate indices if rank in [-1, 0]: w = model.class_weights.cpu().numpy() * ( 1 - maps)**2 # class weights image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w) dataset.indices = random.choices( range(dataset.n), weights=image_weights, k=dataset.n) # rand weighted idx # Broadcast if DDP if rank != -1: indices = torch.zeros([dataset.n], dtype=torch.int) if rank == 0: indices[:] = torch.tensor(dataset.indices, dtype=torch.int) dist.broadcast(indices, 0) if rank != 0: dataset.indices = indices.cpu().numpy() # Update mosaic border # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) # dataset.mosaic_border = [b - imgsz, -b] # height, width borders mloss = torch.zeros(4, device=device) # mean losses if rank != -1: dataloader.sampler.set_epoch(epoch) pbar = enumerate(dataloader) logger.info( ('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size')) if rank in [-1, 0]: pbar = tqdm(pbar, total=nb) # progress bar optimizer.zero_grad() for i, ( imgs, targets, paths, _ ) in pbar: # batch ------------------------------------------------------------- # print(type(targets), targets.size()) # [[_,classid(start from 0), x,y,w,h (0-1)]] # print('---> targets: ', targets) ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float( ) / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 # Warmup if ni <= nw: xi = [0, nw] # x interp # model.gr = np.interp(ni, xi, [0.0, 1.0]) # giou loss ratio (obj_loss = 1.0 or giou) accumulate = max( 1, np.interp(ni, xi, [1, nbs / total_batch_size]).round()) for j, x in enumerate(optimizer.param_groups): # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 x['lr'] = np.interp( ni, xi, [0.1 if j == 2 else 0.0, x['initial_lr'] * 
lf(epoch)]) if 'momentum' in x: x['momentum'] = np.interp(ni, xi, [0.9, hyp['momentum']]) # Multi-scale if opt.multi_scale: sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size sf = sz / max(imgs.shape[2:]) # scale factor if sf != 1: ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:] ] # new shape (stretched to gs-multiple) imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) # Forward ###### jiangrong, turn off mixed precision ########## # with amp.autocast(enabled=cuda): if 1 == 1: pred = model(imgs) # forward, format x(bs,3,20,20,80+1+4) loss, loss_items = compute_loss( pred, targets.to(device), model) # loss scaled by batch_size if rank != -1: loss *= opt.world_size # gradient averaged between devices in DDP mode # z= [] # for i in range(TheModel._modules['model'][-1].nl): # bs, _, ny, nx, _ = pred[i].shape # if TheModel._modules['model'][-1].grid[i].shape[2:4] != pred[i].shape[2:4]: # TheModel._modules['model'][-1].grid[i] = TheModel._modules['model'][-1]._make_grid(nx, ny).to(pred[i].device) # y = pred[i].sigmoid() # y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + TheModel._modules['model'][-1].grid[i].to(pred[i].device)) * TheModel._modules['model'][-1].stride[i] # xy # y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * TheModel._modules['model'][-1].anchor_grid[i] # wh # z.append(y.view(bs, -1, TheModel._modules['model'][-1].no)) # inf_out = torch.cat(z, 1) # teacher_pred = non_max_suppression(inf_out, conf_thres=0.2, iou_thres=0.6, merge=False) # assert len(teacher_pred) == imgs.size()[0] # for i, (det, plot_img) in enumerate(zip(teacher_pred, imgs.detach().cpu().numpy())): # plot_img = np.transpose(plot_img, (1,2,0)) # plot_img = np.uint8(plot_img * 255.0) # plot_csum += 1 # cv2.imwrite('./tmp/{}.jpg'.format(plot_csum), plot_img) # plot_img = cv2.imread('./tmp/{}.jpg'.format(plot_csum)) # for tgt in targets.detach().cpu().numpy(): # _, tgt_class_id, c_x, c_y, c_w, c_h = tgt # c_x, c_y, c_w, c_h = float(c_x), float(c_y), float(c_w), float(c_h) # c_x, c_y, c_w, c_h = c_x * plot_img.shape[1], c_y * plot_img.shape[0], c_w * plot_img.shape[1], c_h * plot_img.shape[0] # cv2.rectangle(plot_img, (int(c_x - c_w / 2), int(c_y - c_h / 2)), (int(c_x + c_w / 2), int(c_y + c_h / 2)), (0,0,255), 2) # print('===> ', int(c_x - c_w / 2), int(c_y - c_h / 2), int(c_x + c_w / 2), int(c_y + c_h / 2), tgt_class_id) # if det is not None: # det = det.detach().cpu().numpy() # for each_b in det: # pass # cv2.rectangle(plot_img, (int(each_b[0]), int(each_b[1])), (int(each_b[2]), int(each_b[3])), (255,0,0), 2) # print('---> ', int(each_b[0]), int(each_b[1]), int(each_b[2]), int(each_b[3]), float(each_b[4]), int(each_b[5])) # cv2.imwrite('./tmp/{}.jpg'.format(plot_csum), plot_img) if opt.nas and opt.nas_stage > 0: teacher_imgs = imgs.to('cuda:1') with torch.no_grad(): inf_out, _ = teacher_model(teacher_imgs) # forward # filter by obj confidence 0.05 teacher_pred = non_max_suppression_teacher( inf_out, conf_thres=0.05, iou_thres=0.6, merge=False ) # (x1, y1, x2, y2, conf, cls) in resized image size teacher_targets = teacher2targets(teacher_pred, teacher_imgs) # print('---> teacher_pred', teacher_pred) # print('---> targets', targets) # print('---> teacher_targets', teacher_targets) # TODO: apply soft label loss teacher_loss, teacher_loss_items = compute_teacher_loss( pred, teacher_targets.to(device), model) # loss scaled by batch_size # print("===> origin loss", loss, loss_items) # print("===> teacher loss", teacher_loss, teacher_loss_items) teacher_loss_scale = 2.0 loss += 
teacher_loss * teacher_loss_scale loss_items += teacher_loss_items * teacher_loss_scale ########## the targets and teacher predictions are matched, but they both can not be restored to the image, need TODO!! ########### # assert len(teacher_pred) == imgs.size()[0] # for i, (det, plot_img) in enumerate(zip(teacher_pred, imgs.detach().cpu().numpy())): # plot_img = np.transpose(plot_img, (1,2,0)) # plot_img = np.uint8(plot_img * 255.0) # plot_csum += 1 # cv2.imwrite('./tmp/{}.jpg'.format(plot_csum), plot_img) # plot_img = cv2.imread('./tmp/{}.jpg'.format(plot_csum)) # for tgt in targets.detach().cpu().numpy(): # _, tgt_class_id, c_x, c_y, c_w, c_h = tgt # c_x, c_y, c_w, c_h = float(c_x), float(c_y), float(c_w), float(c_h) # c_x, c_y, c_w, c_h = c_x * plot_img.shape[1], c_y * plot_img.shape[0], c_w * plot_img.shape[1], c_h * plot_img.shape[0] # cv2.rectangle(plot_img, (int(c_x - c_w / 2), int(c_y - c_h / 2)), (int(c_x + c_w / 2), int(c_y + c_h / 2)), (0,0,255), 2) # print('===> ', int(c_x - c_w / 2), int(c_y - c_h / 2), int(c_x + c_w / 2), int(c_y + c_h / 2), tgt_class_id) # if det is not None: # det = det.detach().cpu().numpy() # for each_b in det: # pass # cv2.rectangle(plot_img, (int(each_b[0]), int(each_b[1])), (int(each_b[2]), int(each_b[3])), (255,0,0), 2) # print('---> ', int(each_b[0]), int(each_b[1]), int(each_b[2]), int(each_b[3]), float(each_b[4]), int(each_b[5])) # cv2.imwrite('./tmp/{}.jpg'.format(plot_csum), plot_img) # Backward # scaler.scale(loss).backward() loss.backward() # Optimize if ni % accumulate == 0: # scaler.step(optimizer) # optimizer.step # scaler.update() optimizer.step() optimizer.zero_grad() if ema: ema.update(model) # Print if rank in [-1, 0]: mloss = (mloss * i + loss_items) / (i + 1 ) # update mean losses mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB) s = ('%10s' * 2 + '%10.4g' * 6) % ('%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]) pbar.set_description(s) # Plot if ni < 3: f = str(log_dir / ('train_batch%g.jpg' % ni)) # filename result = plot_images(images=imgs, targets=targets, paths=paths, fname=f) if tb_writer and result is not None: tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch) # tb_writer.add_graph(model, imgs) # add model to tensorboard # end batch ------------------------------------------------------------------------------------------------ # Scheduler lr = [x['lr'] for x in optimizer.param_groups] # for tensorboard scheduler.step() # DDP process 0 or single-GPU if rank in [-1, 0]: # mAP if ema: ema.update_attr( model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride']) final_epoch = epoch + 1 == epochs if not opt.notest or final_epoch: # Calculate mAP if opt.nas: # only evaluate the super network ema.ema.nas_stage = 0 results, maps, times = test.test(opt.data, batch_size=total_batch_size, imgsz=imgsz_test, model=ema.ema, single_cls=opt.single_cls, dataloader=testloader, save_dir=log_dir) if opt.nas: ema.ema.nas_stage = opt.nas_stage # Write with open(results_file, 'a') as f: f.write(s + '%10.4g' * 7 % results + '\n') # P, R, mAP, F1, test_losses=(GIoU, obj, cls) if len(opt.name) and opt.bucket: os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name)) # Tensorboard if tb_writer: tags = [ 'train/giou_loss', 'train/obj_loss', 'train/cls_loss', # train loss 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', 'val/giou_loss', 'val/obj_loss', 'val/cls_loss', # val loss 'x/lr0', 
'x/lr1', 'x/lr2' ] # params for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags): tb_writer.add_scalar(tag, x, epoch) # Update best mAP fi = fitness(np.array(results).reshape( 1, -1)) # fitness_i = weighted combination of [P, R, mAP, F1] if fi > best_fitness: best_fitness = fi # Save model save = (not opt.nosave) or (final_epoch and not opt.evolve) if save: with open(results_file, 'r') as f: # create checkpoint ckpt = { 'epoch': epoch, 'best_fitness': best_fitness, 'training_results': f.read(), 'model': ema.ema, 'optimizer': None if final_epoch else optimizer.state_dict() } # Save last, best and delete torch.save(ckpt, last) if best_fitness == fi: torch.save(ckpt, best) del ckpt # end epoch ---------------------------------------------------------------------------------------------------- # end training if rank in [-1, 0]: # Strip optimizers n = ('_' if len(opt.name) and not opt.name.isnumeric() else '') + opt.name fresults, flast, fbest = 'results%s.txt' % n, wdir / f'last{n}.pt', wdir / f'best{n}.pt' for f1, f2 in zip([wdir / 'last.pt', wdir / 'best.pt', 'results.txt'], [flast, fbest, fresults]): if os.path.exists(f1): os.rename(f1, f2) # rename if str(f2).endswith('.pt'): # is *.pt strip_optimizer(f2) # strip optimizer os.system( 'gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket else None # upload # Finish if not opt.evolve: plot_results(save_dir=log_dir) # save as results.png logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) dist.destroy_process_group() if rank not in [-1, 0] else None torch.cuda.empty_cache() return results
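# --- Note on the exponential moving average (ModelEMA) used throughout the loops above --
# The EMA keeps a smoothed shadow copy of the weights that is evaluated and written into
# the checkpoints instead of the raw model. A minimal sketch in the same spirit (not the
# project's ModelEMA; it assumes an unwrapped model, i.e. not inside DataParallel/DDP):
import copy
import math
import torch

class _SimpleEMA:
    def __init__(self, model, decay=0.9999):
        self.ema = copy.deepcopy(model).eval()   # shadow copy used for eval / checkpointing
        self.decay = decay
        self.updates = 0
        for p in self.ema.parameters():
            p.requires_grad_(False)

    def update(self, model):
        self.updates += 1
        d = self.decay * (1 - math.exp(-self.updates / 2000))  # ramp the decay up early in training
        with torch.no_grad():
            msd = model.state_dict()
            for k, v in self.ema.state_dict().items():
                if v.dtype.is_floating_point:                   # skip integer buffers
                    v.mul_(d).add_(msd[k].detach(), alpha=1 - d)

# usage sketch: ema = _SimpleEMA(model); call ema.update(model) after each optimizer step,
# then evaluate / save ema.ema instead of the raw model.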
def test(data, weights=None, batch_size=16, imgsz=640, conf_thres=0.001, iou_thres=0.6, # for NMS save_json=False, single_cls=False, augment=False, verbose=False, model=None, dataloader=None, save_dir=Path(''), # for saving images save_txt=False, # for auto-labelling save_conf=False, plots=True, log_imgs=0): # number of logged images # Initialize/load model and set device training = model is not None if training: # called by train.py device = next(model.parameters()).device # get model device else: # called directly set_logging() device = select_device(opt.device, batch_size=batch_size) save_txt = opt.save_txt # save *.txt labels # Directories if save_dir == Path('runs/test'): # if default save_dir.mkdir(parents=True, exist_ok=True) # make base save_dir = Path(increment_dir(save_dir / 'exp', opt.name)) # increment run (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make new dir # Load model model = attempt_load(weights, map_location=device) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99 # if device.type != 'cpu' and torch.cuda.device_count() > 1: # model = nn.DataParallel(model) # Half half = device.type != 'cpu' # half precision only supported on CUDA if half: model.half() # Configure model.eval() with open(data) as f: data = yaml.load(f, Loader=yaml.FullLoader) # model dict check_dataset(data) # check nc = 1 if single_cls else int(data['nc']) # number of classes iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for [email protected]:0.95 niou = iouv.numel() # Logging log_imgs = min(log_imgs, 100) # ceil try: import wandb # Weights & Biases except ImportError: log_imgs = 0 # Dataloader if not training: img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img) if device.type != 'cpu' else None # run once path = data['test'] if opt.task == 'test' else data['val'] # path to val/test images dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt, hyp=None, augment=False, cache=False, pad=0.5, rect=True)[0] seen = 0 names = {k: v for k, v in enumerate(model.names if hasattr(model, 'names') else model.module.names)} coco91class = coco80_to_coco91_class() s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', '[email protected]', '[email protected]:.95') p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0. 
loss = torch.zeros(3, device=device) jdict, stats, ap, ap_class, wandb_images = [], [], [], [], [] for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): img = img.to(device, non_blocking=True) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 targets = targets.to(device) nb, _, height, width = img.shape # batch size, channels, height, width whwh = torch.Tensor([width, height, width, height]).to(device) # Disable gradients with torch.no_grad(): # Run model t = time_synchronized() inf_out, train_out = model(img, augment=augment) # inference and training outputs # print('shape') # print(inf_out.shape) # print('ceterx, cetery, w, h') # print(inf_out[0][..., 0:4]) # ceterx, cetery, w, h # print('cls_conf') # print(inf_out[0][..., 4]) # cls_conf # print('obj_conf') # print(inf_out[0][..., 5:]) # obj_conf t0 += time_synchronized() - t # Compute loss if training: # if model has loss hyperparameters loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3] # box, obj, cls # Run NMS t = time_synchronized() output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres) t1 += time_synchronized() - t # Statistics per image for si, pred in enumerate(output): labels = targets[targets[:, 0] == si, 1:] nl = len(labels) tcls = labels[:, 0].tolist() if nl else [] # target class seen += 1 if pred is None: if nl: stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)) continue # Append to text file if save_txt: gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]] # gain whwh x = pred.clone() x[:, :4] = scale_coords(img[si].shape[1:], x[:, :4], shapes[si][0], shapes[si][1]) # to original for *xyxy, conf, cls in x: xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh line = (cls, *xywh, conf) if save_conf else (cls, *xywh) # label format with open(str(save_dir / 'labels' / Path(paths[si]).stem) + '.txt', 'a') as f: f.write(('%g ' * len(line) + '\n') % line) # W&B logging if len(wandb_images) < log_imgs: box_data = [{"position": {"minX": xyxy[0], "minY": xyxy[1], "maxX": xyxy[2], "maxY": xyxy[3]}, "class_id": int(cls), "box_caption": "%s %.3f" % (names[cls], conf), "scores": {"class_score": conf}, "domain": "pixel"} for *xyxy, conf, cls in pred.clone().tolist()] boxes = {"predictions": {"box_data": box_data, "class_labels": names}} wandb_images.append(wandb.Image(img[si], boxes=boxes)) # Clip boxes to image bounds clip_coords(pred, (height, width)) # Append to pycocotools JSON dictionary if save_json: # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... 
image_id = Path(paths[si]).stem box = pred[:, :4].clone() # xyxy scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1]) # to original shape box = xyxy2xywh(box) # xywh box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner for p, b in zip(pred.tolist(), box.tolist()): jdict.append({'image_id': int(image_id) if image_id.isnumeric() else image_id, 'category_id': coco91class[int(p[5])], 'bbox': [round(x, 3) for x in b], 'score': round(p[4], 5)}) # Assign all predictions as incorrect correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device) if nl: detected = [] # target indices tcls_tensor = labels[:, 0] # target boxes tbox = xywh2xyxy(labels[:, 1:5]) * whwh # Per target class for cls in torch.unique(tcls_tensor): ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1) # target indices 1xn pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1) # prediction indices # Search for detections if pi.shape[0]: # Prediction to target ious ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1) # best ious, indices # Append detections detected_set = set() for j in (ious > iouv[0]).nonzero(as_tuple=False): d = ti[i[j]] # detected target if d.item() not in detected_set: detected_set.add(d.item()) detected.append(d) correct[pi[j]] = ious[j] > iouv # iou_thres is 1xn if len(detected) == nl: # all targets already located in image break # Append statistics (correct, conf, pcls, tcls) stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) # Plot images if plots and batch_i < 1: f = save_dir / f'test_batch{batch_i}_labels.jpg' # filename plot_images(img, targets, paths, str(f), names) # labels f = save_dir / f'test_batch{batch_i}_pred.jpg' plot_images(img, output_to_target(output, width, height), paths, str(f), names) # predictions # W&B logging if wandb_images: wandb.log({"outputs": wandb_images}) # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, fname=save_dir / 'precision-recall_curve.png') p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1) # [P, R, [email protected], [email protected]:0.95] mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class else: nt = torch.zeros(1) # Print results pf = '%20s' + '%12.3g' * 6 # print format print(pf % ('all', seen, nt.sum(), mp, mr, map50, map)) # Print results per class if verbose and nc > 1 and len(stats): for i, c in enumerate(ap_class): print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])) # Print speeds t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size) # tuple if not training: print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t) # Save JSON if save_json and len(jdict): w = Path(weights[0] if isinstance(weights, list) else weights).stem if weights is not None else '' # weights file = save_dir / f"detections_val2017_{w}_results.json" # predicted annotations file print('\nCOCO mAP with pycocotools... saving %s...' 
% file) with open(file, 'w') as f: json.dump(jdict, f) try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files] cocoAnno = COCO(glob.glob('../coco/annotations/instances_val*.json')[0]) # initialize COCO annotations api cocoPred = cocoAnno.loadRes(str(file)) # initialize COCO pred api cocoEval = COCOeval(cocoAnno, cocoPred, 'bbox') cocoEval.params.imgIds = imgIds # image IDs to evaluate cocoEval.evaluate() cocoEval.accumulate() cocoEval.summarize() map, map50 = cocoEval.stats[:2] # update results ([email protected]:0.95, [email protected]) except Exception as e: print('ERROR: pycocotools unable to run: %s' % e) # Return results if not training: print('Results saved to %s' % save_dir) model.float() # for training maps = np.zeros(nc) + map for i, c in enumerate(ap_class): maps[c] = ap[i] return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
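# --- Illustrative sketch (not part of the original script) ---
# Minimal sketch of the pycocotools evaluation performed at the end of test(): load the
# ground-truth annotations, register the predicted detections, and run bbox evaluation.
# The JSON file paths below are hypothetical placeholders.
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

coco_gt = COCO('annotations/instances_val2017.json')    # ground-truth annotations
coco_dt = coco_gt.loadRes('detections_val2017.json')    # predictions in COCO JSON format
coco_eval = COCOeval(coco_gt, coco_dt, 'bbox')
coco_eval.params.imgIds = sorted(coco_gt.getImgIds())   # optionally restrict to evaluated images
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()
map_50_95, map_50 = coco_eval.stats[:2]                 # mAP@0.5:0.95 and mAP@0.5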
def train(hyp, opt, device, tb_writer=None): # log hyperparameters to the console logger.info(f'Hyperparameters {hyp}') log_dir = Path(tb_writer.log_dir) if tb_writer else Path( opt.logdir) / 'evolve' # logging directory wdir = log_dir / 'weights' # weights directory os.makedirs(wdir, exist_ok=True) last = wdir / 'last.pt' best = wdir / 'best.pt' results_file = str(log_dir / 'results.txt') # weights: (pretrained) weights file; rank: global process rank epochs, batch_size, total_batch_size, weights, rank = \ opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank # Save run settings with open(log_dir / 'hyp.yaml', 'w') as f: yaml.dump(hyp, f, sort_keys=False) with open(log_dir / 'opt.yaml', 'w') as f: yaml.dump(vars(opt), f, sort_keys=False) # Configure cuda = device.type != 'cpu' # initialize random seeds (numpy, random and torch) init_seeds(2 + rank) with open(opt.data) as f: data_dict = yaml.load(f, Loader=yaml.FullLoader) # data dict ''' Execution logic: if process 0 enters, torch_distributed_zero_first returns immediately and check_dataset runs; once check_dataset finishes, control re-enters torch_distributed_zero_first below the yield and calls barrier, pausing until every process reaches it. Any non-zero process calls barrier first and waits; the barrier is released once all processes have reached it. ''' # verify the dataset with torch_distributed_zero_first(rank): check_dataset(data_dict) # check train_path = data_dict['train'] test_path = data_dict['val'] nc, names = (1, ['item']) if opt.single_cls else (int( data_dict['nc']), data_dict['names']) # number classes, names assert len(names) == nc, '%g names found for nc=%g dataset in %s' % ( len(names), nc, opt.data) # check # Model pretrained = weights.endswith('.pt') # load pretrained model parameters if pretrained: # download the pretrained weights with torch_distributed_zero_first(rank): attempt_download(weights) # download if not found locally ckpt = torch.load(weights, map_location=device) # load checkpoint if hyp.get('anchors'): ckpt['model'].yaml['anchors'] = round( hyp['anchors']) # force autoanchor # build the model from the given cfg or from the pretrained checkpoint's cfg; ch=3 is the number of input channels model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device) # create exclude = ['anchor'] if opt.cfg or hyp.get('anchors') else [ ] # exclude keys state_dict = ckpt['model'].float().state_dict() # to FP32 # only load parameters that exist in both the pretrained and the current model, which requires their shapes to match # intersect_dicts compares the pretrained parameters with the current model's and keeps only those with identical shapes (mismatched shapes cannot be used in the current model) state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude) # intersect # load the parameters in non-strict mode model.load_state_dict(state_dict, strict=False) # load logger.info( 'Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights)) # report else: model = Model(opt.cfg, ch=3, nc=nc).to(device) # create # Freeze # freeze selected layers, useful for fine-tuning freeze = [ '', ] # parameter names to freeze (full or partial) if any(freeze): for k, v in model.named_parameters(): if any(x in k for x in freeze): print('freezing %s' % k) v.requires_grad = False # Optimizer # compare batch_size against the nominal 64 and scale weight decay accordingly when 64 is not an integer multiple of batch_size nbs = 64 # nominal batch size accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay pg0, pg1, pg2 = [], [], [] # optimizer parameter groups for k, v in model.named_parameters(): v.requires_grad = True if '.bias' in k: pg2.append(v) # biases elif '.weight' in k and '.bn' not in k: pg1.append(v) # apply weight decay else: pg0.append(v) # all else if opt.adam: optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum else: optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
# Add the conv-weight and bias parameter groups; biases get no weight decay. The key must be named 'params'; after these add_param_group calls the optimizer holds a list of three groups. optimizer.add_param_group({ 'params': pg1, 'weight_decay': hyp['weight_decay'] }) # add pg1 with weight_decay optimizer.add_param_group({'params': pg2}) # add pg2 (biases) logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) del pg0, pg1, pg2 # Scheduler # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR # learning-rate decay schedule lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp[ 'lrf']) + hyp['lrf'] # cosine scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) # Resume start_epoch, best_fitness = 0, 0.0 if pretrained: # Optimizer if ckpt['optimizer'] is not None: optimizer.load_state_dict(ckpt['optimizer']) best_fitness = ckpt['best_fitness'] # Results if ckpt.get('training_results') is not None: with open(results_file, 'w') as file: file.write(ckpt['training_results']) # write results.txt # Epochs start_epoch = ckpt['epoch'] + 1 if opt.resume: assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % ( weights, epochs) shutil.copytree(wdir, wdir.parent / f'weights_backup_epoch{start_epoch - 1}' ) # save previous weights if epochs < start_epoch: logger.info( '%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' % (weights, ckpt['epoch'], epochs)) epochs += ckpt['epoch'] # finetune additional epochs del ckpt, state_dict # Image sizes gs = int(max(model.stride)) # grid size (max stride) # get the train and test image sizes after adjustment (rounded so they are divisible by the grid size) imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size ] # verify imgsz are gs-multiples # DP mode # data parallel (single machine, multiple GPUs) if cuda and rank == -1 and torch.cuda.device_count() > 1: model = torch.nn.DataParallel(model) # SyncBatchNorm if opt.sync_bn and cuda and rank != -1: # synchronized BatchNorm; without it each GPU's BatchNorm uses only the mean and variance of the data on that GPU, so even though the GPUs train on the same batch their statistics differ model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) logger.info('Using SyncBatchNorm()') # Exponential moving average # give recent parameter values a higher weight; the assumption is that the latest parameters are oscillating around the optimum, so they deserve larger weights ema = ModelEMA(model) if rank in [-1, 0] else None # DDP mode # distributed data parallel (multiple machines, multiple GPUs) if cuda and rank != -1: # local_rank specifies which GPU the current process uses, i.e. the GPU index model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank) # Trainloader dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt, hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank, world_size=opt.world_size, workers=opt.workers) mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class nb = len(dataloader) # number of batches assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % ( mlc, nc, opt.data, nc - 1) # Process 0 if rank in [-1, 0]: ema.updates = start_epoch * nb // accumulate # set EMA updates testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt, hyp=hyp, augment=False, cache=opt.cache_images and not opt.notest, rect=True, rank=-1, world_size=opt.world_size, workers=opt.workers)[0] # testloader if not opt.resume: labels = np.concatenate(dataset.labels, 0) c = torch.tensor(labels[:, 0]) # classes # cf = torch.bincount(c.long(), minlength=nc) + 1.
# frequency # model._initialize_biases(cf.to(device)) plot_labels(labels, save_dir=log_dir) if tb_writer: # tb_writer.add_hparams(hyp, {}) # causes duplicate https://github.com/ultralytics/yolov5/pull/384 tb_writer.add_histogram('classes', c, 0) # Anchors if not opt.noautoanchor: check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz) # Model parameters hyp['cls'] *= nc / 80. # scale coco-tuned hyp['cls'] to current dataset model.nc = nc # attach number of classes to model model.hyp = hyp # attach hyperparameters to model model.gr = 1.0 # iou loss ratio (obj_loss = 1.0 or iou) model.class_weights = labels_to_class_weights(dataset.labels, nc).to( device) # attach class weights model.names = names # Start training t0 = time.time() nw = max(round(hyp['warmup_epochs'] * nb), 1e3) # number of warmup iterations, max(3 epochs, 1k iterations) # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training maps = np.zeros(nc) # mAP per class results = (0, 0, 0, 0, 0, 0, 0 ) # P, R, [email protected], [email protected], val_loss(box, obj, cls) scheduler.last_epoch = start_epoch - 1 # do not move scaler = amp.GradScaler(enabled=cuda) logger.info('Image sizes %g train, %g test\n' 'Using %g dataloader workers\nLogging results to %s\n' 'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, log_dir, epochs)) for epoch in range( start_epoch, epochs ): # epoch ------------------------------------------------------------------ model.train() # Update image weights (optional) if opt.image_weights: # Generate indices if rank in [-1, 0]: cw = model.class_weights.cpu().numpy() * ( 1 - maps)**2 # class weights iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights dataset.indices = random.choices( range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx # Broadcast if DDP if rank != -1: indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int() dist.broadcast(indices, 0) if rank != 0: dataset.indices = indices.cpu().numpy() # Update mosaic border # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) # dataset.mosaic_border = [b - imgsz, -b] # height, width borders mloss = torch.zeros(4, device=device) # mean losses if rank != -1: dataloader.sampler.set_epoch(epoch) pbar = enumerate(dataloader) logger.info( ('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'total', 'targets', 'img_size')) if rank in [-1, 0]: pbar = tqdm(pbar, total=nb) # progress bar optimizer.zero_grad() for i, ( imgs, targets, paths, _ ) in pbar: # batch ------------------------------------------------------------- ni = i + nb * epoch # number integrated batches (since train start) imgs = imgs.to(device, non_blocking=True).float( ) / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 # Warmup if ni <= nw: xi = [0, nw] # x interp # model.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) accumulate = max( 1, np.interp(ni, xi, [1, nbs / total_batch_size]).round()) for j, x in enumerate(optimizer.param_groups): # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 x['lr'] = np.interp(ni, xi, [ hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch) ]) if 'momentum' in x: x['momentum'] = np.interp( ni, xi, [hyp['warmup_momentum'], hyp['momentum']]) # Multi-scale if opt.multi_scale: sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size sf = sz / max(imgs.shape[2:]) # scale factor if sf != 1: ns = [math.ceil(x * sf / gs) * gs for x in 
imgs.shape[2:] ] # new shape (stretched to gs-multiple) imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False) # Forward with amp.autocast(enabled=cuda): pred = model(imgs) # forward loss, loss_items = compute_loss( pred, targets.to(device), model) # loss scaled by batch_size if rank != -1: loss *= opt.world_size # gradient averaged between devices in DDP mode # Backward scaler.scale(loss).backward() # Optimize if ni % accumulate == 0: scaler.step(optimizer) # optimizer.step scaler.update() optimizer.zero_grad() if ema: ema.update(model) # Print if rank in [-1, 0]: mloss = (mloss * i + loss_items) / (i + 1 ) # update mean losses mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB) s = ('%10s' * 2 + '%10.4g' * 6) % ('%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1]) pbar.set_description(s) # Plot if ni < 3: f = str(log_dir / ('train_batch%g.jpg' % ni)) # filename result = plot_images(images=imgs, targets=targets, paths=paths, fname=f) if tb_writer and result is not None: tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch) # tb_writer.add_graph(model, imgs) # add model to tensorboard # end batch ------------------------------------------------------------------------------------------------ # Scheduler lr = [x['lr'] for x in optimizer.param_groups] # for tensorboard scheduler.step() # DDP process 0 or single-GPU if rank in [-1, 0]: # mAP if ema: ema.update_attr( model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride']) final_epoch = epoch + 1 == epochs if not opt.notest or final_epoch: # Calculate mAP results, maps, times = test.test( opt.data, batch_size=total_batch_size, imgsz=imgsz_test, model=ema.ema, single_cls=opt.single_cls, dataloader=testloader, save_dir=log_dir, plots=epoch == 0 or final_epoch) # plot first and last # Write with open(results_file, 'a') as f: f.write( s + '%10.4g' * 7 % results + '\n') # P, R, [email protected], [email protected], val_loss(box, obj, cls) if len(opt.name) and opt.bucket: os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name)) # Tensorboard if tb_writer: tags = [ 'train/box_loss', 'train/obj_loss', 'train/cls_loss', # train loss 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95', 'val/box_loss', 'val/obj_loss', 'val/cls_loss', # val loss 'x/lr0', 'x/lr1', 'x/lr2' ] # params for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags): tb_writer.add_scalar(tag, x, epoch) # Update best mAP fi = fitness(np.array(results).reshape( 1, -1)) # weighted combination of [P, R, [email protected], [email protected]] if fi > best_fitness: best_fitness = fi # Save model save = (not opt.nosave) or (final_epoch and not opt.evolve) if save: with open(results_file, 'r') as f: # create checkpoint ckpt = { 'epoch': epoch, 'best_fitness': best_fitness, 'training_results': f.read(), 'model': ema.ema, 'optimizer': None if final_epoch else optimizer.state_dict() } # Save last, best and delete torch.save(ckpt, last) if best_fitness == fi: torch.save(ckpt, best) del ckpt # end epoch ---------------------------------------------------------------------------------------------------- # end training if rank in [-1, 0]: # Strip optimizers n = opt.name if opt.name.isnumeric() else '' fresults, flast, fbest = log_dir / f'results{n}.txt', wdir / f'last{n}.pt', wdir / f'best{n}.pt' for f1, f2 in zip([wdir / 'last.pt', wdir / 'best.pt', results_file], [flast, fbest, fresults]): if os.path.exists(f1): 
os.rename(f1, f2) # rename if str(f2).endswith('.pt'): # is *.pt strip_optimizer(f2) # strip optimizer os.system( 'gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket else None # upload # Finish if not opt.evolve: plot_results(save_dir=log_dir) # save as results.png logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) dist.destroy_process_group() if rank not in [-1, 0] else None torch.cuda.empty_cache() return results
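# --- Illustrative sketch (not part of the original script) ---
# Minimal sketch of the cosine learning-rate schedule built above with LambdaLR: the lambda
# maps the epoch index to a multiplier that decays from 1.0 down to hyp['lrf'] over `epochs`.
# The optimizer and the hyperparameter values here are hypothetical stand-ins.
import math
import torch
import torch.nn as nn
from torch.optim import lr_scheduler

epochs, lr0, lrf = 100, 0.01, 0.2
params = [nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.SGD(params, lr=lr0)

lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - lrf) + lrf  # cosine decay to lrf
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

for epoch in range(epochs):
    # ... train one epoch ...
    scheduler.step()  # lr becomes lr0 * lf(epoch + 1) after this call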
def train(hyp, opt, device, tb_writer=None): logger.info(f"Hyperparameters {hyp}") log_dir = (Path(tb_writer.log_dir) if tb_writer else Path(opt.logdir) / "evolve") # logging directory wdir = log_dir / "weights" # weights directory os.makedirs(wdir, exist_ok=True) last = wdir / "last.pt" best = wdir / "best.pt" results_file = str(log_dir / "results.txt") epochs, batch_size, total_batch_size, weights, rank = ( opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank, ) # Save run settings with open(log_dir / "hyp.yaml", "w") as f: yaml.dump(hyp, f, sort_keys=False) with open(log_dir / "opt.yaml", "w") as f: yaml.dump(vars(opt), f, sort_keys=False) # Configure cuda = device.type != "cpu" init_seeds(2 + rank) with open(opt.data) as f: data_dict = yaml.load(f, Loader=yaml.FullLoader) # data dict with torch_distributed_zero_first(rank): check_dataset(data_dict) # check train_path = data_dict["train"] test_path = data_dict["val"] nc, names = ( (1, ["item"]) if opt.single_cls else (int(data_dict["nc"]), data_dict["names"])) # number classes, names assert len(names) == nc, "%g names found for nc=%g dataset in %s" % ( len(names), nc, opt.data, ) # check # Model pretrained = weights.endswith(".pt") if pretrained: with torch_distributed_zero_first(rank): attempt_download(weights) # download if not found locally ckpt = torch.load(weights, map_location=device) # load checkpoint if hyp.get("anchors"): ckpt["model"].yaml["anchors"] = round( hyp["anchors"]) # force autoanchor model = Model(opt.cfg or ckpt["model"].yaml, ch=3, nc=nc).to(device) # create exclude = ["anchor"] if opt.cfg or hyp.get("anchors") else [ ] # exclude keys state_dict = ckpt["model"].float().state_dict() # to FP32 state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude) # intersect model.load_state_dict(state_dict, strict=False) # load logger.info( "Transferred %g/%g items from %s" % (len(state_dict), len(model.state_dict()), weights)) # report else: model = Model(opt.cfg, ch=3, nc=nc).to(device) # create # Freeze freeze = [ "", ] # parameter names to freeze (full or partial) if any(freeze): for k, v in model.named_parameters(): if any(x in k for x in freeze): print("freezing %s" % k) v.requires_grad = False # Optimizer nbs = 64 # nominal batch size accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing hyp["weight_decay"] *= total_batch_size * accumulate / nbs # scale weight_decay pg0, pg1, pg2 = [], [], [] # optimizer parameter groups for k, v in model.named_parameters(): v.requires_grad = True if ".bias" in k: pg2.append(v) # biases elif ".weight" in k and ".bn" not in k: pg1.append(v) # apply weight decay else: pg0.append(v) # all else if opt.adam: optimizer = optim.Adam(pg0, lr=hyp["lr0"], betas=(hyp["momentum"], 0.999)) # adjust beta1 to momentum else: optimizer = optim.SGD(pg0, lr=hyp["lr0"], momentum=hyp["momentum"], nesterov=True) optimizer.add_param_group({ "params": pg1, "weight_decay": hyp["weight_decay"] }) # add pg1 with weight_decay optimizer.add_param_group({"params": pg2}) # add pg2 (biases) logger.info("Optimizer groups: %g .bias, %g conv.weight, %g other" % (len(pg2), len(pg1), len(pg0))) del pg0, pg1, pg2 # Scheduler https://arxiv.org/pdf/1812.01187.pdf # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR lf = (lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp["lrf"]) + hyp["lrf"]) # cosine scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, 
epochs) # Resume start_epoch, best_fitness = 0, 0.0 if pretrained: # Optimizer if ckpt["optimizer"] is not None: optimizer.load_state_dict(ckpt["optimizer"]) best_fitness = ckpt["best_fitness"] # Results if ckpt.get("training_results") is not None: with open(results_file, "w") as file: file.write(ckpt["training_results"]) # write results.txt # Epochs start_epoch = ckpt["epoch"] + 1 if opt.resume: assert start_epoch > 0, ( "%s training to %g epochs is finished, nothing to resume." % (weights, epochs)) shutil.copytree(wdir, wdir.parent / f"weights_backup_epoch{start_epoch - 1}" ) # save previous weights if epochs < start_epoch: logger.info( "%s has been trained for %g epochs. Fine-tuning for %g additional epochs." % (weights, ckpt["epoch"], epochs)) epochs += ckpt["epoch"] # finetune additional epochs del ckpt, state_dict # Image sizes gs = int(max(model.stride)) # grid size (max stride) imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size ] # verify imgsz are gs-multiples # DP mode if cuda and rank == -1 and torch.cuda.device_count() > 1: model = torch.nn.DataParallel(model) # SyncBatchNorm if opt.sync_bn and cuda and rank != -1: model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device) logger.info("Using SyncBatchNorm()") # Exponential moving average ema = ModelEMA(model) if rank in [-1, 0] else None # DDP mode if cuda and rank != -1: model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank) # Trainloader dataloader, dataset = create_dataloader( train_path, imgsz, batch_size, gs, opt, hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank, world_size=opt.world_size, workers=opt.workers, ) mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class nb = len(dataloader) # number of batches assert mlc < nc, ( "Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g" % (mlc, nc, opt.data, nc - 1)) # Process 0 if rank in [-1, 0]: ema.updates = start_epoch * nb // accumulate # set EMA updates testloader = create_dataloader( test_path, imgsz_test, total_batch_size, gs, opt, hyp=hyp, augment=False, cache=opt.cache_images and not opt.notest, rect=True, rank=-1, world_size=opt.world_size, workers=opt.workers, )[0] # testloader if not opt.resume: labels = np.concatenate(dataset.labels, 0) c = torch.tensor(labels[:, 0]) # classes # cf = torch.bincount(c.long(), minlength=nc) + 1. 
# frequency # model._initialize_biases(cf.to(device)) plot_labels(labels, save_dir=log_dir) if tb_writer: # tb_writer.add_hparams(hyp, {}) # causes duplicate https://github.com/ultralytics/yolov5/pull/384 tb_writer.add_histogram("classes", c, 0) # Anchors if not opt.noautoanchor: check_anchors(dataset, model=model, thr=hyp["anchor_t"], imgsz=imgsz) # Model parameters hyp["cls"] *= nc / 80.0 # scale coco-tuned hyp['cls'] to current dataset model.nc = nc # attach number of classes to model model.hyp = hyp # attach hyperparameters to model model.gr = 1.0 # iou loss ratio (obj_loss = 1.0 or iou) model.class_weights = labels_to_class_weights(dataset.labels, nc).to( device) # attach class weights model.names = names # Start training t0 = time.time() nw = max(round(hyp["warmup_epochs"] * nb), 1e3) # number of warmup iterations, max(3 epochs, 1k iterations) # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training maps = np.zeros(nc) # mAP per class results = (0, 0, 0, 0, 0, 0, 0 ) # P, R, [email protected], [email protected], val_loss(box, obj, cls) scheduler.last_epoch = start_epoch - 1 # do not move scaler = amp.GradScaler(enabled=cuda) logger.info("Image sizes %g train, %g test\n" "Using %g dataloader workers\nLogging results to %s\n" "Starting training for %g epochs..." % (imgsz, imgsz_test, dataloader.num_workers, log_dir, epochs)) for epoch in range( start_epoch, epochs ): # epoch ------------------------------------------------------------------ model.train() # Update image weights (optional) if opt.image_weights: # Generate indices if rank in [-1, 0]: cw = (model.class_weights.cpu().numpy() * (1 - maps)**2 ) # class weights iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights dataset.indices = random.choices( range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx # Broadcast if DDP if rank != -1: indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int() dist.broadcast(indices, 0) if rank != 0: dataset.indices = indices.cpu().numpy() # Update mosaic border # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs) # dataset.mosaic_border = [b - imgsz, -b] # height, width borders mloss = torch.zeros(4, device=device) # mean losses if rank != -1: dataloader.sampler.set_epoch(epoch) pbar = enumerate(dataloader) logger.info( ("\n" + "%10s" * 8) % ("Epoch", "gpu_mem", "box", "obj", "cls", "total", "targets", "img_size")) if rank in [-1, 0]: pbar = tqdm(pbar, total=nb) # progress bar optimizer.zero_grad() for ( i, (imgs, targets, paths, _), ) in ( pbar ): # batch ------------------------------------------------------------- ni = i + nb * epoch # number integrated batches (since train start) imgs = (imgs.to(device, non_blocking=True).float() / 255.0 ) # uint8 to float32, 0-255 to 0.0-1.0 # Warmup if ni <= nw: xi = [0, nw] # x interp # model.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou) accumulate = max( 1, np.interp(ni, xi, [1, nbs / total_batch_size]).round()) for j, x in enumerate(optimizer.param_groups): # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 x["lr"] = np.interp( ni, xi, [ hyp["warmup_bias_lr"] if j == 2 else 0.0, x["initial_lr"] * lf(epoch), ], ) if "momentum" in x: x["momentum"] = np.interp( ni, xi, [hyp["warmup_momentum"], hyp["momentum"]]) # Multi-scale if opt.multi_scale: sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size sf = sz / max(imgs.shape[2:]) # scale factor if sf != 1: ns = [math.ceil(x * sf / 
gs) * gs for x in imgs.shape[2:] ] # new shape (stretched to gs-multiple) imgs = F.interpolate(imgs, size=ns, mode="bilinear", align_corners=False) # Forward with amp.autocast(enabled=cuda): pred = model(imgs) # forward loss, loss_items = compute_loss( pred, targets.to(device), model) # loss scaled by batch_size if rank != -1: loss *= (opt.world_size ) # gradient averaged between devices in DDP mode # Backward scaler.scale(loss).backward() # Optimize if ni % accumulate == 0: scaler.step(optimizer) # optimizer.step scaler.update() optimizer.zero_grad() if ema: ema.update(model) # Print if rank in [-1, 0]: mloss = (mloss * i + loss_items) / (i + 1 ) # update mean losses mem = "%.3gG" % (torch.cuda.memory_reserved() / 1e9 if torch.cuda.is_available() else 0) # (GB) s = ("%10s" * 2 + "%10.4g" * 6) % ( "%g/%g" % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1], ) pbar.set_description(s) # Plot if ni < 3: f = str(log_dir / ("train_batch%g.jpg" % ni)) # filename result = plot_images(images=imgs, targets=targets, paths=paths, fname=f) if tb_writer and result is not None: tb_writer.add_image(f, result, dataformats="HWC", global_step=epoch) # tb_writer.add_graph(model, imgs) # add model to tensorboard # end batch ------------------------------------------------------------------------------------------------ # Scheduler lr = [x["lr"] for x in optimizer.param_groups] # for tensorboard scheduler.step() # DDP process 0 or single-GPU if rank in [-1, 0]: # mAP if ema: ema.update_attr( model, include=["yaml", "nc", "hyp", "gr", "names", "stride"]) final_epoch = epoch + 1 == epochs if not opt.notest or final_epoch: # Calculate mAP results, maps, times = test.test( opt.data, batch_size=total_batch_size, imgsz=imgsz_test, model=ema.ema, single_cls=opt.single_cls, dataloader=testloader, save_dir=log_dir, plots=epoch == 0 or final_epoch, ) # plot first and last # Write with open(results_file, "a") as f: f.write( s + "%10.4g" * 7 % results + "\n") # P, R, [email protected], [email protected], val_loss(box, obj, cls) if len(opt.name) and opt.bucket: os.system("gsutil cp %s gs://%s/results/results%s.txt" % (results_file, opt.bucket, opt.name)) # Tensorboard if tb_writer: tags = [ "train/box_loss", "train/obj_loss", "train/cls_loss", # train loss "metrics/precision", "metrics/recall", "metrics/mAP_0.5", "metrics/mAP_0.5:0.95", "val/box_loss", "val/obj_loss", "val/cls_loss", # val loss "x/lr0", "x/lr1", "x/lr2", ] # params for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags): tb_writer.add_scalar(tag, x, epoch) # Update best mAP fi = fitness(np.array(results).reshape( 1, -1)) # weighted combination of [P, R, [email protected], [email protected]] if fi > best_fitness: best_fitness = fi # Save model save = (not opt.nosave) or (final_epoch and not opt.evolve) if save: with open(results_file, "r") as f: # create checkpoint ckpt = { "epoch": epoch, "best_fitness": best_fitness, "training_results": f.read(), "model": ema.ema, "optimizer": None if final_epoch else optimizer.state_dict(), } # Save last, best and delete torch.save(ckpt, last) if best_fitness == fi: torch.save(ckpt, best) del ckpt # end epoch ---------------------------------------------------------------------------------------------------- # end training if rank in [-1, 0]: # Strip optimizers n = opt.name if opt.name.isnumeric() else "" fresults, flast, fbest = ( log_dir / f"results{n}.txt", wdir / f"last{n}.pt", wdir / f"best{n}.pt", ) for f1, f2 in zip([wdir / "last.pt", wdir / "best.pt", results_file], [flast, fbest, 
fresults]): if os.path.exists(f1): os.rename(f1, f2) # rename if str(f2).endswith(".pt"): # is *.pt strip_optimizer(f2) # strip optimizer os.system( "gsutil cp %s gs://%s/weights" % (f2, opt.bucket)) if opt.bucket else None # upload # Finish if not opt.evolve: plot_results(save_dir=log_dir) # save as results.png logger.info("%g epochs completed in %.3f hours.\n" % (epoch - start_epoch + 1, (time.time() - t0) / 3600)) dist.destroy_process_group() if rank not in [-1, 0] else None torch.cuda.empty_cache() return results
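# --- Illustrative sketch (not part of the original script) ---
# Simplified sketch of what ModelEMA does in the training loops above: keep a shadow copy of
# the model whose weights are an exponential moving average of the trained weights, with a
# decay that ramps up as updates accumulate. This is an illustrative stand-in, not the
# project's ModelEMA implementation.
import copy
import math
import torch

class SimpleEMA:
    def __init__(self, model, decay=0.9999):
        self.ema = copy.deepcopy(model).eval()   # frozen shadow model
        self.updates = 0
        self.decay = lambda x: decay * (1 - math.exp(-x / 2000))  # decay ramps up with updates
        for p in self.ema.parameters():
            p.requires_grad_(False)

    def update(self, model):
        self.updates += 1
        d = self.decay(self.updates)
        with torch.no_grad():
            msd = model.state_dict()
            for k, v in self.ema.state_dict().items():
                if v.dtype.is_floating_point:
                    v.mul_(d).add_(msd[k].detach(), alpha=1 - d)  # v = d * v + (1 - d) * w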
def main(opts): epochs = opts.epochs # select device device = torch.device("cuda:0" if (not opts.cpu and torch.cuda.is_available()) else "cpu") cuda = device.type != 'cpu' logger.info('Use device %s.' % device) # build the networks stnet = STNet() lprnet = LPRNet(class_num=len(CHARS), dropout_rate=opts.lpr_dropout_rate) model_info(stnet, 'st') model_info(lprnet, 'lpr') stnet, lprnet = stnet.to(device), lprnet.to(device) logger.info("Networks built successfully.") # optimizer optimizer_params = [ {'params': stnet.parameters(), 'weight_decay': opts.st_weight_decay}, {'params': lprnet.parameters(), 'weight_decay': opts.lpr_weight_decay} ] if opts.adam: optimizer = torch.optim.Adam(optimizer_params, lr=opts.lr, betas=(opts.momentum, 0.999)) else: optimizer = torch.optim.SGD(optimizer_params, lr=opts.lr, momentum=opts.momentum, nesterov=True) del optimizer_params # loss function ctc_loss = torch.nn.CTCLoss(blank=len(CHARS) - 1, reduction='mean') # reduction: 'none' | 'mean' | 'sum' # learning-rate scheduler lf = lambda e: (((1 + math.cos(e * math.pi / epochs)) / 2) ** 1.0) * 0.8 + 0.2 # cosine scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) del lf # TB logger.info('Start Tensorboard with "tensorboard --logdir %s", view at http://localhost:6006/' % opts.worker_dir) tb_writer = SummaryWriter(log_dir=opts.out_dir) # runs/exp0 # Resume start_epoch = 1 if opts.weights: ckpt = torch.load(opts.weights, map_location=device) # load the networks if 'stn' in ckpt: # compatible with the old checkpoint format stnet.load_state_dict(ckpt["stn"]) else: stnet.load_state_dict(ckpt["st"]) lprnet.load_state_dict(ckpt["lpr"]) # optimizer if 'optimizer_type' in ckpt: # compatible with final.pt, which has no optimizer state optimizer_type = 'adam' if opts.adam else 'sgd' if optimizer_type == ckpt['optimizer_type']: optimizer.load_state_dict(ckpt['optimizer']) else: logger.warning('Optimizer has changed, its state has been lost.') # Epochs start_epoch = ckpt['epoch'] + 1 if epochs < start_epoch: logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' % (opts.weights, start_epoch - 1, epochs)) epochs += start_epoch # free memory del ckpt # Print logger.info('Load checkpoint completed.') # load the data train_dataset = LPRDataSet(opts.img_size, 0, .85, cache=opts.cache_images) test_dataset = LPRDataSet(opts.img_size, .8501, .15, cache=opts.cache_images) train_loader = DataLoader(train_dataset, batch_size=opts.batch_size, shuffle=True, num_workers=opts.workers, pin_memory=cuda, collate_fn=collate_fn) test_loader = DataLoader(test_dataset, batch_size=opts.test_batch_size, shuffle=False, num_workers=opts.workers, pin_memory=cuda, collate_fn=collate_fn) # set the number of epochs already completed scheduler.last_epoch = start_epoch - 2 # epoch counting starts at 1 # automatic mixed precision scaler = torch.cuda.amp.GradScaler(enabled=cuda) best_acc = -1.0 logger.info('Image sizes %d train, %d test' % (len(train_dataset), len(test_dataset))) logger.info('Using %d dataloader workers' % opts.workers) logger.info('Starting training for %d epochs...'
% epochs) for epoch in range(start_epoch, epochs + 1): stnet.train() lprnet.train() optimizer.zero_grad() mloss = .0 pbar = tqdm(enumerate(train_loader), total=len(train_loader), desc='Train(%d/%d)' % (epoch, epochs)) for i, (imgs, labels, lengths) in pbar: imgs, labels = imgs.to(device, non_blocking=True).float(), labels.to(device, non_blocking=True).float() # normalize to roughly [-1, 1] imgs -= 127.5 imgs *= .0078431 # 127.5 * 0.0078431 = 0.99999525 # random negative (invert the image) if random.random() > .5: imgs = -imgs # prepare the length arguments for the CTC loss input_lengths, target_lengths = sparse_tuple_for_ctc(opts.lpr_max_len, lengths) # Forward with torch.cuda.amp.autocast(enabled=cuda): st_result = stnet(imgs) x = lprnet(st_result) x = x.permute(2, 0, 1) # [batch_size, chars, width] -> [width, batch_size, chars] x = x.log_softmax(2).requires_grad_() loss = ctc_loss(x, labels, input_lengths=input_lengths, target_lengths=target_lengths) # Backward scaler.scale(loss).backward() # Optimize scaler.step(optimizer) scaler.update() optimizer.zero_grad() # Print mloss = (mloss * i + loss.item()) / (i + 1) # update mean losses lr = optimizer.param_groups[0]['lr'] pbar.set_description('Train(%d/%d), lr: %.5f, mloss: %.5f' % (epoch, epochs, lr, mloss)) # tb if epoch <= 3 and i < 3: if epoch == 1 and i == 0: tb_writer.add_graph(MultiModelWrapper([stnet, lprnet]), imgs) # add model to tensorboard f = os.path.join(opts.out_dir, 'train_batch_%d_%d.jpg' % (epoch, i)) # filename result = plot_images(images=imgs, fname=f) if result is not None: tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch) if i == 0 and opts.tb_st: f = os.path.join(opts.out_dir, 'train_batch_st_%d.jpg' % epoch) # filename result = plot_images(images=st_result.detach(), fname=f) if result is not None: tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch) del st_result, x, loss # Scheduler scheduler.step() # Save model saved_data = { "epoch": epoch, "lpr": lprnet.state_dict(), "st": stnet.state_dict(), 'optimizer': optimizer.state_dict(), 'optimizer_type': 'adam' if opts.adam else 'sgd' } if (not opts.nosave or epoch == epochs) and epoch % opts.save_epochs == 0: torch.save(saved_data, os.path.join(opts.weights_dir, 'last.pt')) # Evaluate test if (not opts.notest or epoch == epochs) and epoch % opts.test_epochs == 0: stnet.eval() lprnet.eval() test_mloss, test_macc = test(lprnet, stnet, test_loader, test_dataset, device, ctc_loss, opts.lpr_max_len, opts.float_test) # save best weights if best_acc <= test_macc: best_acc = test_macc if not opts.nosave: torch.save(saved_data, os.path.join(opts.weights_dir, 'best.pt')) # tb tb_writer.add_scalar('val/mloss', test_mloss, epoch) tb_writer.add_scalar('val/macc', test_macc, epoch) del saved_data # tb tb_writer.add_scalar('train/mloss', mloss, epoch) tb_writer.add_scalar('train/lr', lr, epoch) # Split line logger.info('') # Save final weights torch.save({ "epoch": epochs, "lpr": lprnet.state_dict(), "st": stnet.state_dict() }, os.path.join(opts.weights_dir, 'final.pt')) logger.info('Training complete.')
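# --- Illustrative sketch (not part of the original script) ---
# Minimal sketch of the CTC loss call used in the LPR training loop above: the network output
# is permuted to (T, N, C), log-softmaxed over the class dimension, and paired with per-sample
# input/target lengths. The shapes and values here are illustrative only.
import torch

T, N, C = 18, 4, 68                     # time steps, batch size, number of characters (incl. blank)
blank = C - 1                           # blank index, as in CTCLoss(blank=len(CHARS) - 1)
ctc_loss = torch.nn.CTCLoss(blank=blank, reduction='mean')

logits = torch.randn(N, C, T)           # network output: [batch, chars, width]
log_probs = logits.permute(2, 0, 1).log_softmax(2)    # -> [width, batch, chars]

target_lengths = torch.tensor([7, 7, 8, 7])                        # plate length per sample
targets = torch.randint(0, blank, (int(target_lengths.sum()),))    # concatenated label indices
input_lengths = torch.full((N,), T, dtype=torch.long)              # each sample uses all T steps

loss = ctc_loss(log_probs, targets, input_lengths, target_lengths)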
def test( data, weights=None, batch_size=16, imgsz=640, conf_thres=0.001, iou_thres=0.6, # for NMS save_json=False, single_cls=False, augment=False, verbose=False, model=None, dataloader=None, save_dir='', merge=False, save_txt=False): # Initialize/load model and set device training = model is not None if training: # called by train.py device = next(model.parameters()).device # get model device else: # called directly set_logging() device = select_device(opt.device, batch_size=batch_size) merge, save_txt = opt.merge, opt.save_txt # use Merge NMS, save *.txt labels if save_txt: out = Path('inference/output') if os.path.exists(out): shutil.rmtree(out) # delete output folder os.makedirs(out) # make new output folder # Remove previous for f in glob.glob(str(Path(save_dir) / 'test_batch*.jpg')): os.remove(f) # Load model model = attempt_load(weights, map_location=device) # load FP32 model imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99 # if device.type != 'cpu' and torch.cuda.device_count() > 1: # model = nn.DataParallel(model) # Half half = device.type != 'cpu' # half precision only supported on CUDA if half: model.half() # Configure model.eval() with open(data) as f: data = yaml.load(f, Loader=yaml.FullLoader) # model dict check_dataset(data) # check nc = 1 if single_cls else int(data['nc']) # number of classes iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for [email protected]:0.95 niou = iouv.numel() # Dataloader if not training: img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img _ = model(img.half() if half else img ) if device.type != 'cpu' else None # run once path = data['test'] if opt.task == 'test' else data[ 'val'] # path to val/test images dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt, hyp=None, augment=False, cache=False, pad=0.5, rect=True)[0] seen = 0 names = model.names if hasattr(model, 'names') else model.module.names coco91class = coco80_to_coco91_class() s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', '[email protected]', '[email protected]:.95') p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0. 
loss = torch.zeros(3, device=device) jdict, stats, ap, ap_class = [], [], [], [] for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): img = img.to(device, non_blocking=True) img = img.half() if half else img.float() # uint8 to fp16/32 img /= 255.0 # 0 - 255 to 0.0 - 1.0 targets = targets.to(device) nb, _, height, width = img.shape # batch size, channels, height, width whwh = torch.Tensor([width, height, width, height]).to(device) # Disable gradients with torch.no_grad(): # Run model t = time_synchronized() inf_out, train_out = model( img, augment=augment) # inference and training outputs t0 += time_synchronized() - t # Compute loss if training: # if model has loss hyperparameters loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3] # GIoU, obj, cls # Run NMS t = time_synchronized() output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres, merge=merge) t1 += time_synchronized() - t # Statistics per image for si, pred in enumerate(output): labels = targets[targets[:, 0] == si, 1:] nl = len(labels) tcls = labels[:, 0].tolist() if nl else [] # target class seen += 1 if pred is None: if nl: stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls)) continue # Append to text file if save_txt: gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0 ]] # normalization gain whwh x = pred.clone() x[:, :4] = scale_coords(img[si].shape[1:], x[:, :4], shapes[si][0], shapes[si][1]) # to original for *xyxy, conf, cls in x: xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist() # normalized xywh with open(str(out / Path(paths[si]).stem) + '.txt', 'a') as f: f.write( ('%g ' * 5 + '\n') % (cls, *xywh)) # label format # Clip boxes to image bounds clip_coords(pred, (height, width)) # Append to pycocotools JSON dictionary if save_json: # [{"image_id": 42, "category_id": 18, "bbox": [258.15, 41.29, 348.26, 243.78], "score": 0.236}, ... 
image_id = Path(paths[si]).stem box = pred[:, :4].clone() # xyxy scale_coords(img[si].shape[1:], box, shapes[si][0], shapes[si][1]) # to original shape box = xyxy2xywh(box) # xywh box[:, :2] -= box[:, 2:] / 2 # xy center to top-left corner for p, b in zip(pred.tolist(), box.tolist()): jdict.append({ 'image_id': int(image_id) if image_id.isnumeric() else image_id, 'category_id': coco91class[int(p[5])], 'bbox': [round(x, 3) for x in b], 'score': round(p[4], 5) }) # Assign all predictions as incorrect correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device) if nl: detected = [] # target indices tcls_tensor = labels[:, 0] # target boxes tbox = xywh2xyxy(labels[:, 1:5]) * whwh # Per target class for cls in torch.unique(tcls_tensor): ti = (cls == tcls_tensor).nonzero(as_tuple=False).view( -1) # prediction indices pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view( -1) # target indices # Search for detections if pi.shape[0]: # Prediction to target ious ious, i = box_iou(pred[pi, :4], tbox[ti]).max( 1) # best ious, indices # Append detections detected_set = set() for j in (ious > iouv[0]).nonzero(as_tuple=False): d = ti[i[j]] # detected target if d.item() not in detected_set: detected_set.add(d.item()) detected.append(d) correct[ pi[j]] = ious[j] > iouv # iou_thres is 1xn if len( detected ) == nl: # all targets already located in image break # Append statistics (correct, conf, pcls, tcls) stats.append( (correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls)) # Plot images if batch_i < 1: f = Path(save_dir) / ('test_batch%g_gt.jpg' % batch_i) # filename plot_images(img, targets, paths, str(f), names) # ground truth f = Path(save_dir) / ('test_batch%g_pred.jpg' % batch_i) plot_images(img, output_to_target(output, width, height), paths, str(f), names) # predictions # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): p, r, ap, f1, ap_class = ap_per_class(*stats) p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean( 1) # [P, R, [email protected], [email protected]:0.95] mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean() nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class else: nt = torch.zeros(1) # Print results pf = '%20s' + '%12.3g' * 6 # print format print(pf % ('all', seen, nt.sum(), mp, mr, map50, map)) # Print results per class if verbose and nc > 1 and len(stats): for i, c in enumerate(ap_class): print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i])) # Print speeds t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size) # tuple if not training: print( 'Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t) # Save JSON if save_json and len(jdict): f = 'detections_val2017_%s_results.json' % \ (weights.split(os.sep)[-1].replace('.pt', '') if isinstance(weights, str) else '') # filename print('\nCOCO mAP with pycocotools... saving %s...' 
% f) with open(f, 'w') as file: json.dump(jdict, file) try: # https://github.com/cocodataset/cocoapi/blob/master/PythonAPI/pycocoEvalDemo.ipynb from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval imgIds = [int(Path(x).stem) for x in dataloader.dataset.img_files] cocoGt = COCO( glob.glob('../coco/annotations/instances_val*.json') [0]) # initialize COCO ground truth api cocoDt = cocoGt.loadRes(f) # initialize COCO pred api cocoEval = COCOeval(cocoGt, cocoDt, 'bbox') cocoEval.params.imgIds = imgIds # image IDs to evaluate cocoEval.evaluate() cocoEval.accumulate() cocoEval.summarize() map, map50 = cocoEval.stats[: 2] # update results ([email protected]:0.95, [email protected]) except Exception as e: print('ERROR: pycocotools unable to run: %s' % e) # Return results model.float() # for training maps = np.zeros(nc) + map for i, c in enumerate(ap_class): maps[c] = ap[i] return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
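# --- Illustrative sketch (not part of the original script) ---
# Simplified stand-ins for the xywh2xyxy and box_iou helpers used in the matching logic above:
# convert center-format boxes to corner format, then compute the pairwise IoU matrix between
# predicted and target boxes. Illustrative only, not the project's exact utilities.
import torch

def xywh_to_xyxy(x):
    # (cx, cy, w, h) -> (x1, y1, x2, y2)
    y = x.clone()
    y[:, 0] = x[:, 0] - x[:, 2] / 2
    y[:, 1] = x[:, 1] - x[:, 3] / 2
    y[:, 2] = x[:, 0] + x[:, 2] / 2
    y[:, 3] = x[:, 1] + x[:, 3] / 2
    return y

def pairwise_iou(a, b):
    # a: (M, 4), b: (N, 4) in xyxy; returns an (M, N) IoU matrix
    area_a = (a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1])
    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    lt = torch.max(a[:, None, :2], b[None, :, :2])   # intersection top-left
    rb = torch.min(a[:, None, 2:], b[None, :, 2:])   # intersection bottom-right
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]
    return inter / (area_a[:, None] + area_b[None, :] - inter + 1e-9)

pred = xywh_to_xyxy(torch.tensor([[50., 50., 20., 20.]]))
tgt = xywh_to_xyxy(torch.tensor([[52., 50., 20., 20.]]))
print(pairwise_iou(pred, tgt))  # best IoU per prediction, as in box_iou(...).max(1)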
def training_step(self, i): ni = i + self.nb * self.epoch # number integrated batches (since train start) self.imgs = self.imgs.to(self.device, non_blocking=True).float( ) / 255.0 # uint8 to float32, 0-255 to 0.0-1.0 # Warmup if ni <= self.nw: xi = [0, self.nw] # x interp accumulate = max( 1, np.interp(ni, xi, [1, self.nbs / self.total_batch_size]).round()) for j, x in enumerate(self.optimizer.param_groups): # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0 x['lr'] = np.interp(ni, xi, [ self.hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * self.lf(self.epoch) ]) if 'momentum' in x: x['momentum'] = np.interp( ni, xi, [self.hyp['warmup_momentum'], self.hyp['momentum']]) # Multi-scale if self.opt.multi_scale: sz = random.randrange(self.imgsz * 0.5, self.imgsz * 1.5 + self.gs) // self.gs * self.gs # size sf = sz / max(self.imgs.shape[2:]) # scale factor if sf != 1: ns = [ math.ceil(x * sf / self.gs) * self.gs for x in self.imgs.shape[2:] ] # new shape (stretched to gs-multiple) self.imgs = F.interpolate(self.imgs, size=ns, mode='bilinear', align_corners=False) # Forward with amp.autocast(enabled=self.cuda): pred = self.model(self.imgs) # forward loss, loss_items = compute_loss(pred, self.targets.to( self.device), self.model) # loss scaled by batch_size if self.rank != -1: loss *= self.opt.world_size # gradient averaged between devices in DDP mode # Backward self.scaler.scale(loss).backward() # Optimize self.scaler.step(self.optimizer) # optimizer.step self.scaler.update() self.optimizer.zero_grad() if self.ema: self.ema.update(self.model) # Print if self.rank in [-1, 0]: self.mloss = (self.mloss * i + loss_items) / ( i + 1) # update mean losses mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB) self.s = ('%10s' * 2 + '%10.4g' * 6) % ( '%g/%g' % (self.epoch, self.epochs - 1), mem, *self.mloss, self.targets.shape[0], self.imgs.shape[-1]) self.pbar.set_description(self.s) # Plot if ni < 3: f = str(self.log_dir / f'train_batch{ni}.jpg') # filename result = plot_images(images=self.imgs, targets=self.targets, paths=self.paths, fname=f)
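# --- Illustrative sketch (not part of the original script) ---
# Minimal sketch of the warmup interpolation used above: over the first nw integrated batches,
# np.interp linearly ramps each parameter group's lr (and momentum) between a warmup value and
# its scheduled value. The hyperparameter values below are hypothetical.
import numpy as np

nw = 1000                 # number of warmup iterations
lr0, warmup_bias_lr = 0.01, 0.1
warmup_momentum, momentum = 0.8, 0.937

for ni in (0, 250, 500, 1000):            # a few sample iterations
    xi = [0, nw]                          # interpolation endpoints
    bias_lr = np.interp(ni, xi, [warmup_bias_lr, lr0])   # bias lr falls from 0.1 to lr0
    other_lr = np.interp(ni, xi, [0.0, lr0])             # other lrs rise from 0.0 to lr0
    m = np.interp(ni, xi, [warmup_momentum, momentum])   # momentum ramps up
    print(f'ni={ni:4d}  bias_lr={bias_lr:.4f}  lr={other_lr:.4f}  momentum={m:.3f}')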
def train(hyp, opt, device, tb_writer=None): print(f'Hyperparameters {hyp}') log_dir = Path(tb_writer.log_dir) if tb_writer else Path( opt.logdir) / 'evolve' # logging directory wdir = str(log_dir / 'weights') + os.sep # weights directory os.makedirs(wdir, exist_ok=True) last = wdir + 'last.pt' best = wdir + 'best.pt' results_file = str(log_dir / 'results.txt') epochs, batch_size, total_batch_size, weights, rank, loss_name = \ opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank, opt.loss if loss_name == 'ciou': loss_fn = compute_loss_ciou elif loss_name == 'giou': loss_fn = compute_loss_giou elif loss_name == 'gioupp': loss_fn = compute_loss_gioupp # TODO: Use DDP logging. Only the first process is allowed to log. # Save run settings with open(log_dir / 'hyp.yaml', 'w') as f: yaml.dump(hyp, f, sort_keys=False) with open(log_dir / 'opt.yaml', 'w') as f: yaml.dump(vars(opt), f, sort_keys=False) # Configure cuda = device.type != 'cpu' init_seeds(2 + rank) with open(opt.data) as f: data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict train_path = data_dict['train'] test_path = data_dict['val'] nc, names = (1, ['item']) if opt.single_cls else (int( data_dict['nc']), data_dict['names']) # number classes, names assert len(names) == nc, '%g names found for nc=%g dataset in %s' % ( len(names), nc, opt.data) # check # Model pretrained = weights.endswith('.pt') if pretrained: with torch_distributed_zero_first(rank): attempt_download(weights) # download if not found locally ckpt = torch.load(weights, map_location=device) # load checkpoint model = Darknet(opt.cfg).to(device) # create state_dict = { k: v for k, v in ckpt['model'].items() if model.state_dict()[k].numel() == v.numel() } model.load_state_dict(state_dict, strict=False) print('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights)) # report else: model = Darknet(opt.cfg).to(device) # create # Optimizer nbs = 64 # nominal batch size accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay pg0, pg1, pg2 = [], [], [] # optimizer parameter groups for k, v in dict(model.named_parameters()).items(): if '.bias' in k: pg2.append(v) # biases elif 'Conv2d.weight' in k: pg1.append(v) # apply weight_decay else: pg0.append(v) # all else if opt.adam: optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum else: optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True) optimizer.add_param_group({ 'params': pg1, 'weight_decay': hyp['weight_decay'] }) # add pg1 with weight_decay optimizer.add_param_group({'params': pg2}) # add pg2 (biases) print('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0))) del pg0, pg1, pg2 # Scheduler https://arxiv.org/pdf/1812.01187.pdf # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR lf = lambda x: (( (1 + math.cos(x * math.pi / epochs)) / 2)**1.0) * 0.8 + 0.2 # cosine scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf) # plot_lr_scheduler(optimizer, scheduler, epochs) # Resume start_epoch, best_fitness = 0, 0.0 if pretrained: # Optimizer if ckpt['optimizer'] is not None: optimizer.load_state_dict(ckpt['optimizer']) best_fitness = ckpt['best_fitness'] # Results if ckpt.get('training_results') is not None: with open(results_file, 'w') as file: file.write(ckpt['training_results']) # write results.txt # Epochs 
        start_epoch = ckpt['epoch'] + 1
        if epochs < start_epoch:
            print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
                  (weights, ckpt['epoch'], epochs))
            epochs += ckpt['epoch']  # finetune additional epochs

        del ckpt, state_dict

    # Image sizes
    gs = 32  # grid size (max stride)
    imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size]  # verify imgsz are gs-multiples

    # DP mode
    if cuda and rank == -1 and torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # SyncBatchNorm
    if opt.sync_bn and cuda and rank != -1:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
        print('Using SyncBatchNorm()')

    # Exponential moving average
    ema = ModelEMA(model) if rank in [-1, 0] else None

    # DDP mode
    if cuda and rank != -1:
        model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank)

    # Trainloader
    dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt, hyp=hyp, augment=True,
                                            cache=opt.cache_images, rect=opt.rect, local_rank=rank,
                                            world_size=opt.world_size)
    mlc = np.concatenate(dataset.labels, 0)[:, 0].max()  # max label class
    nb = len(dataloader)  # number of batches
    assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1)

    # Testloader
    if rank in [-1, 0]:
        ema.updates = start_epoch * nb // accumulate  # set EMA updates
        # local_rank is set to -1. Because only the first process is expected to do evaluation.
        testloader = create_dataloader(test_path, imgsz_test, batch_size, gs, opt, hyp=hyp, augment=False,
                                       cache=opt.cache_images, rect=True, local_rank=-1,
                                       world_size=opt.world_size)[0]

    # Model parameters
    hyp['cls'] *= nc / 80.  # scale coco-tuned hyp['cls'] to current dataset
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # giou loss ratio (obj_loss = 1.0 or giou)
    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device)  # attach class weights
    model.names = names

    # Class frequency
    if rank in [-1, 0]:
        labels = np.concatenate(dataset.labels, 0)
        c = torch.tensor(labels[:, 0])  # classes
        # cf = torch.bincount(c.long(), minlength=nc) + 1.
        # model._initialize_biases(cf.to(device))
        plot_labels(labels, save_dir=log_dir)
        if tb_writer:
            tb_writer.add_histogram('classes', c, 0)

        # Check anchors
        # if not opt.noautoanchor:
        #     check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)

    # Start training
    t0 = time.time()
    nw = max(3 * nb, 1e3)  # number of warmup iterations, max(3 epochs, 1k iterations)
    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0)  # 'P', 'R', 'mAP', 'F1', 'val GIoU', 'val Objectness', 'val Classification'
    scheduler.last_epoch = start_epoch - 1  # do not move
    scaler = amp.GradScaler(enabled=cuda)
    if rank in [0, -1]:
        print('Image sizes %g train, %g test' % (imgsz, imgsz_test))
        print('Using %g dataloader workers' % dataloader.num_workers)
        print('Starting training for %g epochs...' % epochs)
    # torch.autograd.set_detect_anomaly(True)
    for epoch in range(start_epoch, epochs):  # epoch ------------------------------------------------------------------
        model.train()

        # Update image weights (optional)
        if dataset.image_weights:
            # Generate indices
            if rank in [-1, 0]:
                w = model.class_weights.cpu().numpy() * (1 - maps) ** 2  # class weights
                image_weights = labels_to_image_weights(dataset.labels, nc=nc, class_weights=w)
                dataset.indices = random.choices(range(dataset.n), weights=image_weights,
                                                 k=dataset.n)  # rand weighted idx
            # Broadcast if DDP
            if rank != -1:
                indices = torch.zeros([dataset.n], dtype=torch.int)
                if rank == 0:
                    indices[:] = torch.tensor(dataset.indices, dtype=torch.int)
                dist.broadcast(indices, 0)
                if rank != 0:
                    dataset.indices = indices.cpu().numpy()

        # Update mosaic border
        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders

        mloss = torch.zeros(4, device=device)  # mean losses
        if rank != -1:
            dataloader.sampler.set_epoch(epoch)
        pbar = enumerate(dataloader)
        if rank in [-1, 0]:
            print(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'GIoU', 'obj', 'cls', 'total', 'targets', 'img_size'))
            pbar = tqdm(pbar, total=nb)  # progress bar
        optimizer.zero_grad()
        for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.to(device, non_blocking=True).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0

            # Warmup
            if ni <= nw:
                xi = [0, nw]  # x interp
                # model.gr = np.interp(ni, xi, [0.0, 1.0])  # giou loss ratio (obj_loss = 1.0 or giou)
                accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round())
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(ni, xi, [0.1 if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, xi, [0.9, hyp['momentum']])

            # Multi-scale
            if opt.multi_scale:
                sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
                    imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Autocast
            with amp.autocast(enabled=cuda):
                # Forward
                pred = model(imgs)

                # Loss
                loss, loss_items = compute_loss(pred, targets.to(device), model)  # scaled by batch_size
                if rank != -1:
                    loss *= opt.world_size  # gradient averaged between devices in DDP mode
                # if not torch.isfinite(loss):
                #     print('WARNING: non-finite loss, ending training ', loss_items)
                #     return results

            # Backward
            scaler.scale(loss).backward()

            # Optimize
            if ni % accumulate == 0:
                scaler.step(optimizer)  # optimizer.step
                scaler.update()
                optimizer.zero_grad()
                if ema is not None:
                    ema.update(model)

            # Print
            if rank in [-1, 0]:
                if loss_items:
                    mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
                mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0)  # (GB)
                s = ('%10s' * 2 + '%10.4g' * 6) % ('%g/%g' % (epoch, epochs - 1), mem,
                                                   *mloss, targets.shape[0], imgs.shape[-1])
                pbar.set_description(s)

                # Plot
                if ni < 3:
                    f = str(log_dir / ('train_batch%g.jpg' % ni))  # filename
                    result = plot_images(images=imgs, targets=targets, paths=paths, fname=f)
                    if tb_writer and result is not None:
                        tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
                        # tb_writer.add_graph(model, imgs)  # add model to tensorboard
            # end batch ------------------------------------------------------------------------------------------------

        # Scheduler
        scheduler.step()

        # DDP process 0 or single-GPU
        if rank in [-1, 0]:
            # mAP
            if ema is not None:
                ema.update_attr(model)
            final_epoch = epoch + 1 == epochs
            if not opt.notest or final_epoch:  # Calculate mAP
                results, maps, times = test.test(opt.data,
                                                 batch_size=batch_size,
                                                 imgsz=imgsz_test,
                                                 save_json=final_epoch and opt.data.endswith(os.sep + 'coco.yaml'),
                                                 model=ema.ema.module if hasattr(ema.ema, 'module') else ema.ema,
                                                 single_cls=opt.single_cls,
                                                 dataloader=testloader,
                                                 save_dir=log_dir,
                                                 compute_loss=loss_fn)

            # Write
            with open(results_file, 'a') as f:
                f.write(s + '%10.4g' * 7 % results + '\n')  # P, R, mAP, F1, test_losses=(GIoU, obj, cls)
            if len(opt.name) and opt.bucket:
                os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name))

            # Tensorboard
            if tb_writer:
                if loss_items:
                    tags = ['train/giou_loss', 'train/obj_loss', 'train/cls_loss',
                            'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
                            'val/giou_loss', 'val/obj_loss', 'val/cls_loss']
                    for x, tag in zip(list(mloss[:-1]) + list(results), tags):
                        tb_writer.add_scalar(tag, x, epoch)
                else:
                    tags = ["train/" + loss_name + "_loss",
                            'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
                            "val/" + loss_name + "_loss"]
                    for x, tag in zip(list(mloss) + list(results), tags):
                        tb_writer.add_scalar(tag, x, epoch)

            # Update best mAP
            fi = fitness(np.array(results).reshape(1, -1))  # fitness_i = weighted combination of [P, R, mAP, F1]
            if fi > best_fitness:
                best_fitness = fi

            # Save model
            save = (not opt.nosave) or (final_epoch and not opt.evolve)
            if save:
                with open(results_file, 'r') as f:  # create checkpoint
                    ckpt = {'epoch': epoch,
                            'best_fitness': best_fitness,
                            'training_results': f.read(),
                            'model': ema.ema.module.state_dict() if hasattr(ema.ema, 'module')
                            else ema.ema.state_dict(),
                            'optimizer': None if final_epoch else optimizer.state_dict()}

                # Save last, best and delete
                torch.save(ckpt, last)
                if epoch >= (epochs - 5):
                    torch.save(ckpt, last.replace('.pt', '_{:03d}.pt'.format(epoch)))
                if (best_fitness == fi) and not final_epoch:
                    torch.save(ckpt, best)
                del ckpt
        # end epoch ----------------------------------------------------------------------------------------------------
    # end training

    if rank in [-1, 0]:
        # Strip optimizers
        n = ('_' if len(opt.name) and not opt.name.isnumeric() else '') + opt.name
        fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n
        for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', 'results.txt'], [flast, fbest, fresults]):
            if os.path.exists(f1):
                os.rename(f1, f2)  # rename
                ispt = f2.endswith('.pt')  # is *.pt
                strip_optimizer(f2) if ispt else None  # strip optimizer
                os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket and ispt else None  # upload

        # Finish
        if not opt.evolve:
            plot_results(save_dir=log_dir)  # save as results.png
        print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))

    dist.destroy_process_group() if rank not in [-1, 0] else None
    torch.cuda.empty_cache()
    return results
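# For reference, the schedule handed to lr_scheduler.LambdaLR in train() can be inspected on its own.
# This is a minimal sketch (the helper name and the 300-epoch figure are illustrative, not project
# defaults): LambdaLR multiplies each group's initial lr by this factor every epoch, so the effective
# lr decays cosinely from lr0 at epoch 0 down to 0.2 * lr0 at the final epoch.
import math


def cosine_lr_multiplier(epoch, epochs):
    """Same expression as the lf lambda in train(): 1.0 at epoch 0, 0.2 at the final epoch."""
    return (((1 + math.cos(epoch * math.pi / epochs)) / 2) ** 1.0) * 0.8 + 0.2

# Example: cosine_lr_multiplier(0, 300) == 1.0, cosine_lr_multiplier(150, 300) == 0.6,
# cosine_lr_multiplier(300, 300) == 0.2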
def test(data,
         weights=None,
         batch_size=16,
         imgsz=640,
         conf_thres=0.001,
         iou_thres=0.6,  # for NMS
         single_cls=False,
         augment=False,
         verbose=False,
         model=None,
         dataloader=None,
         save_dir=Path(''),  # for saving images
         save_txt=False,  # for auto-labelling
         plots=True,
         compute_loss=None):  # training loss function (passed by train.py)

    # Initialize/load model and set device
    training = model is not None
    if training:  # called by train.py
        device = next(model.parameters()).device  # get model device
    else:  # called directly
        set_logging()
        device = select_device(opt.device, batch_size=batch_size)
        save_txt = opt.save_txt  # save *.txt labels
        if save_txt:
            out = Path('inference/output')
            if os.path.exists(out):
                shutil.rmtree(out)  # delete output folder
            os.makedirs(out)  # make new output folder

        # Remove previous
        for f in glob.glob(str(save_dir / 'test_batch*.jpg')):
            os.remove(f)

        # Load model
        model = attempt_load(weights, map_location=device)  # load FP32 model
        imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size

        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
        #     model = nn.DataParallel(model)

    # Half
    half = device.type != 'cpu'  # half precision only supported on CUDA
    if half:
        model.half()

    # Configure
    model.eval()
    with open(data) as f:
        data = yaml.load(f, Loader=yaml.FullLoader)  # data dict
    check_dataset(data)  # check
    nc = 1 if single_cls else int(data['nc'])  # number of classes
    iouv = torch.linspace(0.5, 0.95, 10).to(device)  # iou vector for mAP@0.5:0.95
    niou = iouv.numel()

    # Dataloader
    if not training:
        img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
        _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
        path = data['test'] if opt.task == 'test' else data['val']  # path to val/test images
        dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt,
                                       hyp=None, augment=False, cache=False, pad=0.5, rect=True)[0]

    seen = 0
    names = model.names if hasattr(model, 'names') else model.module.names
    coco91class = coco80_to_coco91_class()
    s = ('%20s' + '%12s' * 6) % ('Class', 'Images', 'Targets', 'P', 'R', 'mAP@.5', 'mAP@.5:.95')
    p, r, f1, mp, mr, map50, map, t0, t1 = 0., 0., 0., 0., 0., 0., 0., 0., 0.
    loss = torch.zeros(3, device=device)
    jdict, stats, ap, ap_class = [], [], [], []
    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
        img = img.to(device, non_blocking=True)
        img = img.half() if half else img.float()  # uint8 to fp16/32
        img /= 255.0  # 0 - 255 to 0.0 - 1.0
        targets = targets.to(device)
        nb, _, height, width = img.shape  # batch size, channels, height, width
        whwh = torch.Tensor([width, height, width, height]).to(device)

        # Disable gradients
        with torch.no_grad():
            # Run model
            t = time_synchronized()
            inf_out, train_out = model(img, augment=augment)  # inference and training outputs
            t0 += time_synchronized() - t

            # Compute loss
            if training:  # if model has loss hyperparameters
                loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3]  # box, obj, cls

            # Run NMS
            t = time_synchronized()
            output = non_max_suppression(inf_out, conf_thres=conf_thres, iou_thres=iou_thres)
            t1 += time_synchronized() - t

        # Statistics per image
        for si, pred in enumerate(output):
            labels = targets[targets[:, 0] == si, 1:]
            nl = len(labels)
            tcls = labels[:, 0].tolist() if nl else []  # target class
            seen += 1

            if pred is None:
                if nl:
                    stats.append((torch.zeros(0, niou, dtype=torch.bool), torch.Tensor(), torch.Tensor(), tcls))
                continue

            # Append to text file
            if save_txt:
                gn = torch.tensor(shapes[si][0])[[1, 0, 1, 0]]  # normalization gain whwh
                x = pred.clone()
                x[:, :4] = scale_coords(img[si].shape[1:], x[:, :4], shapes[si][0], shapes[si][1])  # to original
                for *xyxy, conf, cls in x:
                    xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
                    with open(str(out / Path(paths[si]).stem) + '.txt', 'a') as f:
                        f.write(('%g ' * 5 + '\n') % (cls, *xywh))  # label format

            # Clip boxes to image bounds
            clip_coords(pred, (height, width))

            # Assign all predictions as incorrect
            correct = torch.zeros(pred.shape[0], niou, dtype=torch.bool, device=device)
            if nl:
                detected = []  # target indices
                tcls_tensor = labels[:, 0]

                # target boxes
                tbox = xywh2xyxy(labels[:, 1:5]) * whwh

                # Per target class
                for cls in torch.unique(tcls_tensor):
                    ti = (cls == tcls_tensor).nonzero(as_tuple=False).view(-1)  # target indices
                    pi = (cls == pred[:, 5]).nonzero(as_tuple=False).view(-1)  # prediction indices

                    # Search for detections
                    if pi.shape[0]:
                        # Prediction to target ious
                        ious, i = box_iou(pred[pi, :4], tbox[ti]).max(1)  # best ious, indices

                        # Append detections
                        detected_set = set()
                        for j in (ious > iouv[0]).nonzero(as_tuple=False):
                            d = ti[i[j]]  # detected target
                            if d.item() not in detected_set:
                                detected_set.add(d.item())
                                detected.append(d)
                                correct[pi[j]] = ious[j] > iouv  # iou_thres is 1xn
                                if len(detected) == nl:  # all targets already located in image
                                    break

            # Append statistics (correct, conf, pcls, tcls)
            stats.append((correct.cpu(), pred[:, 4].cpu(), pred[:, 5].cpu(), tcls))

        # Plot images
        if plots and batch_i < 1:
            f = save_dir / ('test_batch%g_gt.jpg' % batch_i)  # filename
            plot_images(img, targets, paths, str(f), names)  # ground truth
            f = save_dir / ('test_batch%g_pred.jpg' % batch_i)
            plot_images(img, output_to_target(output, width, height), paths, str(f), names)  # predictions

    # Compute statistics
    stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
    if len(stats) and stats[0].any():
        p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, fname=save_dir / 'precision-recall_curve.png')
        p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1)  # [P, R, mAP@0.5, mAP@0.5:0.95]
        mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
        nt = np.bincount(stats[3].astype(np.int64), minlength=nc)  # number of targets per class
    else:
        nt = torch.zeros(1)

    # Print results
    pf = '%20s' + '%12.3g' * 6  # print format
    print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

    # Print results per class
    if verbose and nc > 1 and len(stats):
        for i, c in enumerate(ap_class):
            print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

    # Print speeds
    t = tuple(x / seen * 1E3 for x in (t0, t1, t0 + t1)) + (imgsz, imgsz, batch_size)  # tuple
    if not training:
        print('Speed: %.1f/%.1f/%.1f ms inference/NMS/total per %gx%g image at batch-size %g' % t)

    # Return results
    model.float()  # for training
    maps = np.zeros(nc) + map
    for i, c in enumerate(ap_class):
        maps[c] = ap[i]
    return (mp, mr, map50, map, *(loss.cpu() / len(dataloader)).tolist()), maps, t
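# The "correct" matrix built per image in test() drives the mAP@0.5:0.95 computation: a matched
# prediction counts as a true positive at every IoU threshold that its best IoU exceeds. The sketch
# below reproduces that broadcasted comparison with hypothetical IoU values (the numbers and the
# helper name are illustrative, not taken from any real run).
import torch


def correct_matrix_sketch():
    iouv = torch.linspace(0.5, 0.95, 10)          # same IoU thresholds as test()
    best_ious = torch.tensor([0.92, 0.63, 0.41])  # hypothetical best IoU of three matched predictions
    correct = best_ious.unsqueeze(1) > iouv       # (num_pred, 10) bool, like correct[pi[j]] = ious[j] > iouv
    # Row 0 is True for thresholds 0.50-0.90, row 1 only for 0.50-0.60, row 2 is all False;
    # ap_per_class() later averages AP over these threshold columns to obtain mAP@0.5:0.95.
    return correct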