def train(hyp, opt, tb_writer=None):
    logger.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
    save_dir, epochs, batch_size, weights = Path(opt.save_dir), opt.epochs, opt.batch_size, opt.weights

    # Directories
    wdir = save_dir / 'weights'
    wdir.mkdir(parents=True, exist_ok=True)  # make dir
    last = wdir / 'last.pkl'
    best = wdir / 'best.pkl'
    results_file = save_dir / 'results.txt'

    # Save run settings
    with open(save_dir / 'hyp.yaml', 'w') as f:
        yaml.dump(hyp, f, sort_keys=False)
    with open(save_dir / 'opt.yaml', 'w') as f:
        yaml.dump(vars(opt), f, sort_keys=False)

    # Configure
    plots = not opt.evolve  # create plots
    cuda = not opt.no_cuda
    if cuda:
        jt.flags.use_cuda = 1
    init_seeds(1)
    with open(opt.data) as f:
        data_dict = yaml.load(f, Loader=yaml.SafeLoader)  # data dict
    check_dataset(data_dict)  # check
    train_path = data_dict['train']
    test_path = data_dict['val']
    nc = 1 if opt.single_cls else int(data_dict['nc'])  # number of classes
    names = ['item'] if opt.single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
    assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data)  # check

    # Model
    model = Model(opt.cfg, ch=3, nc=nc)  # create
    pretrained = weights.endswith('.pkl')
    if pretrained:
        model.load(weights)  # load

    # Optimizer
    nbs = 64  # nominal batch size
    accumulate = max(round(nbs / batch_size), 1)  # accumulate loss before optimizing
    hyp['weight_decay'] *= batch_size * accumulate / nbs  # scale weight_decay
    logger.info(f"Scaled weight_decay = {hyp['weight_decay']}")

    pg0, pg1, pg2 = [], [], []  # optimizer parameter groups
    for k, v in model.named_modules():
        if hasattr(v, 'bias') and isinstance(v.bias, jt.Var):
            pg2.append(v.bias)  # biases
        if isinstance(v, nn.BatchNorm):
            pg0.append(v.weight)  # no decay
        elif hasattr(v, 'weight') and isinstance(v.weight, jt.Var):
            pg1.append(v.weight)  # apply decay

    if opt.adam:
        optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999))  # adjust beta1 to momentum
    else:
        optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)

    optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})  # add pg1 with weight_decay
    optimizer.add_param_group({'params': pg2})  # add pg2 (biases)
    logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
    del pg0, pg1, pg2

    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    # https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
    lf = one_cycle(1, hyp['lrf'], epochs)  # cosine 1 -> hyp['lrf']
    scheduler = optim.LambdaLR(optimizer, lr_lambda=lf)
    # plot_lr_scheduler(optimizer, scheduler, epochs)

    loggers = {}  # loggers dict
    start_epoch, best_fitness = 0, 0.0

    # Image sizes
    gs = int(model.stride.max())  # grid size (max stride)
    nl = model.model[-1].nl  # number of detection layers (used for scaling hyp['obj'])
    imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size]  # verify imgsz are gs-multiples

    # EMA
    ema = ModelEMA(model)

    # Trainloader
    dataloader = create_dataloader(train_path, imgsz, batch_size, gs, opt,
                                   hyp=hyp, augment=True, cache=opt.cache_images,
                                   rect=opt.rect, workers=opt.workers,
                                   image_weights=opt.image_weights, quad=opt.quad,
                                   prefix=colorstr('train: '))
    mlc = np.concatenate(dataloader.labels, 0)[:, 0].max()  # max label class
    nb = len(dataloader)  # number of batches
    assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1)

    ema.updates = start_epoch * nb // accumulate  # set EMA updates
    testloader = create_dataloader(test_path, imgsz_test, batch_size, gs, opt,  # testloader
                                   hyp=hyp, cache=opt.cache_images and not opt.notest,
                                   rect=True, workers=opt.workers,
                                   pad=0.5, prefix=colorstr('val: '))

    labels = np.concatenate(dataloader.labels, 0)
    c = jt.array(labels[:, 0])  # classes
    # cf = torch.bincount(c.int(), minlength=nc) + 1.  # frequency
    # model._initialize_biases(cf)
    if plots:
        plot_labels(labels, save_dir, loggers)
        if tb_writer:
            tb_writer.add_histogram('classes', c.numpy(), 0)

    # Anchors
    if not opt.noautoanchor:
        check_anchors(dataloader, model=model, thr=hyp['anchor_t'], imgsz=imgsz)

    # Model parameters
    hyp['box'] *= 3. / nl  # scale to layers
    hyp['cls'] *= nc / 80. * 3. / nl  # scale to classes and layers
    hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl  # scale to image size and layers
    model.nc = nc  # attach number of classes to model
    model.hyp = hyp  # attach hyperparameters to model
    model.gr = 1.0  # iou loss ratio (obj_loss = 1.0 or iou)
    model.class_weights = labels_to_class_weights(dataloader.labels, nc) * nc  # attach class weights
    model.names = names

    # Start training
    t0 = time.time()
    nw = max(round(hyp['warmup_epochs'] * nb), 1000)  # number of warmup iterations, max(3 epochs, 1k iterations)
    # nw = min(nw, (epochs - start_epoch) / 2 * nb)  # limit warmup to < 1/2 of training
    maps = np.zeros(nc)  # mAP per class
    results = (0, 0, 0, 0, 0, 0, 0)  # P, R, mAP@0.5, mAP@0.5:0.95, val_loss(box, obj, cls)
    scheduler.last_epoch = start_epoch - 1  # do not move
    logger.info(f'Image sizes {imgsz} train, {imgsz_test} test\n'
                f'Using {dataloader.num_workers} dataloader workers\n'
                f'Logging results to {save_dir}\n'
                f'Starting training for {epochs} epochs...')

    for epoch in range(start_epoch, epochs):  # epoch ----------------------------------------------------------------
        model.train()

        # Update image weights (optional)
        if opt.image_weights:
            # Generate indices
            cw = model.class_weights.numpy() * (1 - maps) ** 2 / nc  # class weights
            iw = labels_to_image_weights(dataloader.labels, nc=nc, class_weights=cw)  # image weights
            dataloader.indices = random.choices(range(dataloader.n), weights=iw, k=dataloader.n)  # rand weighted idx

        # Update mosaic border
        # b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
        # dataset.mosaic_border = [b - imgsz, -b]  # height, width borders

        mloss = jt.zeros((4,))  # mean losses
        pbar = enumerate(dataloader)
        logger.info(('\n' + '%10s' * 7) % ('Epoch', 'box', 'obj', 'cls', 'total', 'targets', 'img_size'))
        pbar = tqdm(pbar, total=nb)  # progress bar
        for i, (imgs, targets, paths, _) in pbar:  # batch -----------------------------------------------------------
            ni = i + nb * epoch  # number integrated batches (since train start)
            imgs = imgs.float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0

            # Warmup
            if ni <= nw:
                xi = [0, nw]  # x interp
                # model.gr = np.interp(ni, xi, [0.0, 1.0])  # iou loss ratio (obj_loss = 1.0 or iou)
                # accumulate = max(1, np.interp(ni, xi, [1, nbs / batch_size]).round())
                for j, x in enumerate(optimizer.param_groups):
                    # bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
                    x['lr'] = np.interp(ni, xi,
                                        [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
                    if 'momentum' in x:
                        x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])

            # Multi-scale
            if opt.multi_scale:
                sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs  # size
                sf = sz / max(imgs.shape[2:])  # scale factor
                if sf != 1:
                    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape (stretched to gs-multiple)
                    imgs = nn.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

            # Forward
            pred = model(imgs)  # forward
            loss, loss_items = compute_loss(pred, targets, model)  # loss scaled by batch_size
            if opt.quad:
                loss *= 4.

            # Optimize
            optimizer.step(loss)
            if ema:
                ema.update(model)

            # Print
            mloss = (mloss * i + loss_items) / (i + 1)  # update mean losses
            s = ('%10s' + '%10.4g' * 6) % ('%g/%g' % (epoch, epochs - 1), *mloss, targets.shape[0], imgs.shape[-1])
            pbar.set_description(s)

            # Plot
            if plots and ni < 3:
                f = save_dir / f'train_batch{ni}.jpg'  # filename
                Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start()
                # if tb_writer:
                #     tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
                #     tb_writer.add_graph(model, imgs)  # add model to tensorboard
            # end batch ------------------------------------------------------------------------------------------
        # end epoch ----------------------------------------------------------------------------------------------

        # Scheduler
        lr = [x['lr'] for x in optimizer.param_groups]  # for tensorboard
        scheduler.step()

        # mAP
        if ema:
            ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights'])
        final_epoch = epoch + 1 == epochs
        if not opt.notest or final_epoch:  # Calculate mAP
            results, maps, times = test.test(data=opt.data,
                                             batch_size=batch_size,
                                             imgsz=imgsz_test,
                                             model=ema.ema,
                                             single_cls=opt.single_cls,
                                             dataloader=testloader,
                                             save_dir=save_dir,
                                             plots=plots and final_epoch)

        # Write
        with open(results_file, 'a') as f:
            f.write(s + '%10.4g' * 7 % results + '\n')  # P, R, mAP@0.5, mAP@0.5:0.95, val_loss(box, obj, cls)
        if len(opt.name) and opt.bucket:
            os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name))

        # Log
        tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss',  # train loss
                'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5-0.95',
                'val/box_loss', 'val/obj_loss', 'val/cls_loss',  # val loss
                'x/lr0', 'x/lr1', 'x/lr2']  # params
        for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
            if tb_writer:
                if hasattr(x, "numpy"):
                    x = x.numpy()
                tb_writer.add_scalar(tag, x, epoch)  # tensorboard

        # Update best mAP
        fi = fitness(np.array(results).reshape(1, -1))  # weighted combination of [P, R, mAP@0.5, mAP@0.5:0.95]
        if fi > best_fitness:
            best_fitness = fi

        # Save model
        save = (not opt.nosave) or (final_epoch and not opt.evolve)
        if save:
            # Save last, best and delete
            jt.save(ema.ema.state_dict(), last)
            if best_fitness == fi:
                jt.save(ema.ema.state_dict(), best)
        # end epoch ----------------------------------------------------------------------------------------------
    # end training

    # Strip optimizers
    final = best if best.exists() else last  # final model
    if opt.bucket:
        os.system(f'gsutil cp {final} gs://{opt.bucket}/weights')  # upload

    # Plots
    if plots:
        plot_results(save_dir=save_dir)  # save as results.png

    # Test best.pkl
    logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
    best_model = Model(opt.cfg)
    best_model.load(str(final))
    best_model = best_model.fuse()
    if opt.data.endswith('coco.yaml') and nc == 80:  # if COCO
        for conf, iou, save_json in ([0.25, 0.45, False], [0.001, 0.65, True]):  # speed, mAP tests
            results, _, _ = test.test(opt.data,
                                      batch_size=batch_size,
                                      imgsz=imgsz_test,
                                      conf_thres=conf,
                                      iou_thres=iou,
                                      model=best_model,
                                      single_cls=opt.single_cls,
                                      dataloader=testloader,
                                      save_dir=save_dir,
                                      save_json=save_json,
                                      plots=False)

    return results
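# --- Note on one_cycle (not part of the original file) ----------------------------------
# one_cycle() builds the lr_lambda passed to optim.LambdaLR in train() above. It is not
# defined in this excerpt; assuming it mirrors the upstream YOLOv5 helper this port
# follows, it is a cosine ramp from y1 (here 1.0) down to y2 (hyp['lrf']) over `steps`
# epochs. A minimal sketch under that assumption:
import math


def one_cycle(y1=1.0, y2=0.1, steps=100):
    # Cosine interpolation: the returned lambda satisfies f(0) == y1 and f(steps) == y2.
    return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1


# e.g. one_cycle(1, 0.2, 300) gives 1.0 at epoch 0, 0.6 at epoch 150, 0.2 at epoch 300.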
class DDPMixSolver(object):
    def __init__(self, cfg_path):
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']
        self.model_cfg = self.cfg['model']
        self.optim_cfg = self.cfg['optim']
        self.val_cfg = self.cfg['val']
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.val_cfg)
        os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
        self.gpu_num = len(str(self.cfg['gpus']).split(","))
        dist.init_process_group(backend='nccl')
        self.tdata = CustomerDataSets(json_path=self.data_cfg['train_json_path'],
                                      debug=self.data_cfg['debug'],
                                      augment=True)
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collate_fn,
                                  sampler=DistributedSampler(dataset=self.tdata, shuffle=True))
        self.vdata = CustomerDataSets(json_path=self.data_cfg['val_json_path'],
                                      debug=self.data_cfg['debug'],
                                      augment=False)
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.vdata.collate_fn,
                                  sampler=DistributedSampler(dataset=self.vdata, shuffle=False))
        print("train_data: ", len(self.tdata), " | ", "val_data: ", len(self.vdata))
        print("train_iter: ", len(self.tloader), " | ", "val_iter: ", len(self.vloader))
        if self.cfg['model_name'] == "v4":
            net = YOLOv4
        elif self.cfg['model_name'] == "v5":
            net = YOLOv5
        else:
            raise NotImplementedError("{:s} not supported yet".format(self.cfg['model_name']))
        model = net(num_cls=self.model_cfg['num_cls'],
                    anchors=self.model_cfg['anchors'],
                    strides=self.model_cfg['strides'],
                    scale_name=self.model_cfg['scale_name'])
        self.best_map = 0.
        optimizer = split_optimizer(model, self.optim_cfg)
        local_rank = dist.get_rank()
        self.local_rank = local_rank
        self.device = torch.device("cuda", local_rank)
        model.to(self.device)
        self.scaler = amp.GradScaler(enabled=True)
        if self.optim_cfg['sync_bn']:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        self.model = nn.parallel.distributed.DistributedDataParallel(model,
                                                                     device_ids=[local_rank],
                                                                     output_device=local_rank)
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)
        self.lr_adjuster = IterWarmUpCosineDecayMultiStepLRAdjust(init_lr=self.optim_cfg['lr'],
                                                                  warm_up_epoch=self.optim_cfg['warm_up_epoch'],
                                                                  iter_per_epoch=len(self.tloader),
                                                                  epochs=self.optim_cfg['epochs'],
                                                                  alpha=self.optim_cfg['alpha'],
                                                                  gamma=self.optim_cfg['gamma'],
                                                                  bias_idx=2,
                                                                  milestones=self.optim_cfg['milestones'])
        self.obj_logger = AverageLogger()
        self.iou_logger = AverageLogger()
        self.loss_logger = AverageLogger()
        self.map_logger = AverageLogger()

    def train(self, epoch):
        self.obj_logger.reset()
        self.iou_logger.reset()
        self.loss_logger.reset()
        self.model.train()
        if self.local_rank == 0:
            pbar = tqdm(self.tloader)
        else:
            pbar = self.tloader
        for i, (img_tensor, targets_tensor) in enumerate(pbar):
            with torch.no_grad():
                # optional multi-scale training: resize the whole batch to a random size
                if len(self.data_cfg['multi_scale']) > 2:
                    target_size = np.random.choice(self.data_cfg['multi_scale'])
                    img_tensor = interpolate(img_tensor, mode='bilinear', size=target_size, align_corners=False)
                _, _, h, w = img_tensor.shape
                img_tensor = img_tensor.to(self.device)
                targets_tensor = targets_tensor.to(self.device)
            self.optimizer.zero_grad()
            with amp.autocast(enabled=True):
                ret = self.model(img_tensor, targets_tensor)
                obj_loss = ret['obj_loss']
                iou_loss = ret['iou_loss']
                loss = obj_loss + iou_loss
            self.scaler.scale(loss).backward()
            self.lr_adjuster(self.optimizer, i, epoch)
            # learning rates of param group 0 and the bias group (idx 2), shown in the progress bar
            ulr = self.optimizer.param_groups[0]['lr']
            dlr = self.optimizer.param_groups[2]['lr']
            self.scaler.step(self.optimizer)
            self.scaler.update()
            self.ema.update(self.model)
            self.obj_logger.update(obj_loss.item())
            self.iou_logger.update(iou_loss.item())
            self.loss_logger.update(loss.item())
            if self.local_rank == 0:
                pbar.set_description(
                    "epoch:{:2d}|size:{:3d}|loss:{:6.4f}|obj_loss:{:6.4f}|iou_loss:{:6.4f}|ulr:{:8.6f},dlr:{:8.6f}".format(
                        epoch + 1,
                        h,
                        self.loss_logger.avg(),
                        obj_loss.item(),
                        iou_loss.item(),
                        ulr,
                        dlr))
        self.ema.update_attr(self.model)
        print("epoch:{:3d}|local:{:3d}|loss:{:6.4f}|obj_loss:{:6.4f}|iou_loss:{:6.4f}".format(
            epoch + 1,
            self.local_rank,
            self.loss_logger.avg(),
            self.obj_logger.avg(),
            self.iou_logger.avg()))

    @torch.no_grad()
    def val(self, epoch):
        self.model.eval()
        self.ema.ema.eval()
        predict_list = list()
        target_list = list()
        if self.local_rank == 0:
            pbar = tqdm(self.vloader)
        else:
            pbar = self.vloader
        for img_tensor, targets_tensor in pbar:
            _, _, h, w = img_tensor.shape
            # scale normalized cx,cy,w,h targets to absolute pixels, then convert to x1,y1,x2,y2
            targets_tensor[:, 1:] = targets_tensor[:, 1:] * torch.tensor(data=[w, h, w, h])
            targets_tensor[:, [1, 2]] = targets_tensor[:, [1, 2]] - targets_tensor[:, [3, 4]] * 0.5
            targets_tensor[:, [3, 4]] = targets_tensor[:, [1, 2]] + targets_tensor[:, [3, 4]]
            img_tensor = img_tensor.to(self.device)
            targets_tensor = targets_tensor.to(self.device)
            predicts = self.ema.ema(img_tensor)['predicts']
            for i, pred in enumerate(predicts):
                if pred is not None:
                    pred = torch.cat([pred, torch.zeros_like(pred[..., [0]])], dim=-1)
                predict_list.append(pred)
                targets_sample = targets_tensor[targets_tensor[:, 0] == i][:, 1:]
                targets_sample = torch.cat([torch.zeros_like(targets_sample[..., [0]]), targets_sample], dim=-1)
                target_list.append(targets_sample)
        mp, mr, map50, map = coco_map(predict_list, target_list)
        mp = reduce_sum(torch.tensor(mp, device=self.device)).item() / self.gpu_num
        mr = reduce_sum(torch.tensor(mr, device=self.device)).item() / self.gpu_num
        map50 = reduce_sum(torch.tensor(map50, device=self.device)).item() / self.gpu_num
        map = reduce_sum(torch.tensor(map, device=self.device)).item() / self.gpu_num
        if self.local_rank == 0:
            print("epoch: {:2d}|gpu_num:{:d}|mp:{:6.4f}|mr:{:6.4f}|map50:{:6.4f}|map:{:6.4f}"
                  .format(epoch + 1,
                          self.gpu_num,
                          mp * 100,
                          mr * 100,
                          map50 * 100,
                          map * 100))
        last_weight_path = os.path.join(self.val_cfg['weight_path'],
                                        "{:s}_{:s}_last.pth".format(self.cfg['model_name'],
                                                                    self.model_cfg['scale_name']))
        best_map_weight_path = os.path.join(self.val_cfg['weight_path'],
                                            "{:s}_{:s}_best_map.pth".format(self.cfg['model_name'],
                                                                            self.model_cfg['scale_name']))
        ema_static = self.ema.ema.state_dict()
        cpkt = {
            "ema": ema_static,
            "map": map * 100,
            "epoch": epoch,
        }
        if self.local_rank != 0:
            return
        torch.save(cpkt, last_weight_path)
        if map > self.best_map:
            torch.save(cpkt, best_map_weight_path)
            self.best_map = map

    def run(self):
        for epoch in range(self.optim_cfg['epochs']):
            self.train(epoch)
            if (epoch + 1) % self.val_cfg['interval'] == 0:
                self.val(epoch)
        dist.destroy_process_group()
        torch.cuda.empty_cache()
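# --- Usage sketch (not part of the original file) ----------------------------------------
# DDPMixSolver calls dist.init_process_group('nccl') in its constructor, so it expects to
# be started with one process per GPU (e.g. `torchrun --nproc_per_node=2 main.py`, with
# main.py and the config path below being assumptions for illustration).
if __name__ == '__main__':
    solver = DDPMixSolver(cfg_path='config/train.yaml')  # hypothetical config path
    solver.run()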
class DDPMixSolver(object):
    def __init__(self, cfg_path):
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']
        self.model_cfg = self.cfg['model']
        self.optim_cfg = self.cfg['optim']
        self.val_cfg = self.cfg['val']
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.val_cfg)
        os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
        self.gpu_num = len(self.cfg['gpus'].split(','))
        dist.init_process_group(backend='nccl')
        self.tdata = COCODataSets(img_root=self.data_cfg['train_img_root'],
                                  annotation_path=self.data_cfg['train_annotation_path'],
                                  max_thresh=self.data_cfg['max_thresh'],
                                  debug=self.data_cfg['debug'],
                                  use_crowd=self.data_cfg['use_crowd'],
                                  augments=True,
                                  remove_blank=self.data_cfg['remove_blank'])
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collect_fn,
                                  sampler=DistributedSampler(dataset=self.tdata, shuffle=True))
        self.vdata = COCODataSets(img_root=self.data_cfg['val_img_root'],
                                  annotation_path=self.data_cfg['val_annotation_path'],
                                  max_thresh=self.data_cfg['max_thresh'],
                                  debug=self.data_cfg['debug'],
                                  use_crowd=self.data_cfg['use_crowd'],
                                  augments=False,
                                  remove_blank=False)
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.vdata.collect_fn,
                                  sampler=DistributedSampler(dataset=self.vdata, shuffle=False))
        print("train_data: ", len(self.tdata), " | ",
              "val_data: ", len(self.vdata), " | ",
              "empty_data: ", self.tdata.empty_images_len)
        print("train_iter: ", len(self.tloader), " | ",
              "val_iter: ", len(self.vloader))
        model = SparseRCNN(**self.model_cfg)
        self.best_map = 0.
        optimizer = split_optimizer_v2(model, self.optim_cfg)
        local_rank = dist.get_rank()
        self.local_rank = local_rank
        self.device = torch.device("cuda", local_rank)
        model.to(self.device)
        if self.optim_cfg['sync_bn']:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        self.model = nn.parallel.distributed.DistributedDataParallel(model,
                                                                     device_ids=[local_rank],
                                                                     output_device=local_rank)
        self.scaler = amp.GradScaler(enabled=True) if self.optim_cfg['amp'] else None
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)
        self.lr_adjuster = IterWarmUpMultiStepDecay(init_lr=self.optim_cfg['lr'],
                                                    milestones=self.optim_cfg['milestones'],
                                                    warm_up_iter=self.optim_cfg['warm_up_iter'],
                                                    iter_per_epoch=len(self.tloader),
                                                    epochs=self.optim_cfg['epochs'],
                                                    alpha=self.optim_cfg['alpha'],
                                                    warm_up_factor=self.optim_cfg['warm_up_factor'])
        self.cls_loss_logger = AverageLogger()
        self.l1_loss_logger = AverageLogger()
        self.iou_loss_logger = AverageLogger()
        self.match_num_logger = AverageLogger()
        self.loss_logger = AverageLogger()
        # if self.local_rank == 0:
        #     print(self.model)

    def train(self, epoch):
        self.loss_logger.reset()
        self.cls_loss_logger.reset()
        self.l1_loss_logger.reset()
        self.iou_loss_logger.reset()
        self.match_num_logger.reset()
        self.model.train()
        if self.local_rank == 0:
            pbar = tqdm(self.tloader)
        else:
            pbar = self.tloader
        for i, (img_tensor, targets_tensor, batch_len) in enumerate(pbar):
            _, _, h, w = img_tensor.shape
            with torch.no_grad():
                img_tensor = img_tensor.to(self.device)
                targets_tensor = targets_tensor.to(self.device)
            self.optimizer.zero_grad()
            if self.scaler is not None:
                with amp.autocast(enabled=True):
                    out = self.model(img_tensor,
                                     targets={"target": targets_tensor, "batch_len": batch_len})
                    cls_loss = out['cls_loss']
                    l1_loss = out['l1_loss']
                    iou_loss = out['iou_loss']
                    match_num = out['match_num']
                    loss = cls_loss + l1_loss + iou_loss
                self.scaler.scale(loss).backward()
                self.lr_adjuster(self.optimizer, i, epoch)
                self.scaler.step(self.optimizer)
                self.scaler.update()
            else:
                out = self.model(img_tensor,
                                 targets={"target": targets_tensor, "batch_len": batch_len})
                cls_loss = out['cls_loss']
                l1_loss = out['l1_loss']
                iou_loss = out['iou_loss']
                match_num = out['match_num']
                loss = cls_loss + l1_loss + iou_loss
                loss.backward()
                self.lr_adjuster(self.optimizer, i, epoch)
                self.optimizer.step()
            self.ema.update(self.model)
            lr = self.optimizer.param_groups[0]['lr']
            self.loss_logger.update(loss.item())
            self.iou_loss_logger.update(iou_loss.item())
            self.l1_loss_logger.update(l1_loss.item())
            self.cls_loss_logger.update(cls_loss.item())
            self.match_num_logger.update(match_num)
            str_template = \
                "epoch:{:2d}|match_num:{:0>4d}|size:{:3d}|loss:{:6.4f}|cls:{:6.4f}|l1:{:6.4f}|iou:{:6.4f}|lr:{:8.6f}"
            if self.local_rank == 0:
                pbar.set_description(
                    str_template.format(epoch + 1,
                                        int(match_num),
                                        h,
                                        self.loss_logger.avg(),
                                        self.cls_loss_logger.avg(),
                                        self.l1_loss_logger.avg(),
                                        self.iou_loss_logger.avg(),
                                        lr))
        self.ema.update_attr(self.model)
        loss_avg = reduce_sum(torch.tensor(self.loss_logger.avg(), device=self.device)) / self.gpu_num
        iou_loss_avg = reduce_sum(torch.tensor(self.iou_loss_logger.avg(), device=self.device)).item() / self.gpu_num
        l1_loss_avg = reduce_sum(torch.tensor(self.l1_loss_logger.avg(), device=self.device)).item() / self.gpu_num
        cls_loss_avg = reduce_sum(torch.tensor(self.cls_loss_logger.avg(), device=self.device)).item() / self.gpu_num
        match_num_sum = reduce_sum(torch.tensor(self.match_num_logger.sum(), device=self.device)).item() / self.gpu_num
        if self.local_rank == 0:
            final_template = "epoch:{:2d}|match_num:{:d}|loss:{:6.4f}|cls:{:6.4f}|l1:{:6.4f}|iou:{:6.4f}"
            print(final_template.format(epoch + 1,
                                        int(match_num_sum),
                                        loss_avg,
                                        cls_loss_avg,
                                        l1_loss_avg,
                                        iou_loss_avg))

    @torch.no_grad()
    def val(self, epoch):
        predict_list = list()
        target_list = list()
        self.model.eval()
        self.ema.ema.eval()
        if self.local_rank == 0:
            pbar = tqdm(self.vloader)
        else:
            pbar = self.vloader
        for img_tensor, targets_tensor, batch_len in pbar:
            img_tensor = img_tensor.to(self.device)
            targets_tensor = targets_tensor.to(self.device)
            predicts = self.ema.ema(img_tensor)['predicts']
            for pred, target in zip(predicts, targets_tensor.split(batch_len)):
                predict_list.append(pred)
                target_list.append(target)
        mp, mr, map50, mean_ap = coco_map(predict_list, target_list)
        mp = reduce_sum(torch.tensor(mp, device=self.device)) / self.gpu_num
        mr = reduce_sum(torch.tensor(mr, device=self.device)) / self.gpu_num
        map50 = reduce_sum(torch.tensor(map50, device=self.device)) / self.gpu_num
        mean_ap = reduce_sum(torch.tensor(mean_ap, device=self.device)) / self.gpu_num
        if self.local_rank == 0:
            print("*" * 20, "eval start", "*" * 20)
            print("epoch: {:2d}|mp:{:6.4f}|mr:{:6.4f}|map50:{:6.4f}|map:{:6.4f}".format(
                epoch + 1,
                mp * 100,
                mr * 100,
                map50 * 100,
                mean_ap * 100))
            print("*" * 20, "eval end", "*" * 20)
        last_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_{:s}_last.pth".format(self.cfg['model_name'], self.model_cfg['backbone']))
        best_map_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_{:s}_best_map.pth".format(self.cfg['model_name'], self.model_cfg['backbone']))
        model_static = self.model.module.state_dict()
        cpkt = {
            "model": model_static,
            "map": mean_ap * 100,
            "epoch": epoch,
            "ema": self.ema.ema.state_dict()
        }
        if self.local_rank != 0:
            return
        torch.save(cpkt, last_weight_path)
        if mean_ap > self.best_map:
            torch.save(cpkt, best_map_weight_path)
            self.best_map = mean_ap

    def run(self):
        for epoch in range(self.optim_cfg['epochs']):
            self.train(epoch)
            if (epoch + 1) % self.val_cfg['interval'] == 0:
                self.val(epoch)
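# --- reduce_sum sketch (not part of the original file) -----------------------------------
# reduce_sum() is imported from elsewhere in the repo; train() and val() above divide its
# result by self.gpu_num to turn per-rank values into a cross-GPU mean. A minimal
# all_reduce-based implementation consistent with that usage (an assumption, not the
# repo's actual code):
import torch
import torch.distributed as dist


def reduce_sum(tensor):
    # Sum `tensor` across all ranks; all_reduce defaults to ReduceOp.SUM.
    rt = tensor.clone()
    dist.all_reduce(rt)
    return rt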