class DDPApexProcessor(object):
    """DistributedDataParallel trainer for RetinaNet using NVIDIA apex O1
    mixed precision.

    Reads a YAML config with ``data``/``model``/``optim``/``hyper_params``/
    ``val`` sections, builds COCO train/val loaders with per-rank
    DistributedSamplers, wraps the model in apex AMP + DDP, and drives the
    train / validate / checkpoint loop via :meth:`run`.
    """

    def __init__(self, cfg_path):
        """Build datasets, loaders, model, optimizer, loss and LR schedule.

        cfg_path: path to the YAML config file.
        Side effects: sets CUDA_VISIBLE_DEVICES and joins the NCCL process
        group, so this must run under a torch.distributed launcher.
        """
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']
        self.model_cfg = self.cfg['model']
        self.optim_cfg = self.cfg['optim']
        self.hyper_params = self.cfg['hyper_params']
        self.val_cfg = self.cfg['val']
        # Echo the full config so every rank's log records its settings.
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.hyper_params)
        print(self.val_cfg)
        # NOTE(review): CUDA_VISIBLE_DEVICES must take effect before any CUDA
        # context exists; presumably no CUDA call has happened yet — verify.
        os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
        dist.init_process_group(backend='nccl')
        # Training set: augmented, optionally dropping images w/o boxes.
        self.tdata = COCODataSets(
            img_root=self.data_cfg['train_img_root'],
            annotation_path=self.data_cfg['train_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=True,
            remove_blank=self.data_cfg['remove_blank'])
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.tdata, shuffle=True))
        # Validation set: no augmentation, keep blank images.
        self.vdata = COCODataSets(
            img_root=self.data_cfg['val_img_root'],
            annotation_path=self.data_cfg['val_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=False,
            remove_blank=False)
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.vdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.vdata, shuffle=False))
        print("train_data: ", len(self.tdata), " | ",
              "val_data: ", len(self.vdata), " | ",
              "empty_data: ", self.tdata.empty_images_len)
        print("train_iter: ", len(self.tloader), " | ",
              "val_iter: ", len(self.vloader))
        model = RetinaNet(
            num_cls=self.model_cfg['num_cls'],
            anchor_sizes=self.model_cfg['anchor_sizes'],
            strides=self.model_cfg['strides'],
            backbone=self.model_cfg['backbone'],
        )
        if self.model_cfg.get("backbone_weight", None):
            weights = torch.load(self.model_cfg['backbone_weight'])
            # (sic) "load_backbone_weighs" is the model's API name.
            model.load_backbone_weighs(weights)
        self.best_map = 0.
        self.best_map50 = 0.
        optimizer = split_optimizer(model, self.optim_cfg)
        local_rank = dist.get_rank()
        self.local_rank = local_rank
        self.device = torch.device("cuda", local_rank)
        # apex requires the model on its device before amp.initialize, and
        # amp.initialize must precede the DDP wrap.
        model.to(self.device)
        model, optimizer = amp.initialize(model, optimizer,
                                          opt_level='O1',
                                          verbosity=0)
        if self.optim_cfg['sync_bn']:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        self.model = nn.parallel.distributed.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank)
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)
        # NOTE(review): eval() on a config string is unsafe if the YAML is
        # untrusted; consider ast.literal_eval.
        beta = eval(self.hyper_params['beta']) if isinstance(self.hyper_params['beta'], str) \
            else self.hyper_params['beta']
        self.creterion = RetinaAnchorFreeLoss(
            alpha=self.hyper_params['alpha'],
            gamma=self.hyper_params['gamma'],
            beta=beta,
            top_k=self.hyper_params['top_k'],
            box_iou_thresh=self.hyper_params['box_iou_thresh'],
            box_reg_weight=self.hyper_params['box_reg_weight'])
        self.lr_adjuster = WarmUpCosineDecayMultiStepLRAdjust(
            init_lr=self.optim_cfg['lr'],
            milestones=self.optim_cfg['milestones'],
            warm_up_epoch=self.optim_cfg['warm_up_epoch'],
            iter_per_epoch=len(self.tloader),
            epochs=self.optim_cfg['epochs'],
            cosine_weights=self.optim_cfg['cosine_weights'])

    def train(self, epoch):
        """Run one training epoch with apex loss scaling and EMA updates."""
        self.model.train()
        if self.model_cfg['freeze_bn']:
            self.model.apply(freeze_bn)
        # Progress bar only on rank 0 to keep multi-rank logs readable.
        if self.local_rank == 0:
            pbar = tqdm(self.tloader)
        else:
            pbar = self.tloader
        loss_list = [list(), list(), list()]  # total / cls / reg histories
        lr = 0
        match_num = 0
        for i, (img_tensor, targets_tensor, _) in enumerate(pbar):
            # Multi-scale training: resample the batch to a random size.
            if len(self.hyper_params['multi_scale']) > 2:
                target_size = np.random.choice(
                    self.hyper_params['multi_scale'])
                img_tensor = interpolate(img_tensor, mode='bilinear',
                                         size=target_size,
                                         align_corners=False)
            _, _, h, w = img_tensor.shape
            with torch.no_grad():
                # Targets arrive normalized; scale boxes to pixel coords.
                targets_tensor[:, 3:] = targets_tensor[:, 3:] * torch.tensor(
                    data=[w, h, w, h])
                img_tensor = img_tensor.to(self.device)
                targets_tensor = targets_tensor.to(self.device)
            self.optimizer.zero_grad()
            cls_predicts, reg_predicts, anchors = self.model(img_tensor)
            total_loss, detail_loss, total_num = self.creterion(
                cls_predicts, reg_predicts, anchors, targets_tensor)
            match_num += total_num
            # apex: scale the loss before backward, clip on master params.
            with amp.scale_loss(total_loss, self.optimizer) as scaled_loss:
                scaled_loss.backward()
            nn.utils.clip_grad_norm_(amp.master_params(self.optimizer),
                                     max_norm=self.optim_cfg['max_norm'],
                                     norm_type=2)
            # Per-iteration LR schedule, applied before the step.
            self.lr_adjuster(self.optimizer, i, epoch)
            lr = self.optimizer.param_groups[0]['lr']
            self.optimizer.step()
            self.ema.update(self.model)
            loss_cls, loss_reg = detail_loss
            loss_list[0].append(total_loss.item())
            loss_list[1].append(loss_cls.item())
            loss_list[2].append(loss_reg.item())
            if self.local_rank == 0:
                pbar.set_description(
                    "epoch:{:2d}|match_num:{:4d}|size:{:3d}|total_loss:{:6.4f}|neg_cls:{:6.4f}|pos_reg:{:6.4f}|lr:{:8.6f}"
                    .format(epoch + 1, total_num, h, total_loss.item(),
                            loss_cls.item(), loss_reg.item(), lr))
        self.ema.update_attr(self.model)
        mean_loss_list = [np.array(item).mean() for item in loss_list]
        print(
            "epoch:{:3d}|match_num:{:4d}|local:{:3d}|total_loss:{:6.4f}|neg_cls:{:6.4f}|pos_reg:{:6.4f}|lr:{:8.6f}"
            .format(epoch + 1, match_num, self.local_rank, mean_loss_list[0],
                    mean_loss_list[1], mean_loss_list[2], lr))

    @torch.no_grad()
    def val(self, epoch):
        """Evaluate on the val split, report COCO mAP, checkpoint on rank 0."""
        predict_list = list()
        target_list = list()
        self.model.eval()
        if self.local_rank == 0:
            pbar = tqdm(self.vloader)
        else:
            pbar = self.vloader
        for img_tensor, targets_tensor, _ in pbar:
            _, _, h, w = img_tensor.shape
            # Scale normalized target boxes to pixel coordinates.
            targets_tensor[:, 3:] = targets_tensor[:, 3:] * torch.tensor(
                data=[w, h, w, h])
            img_tensor = img_tensor.to(self.device)
            targets_tensor = targets_tensor.to(self.device)
            predicts = self.model(img_tensor)
            # Clamp predicted boxes to the image before NMS.
            for i in range(len(predicts)):
                predicts[i][:, [0, 2]] = predicts[i][:, [0, 2]].clamp(min=0,
                                                                      max=w)
                predicts[i][:, [1, 3]] = predicts[i][:, [1, 3]].clamp(min=0,
                                                                      max=h)
            predicts = non_max_suppression(
                predicts,
                conf_thresh=self.val_cfg['conf_thresh'],
                iou_thresh=self.val_cfg['iou_thresh'],
                max_det=self.val_cfg['max_det'],
            )
            for i, predict in enumerate(predicts):
                predict_list.append(predict)
                # Column 0 is the batch index; keep label + box columns.
                targets_sample = targets_tensor[targets_tensor[:, 0] == i][:,
                                                                           2:]
                target_list.append(targets_sample)
        mp, mr, map50, map = coco_map(predict_list, target_list)
        print(
            "epoch: {:2d}|local:{:d}|mp:{:6.4f}|mr:{:6.4f}|map50:{:6.4f}|map:{:6.4f}"
            .format(epoch + 1, self.local_rank, mp * 100, mr * 100,
                    map50 * 100, map * 100))
        last_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:d}_{:s}_last.pth".format(self.local_rank,
                                        self.cfg['model_name']))
        best_map_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:d}_{:s}_best_map.pth".format(self.local_rank,
                                            self.cfg['model_name']))
        best_map50_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:d}_{:s}_best_map50.pth".format(self.local_rank,
                                              self.cfg['model_name']))
        model_static = self.model.module.state_dict() if is_parallel(
            self.model) else self.model.state_dict()
        ema_static = self.ema.ema.state_dict()
        cpkt = {
            "ori": model_static,
            "ema": ema_static,
            "map": map * 100,
            "epoch": epoch,
            "map50": map50 * 100
        }
        # Only rank 0 writes checkpoints.
        if self.local_rank != 0:
            return
        torch.save(cpkt, last_weight_path)
        if map > self.best_map:
            torch.save(cpkt, best_map_weight_path)
            self.best_map = map
        if map50 > self.best_map50:
            torch.save(cpkt, best_map50_weight_path)
            self.best_map50 = map50

    def run(self):
        """Train for the configured number of epochs, validating periodically."""
        for epoch in range(self.optim_cfg['epochs']):
            self.train(epoch)
            if (epoch + 1) % self.val_cfg['interval'] == 0:
                self.val(epoch)
def __init__(self, cfg_path):
    """Build RetinaNet DDP + apex(O1) training state from a YAML config.

    cfg_path: path to the YAML config file.
    Side effects: sets CUDA_VISIBLE_DEVICES and joins the NCCL process
    group; must run under a torch.distributed launcher.
    """
    with open(cfg_path, 'r') as rf:
        self.cfg = yaml.safe_load(rf)
    self.data_cfg = self.cfg['data']
    self.model_cfg = self.cfg['model']
    self.optim_cfg = self.cfg['optim']
    self.hyper_params = self.cfg['hyper_params']
    self.val_cfg = self.cfg['val']
    # Echo config so each rank's log is self-describing.
    print(self.data_cfg)
    print(self.model_cfg)
    print(self.optim_cfg)
    print(self.hyper_params)
    print(self.val_cfg)
    os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
    dist.init_process_group(backend='nccl')
    # Train split: augmented, optionally dropping box-less images.
    self.tdata = COCODataSets(
        img_root=self.data_cfg['train_img_root'],
        annotation_path=self.data_cfg['train_annotation_path'],
        img_size=self.data_cfg['img_size'],
        debug=self.data_cfg['debug'],
        use_crowd=self.data_cfg['use_crowd'],
        augments=True,
        remove_blank=self.data_cfg['remove_blank'])
    self.tloader = DataLoader(dataset=self.tdata,
                              batch_size=self.data_cfg['batch_size'],
                              num_workers=self.data_cfg['num_workers'],
                              collate_fn=self.tdata.collate_fn,
                              sampler=DistributedSampler(
                                  dataset=self.tdata, shuffle=True))
    # Val split: deterministic, no augmentation.
    self.vdata = COCODataSets(
        img_root=self.data_cfg['val_img_root'],
        annotation_path=self.data_cfg['val_annotation_path'],
        img_size=self.data_cfg['img_size'],
        debug=self.data_cfg['debug'],
        use_crowd=self.data_cfg['use_crowd'],
        augments=False,
        remove_blank=False)
    self.vloader = DataLoader(dataset=self.vdata,
                              batch_size=self.data_cfg['batch_size'],
                              num_workers=self.data_cfg['num_workers'],
                              collate_fn=self.vdata.collate_fn,
                              sampler=DistributedSampler(
                                  dataset=self.vdata, shuffle=False))
    print("train_data: ", len(self.tdata), " | ",
          "val_data: ", len(self.vdata), " | ",
          "empty_data: ", self.tdata.empty_images_len)
    print("train_iter: ", len(self.tloader), " | ",
          "val_iter: ", len(self.vloader))
    model = RetinaNet(
        num_cls=self.model_cfg['num_cls'],
        anchor_sizes=self.model_cfg['anchor_sizes'],
        strides=self.model_cfg['strides'],
        backbone=self.model_cfg['backbone'],
    )
    if self.model_cfg.get("backbone_weight", None):
        weights = torch.load(self.model_cfg['backbone_weight'])
        # (sic) "load_backbone_weighs" is the model's API name.
        model.load_backbone_weighs(weights)
    self.best_map = 0.
    self.best_map50 = 0.
    optimizer = split_optimizer(model, self.optim_cfg)
    local_rank = dist.get_rank()
    self.local_rank = local_rank
    self.device = torch.device("cuda", local_rank)
    # Model must be on-device before amp.initialize; DDP wrap comes after.
    model.to(self.device)
    model, optimizer = amp.initialize(model, optimizer,
                                      opt_level='O1',
                                      verbosity=0)
    if self.optim_cfg['sync_bn']:
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
    self.model = nn.parallel.distributed.DistributedDataParallel(
        model, device_ids=[local_rank], output_device=local_rank)
    self.optimizer = optimizer
    self.ema = ModelEMA(self.model)
    # NOTE(review): eval() on a config string is unsafe for untrusted YAML;
    # consider ast.literal_eval.
    beta = eval(self.hyper_params['beta']) if isinstance(self.hyper_params['beta'], str) \
        else self.hyper_params['beta']
    self.creterion = RetinaAnchorFreeLoss(
        alpha=self.hyper_params['alpha'],
        gamma=self.hyper_params['gamma'],
        beta=beta,
        top_k=self.hyper_params['top_k'],
        box_iou_thresh=self.hyper_params['box_iou_thresh'],
        box_reg_weight=self.hyper_params['box_reg_weight'])
    self.lr_adjuster = WarmUpCosineDecayMultiStepLRAdjust(
        init_lr=self.optim_cfg['lr'],
        milestones=self.optim_cfg['milestones'],
        warm_up_epoch=self.optim_cfg['warm_up_epoch'],
        iter_per_epoch=len(self.tloader),
        epochs=self.optim_cfg['epochs'],
        cosine_weights=self.optim_cfg['cosine_weights'])
def __init__(self, cfg_path):
    """Build YOLOv5 DDP training state (torch.cuda.amp) from a YAML config.

    cfg_path: path to the YAML config file.
    Side effects: sets CUDA_VISIBLE_DEVICES and joins the NCCL process
    group; must run under a torch.distributed launcher.
    """
    with open(cfg_path, 'r') as rf:
        self.cfg = yaml.safe_load(rf)
    self.data_cfg = self.cfg['data']
    self.model_cfg = self.cfg['model']
    self.optim_cfg = self.cfg['optim']
    self.hyper_params = self.cfg['hyper_params']
    self.val_cfg = self.cfg['val']
    # Echo config so each rank's log is self-describing.
    print(self.data_cfg)
    print(self.model_cfg)
    print(self.optim_cfg)
    print(self.hyper_params)
    print(self.val_cfg)
    os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
    dist.init_process_group(backend='nccl')
    # Train split: augmented, optionally dropping box-less images.
    self.tdata = COCODataSets(
        img_root=self.data_cfg['train_img_root'],
        annotation_path=self.data_cfg['train_annotation_path'],
        img_size=self.data_cfg['img_size'],
        debug=self.data_cfg['debug'],
        augments=True,
        use_crowd=self.data_cfg['use_crowd'],
        remove_blank=self.data_cfg['remove_blank'])
    self.tloader = DataLoader(dataset=self.tdata,
                              batch_size=self.data_cfg['batch_size'],
                              num_workers=self.data_cfg['num_workers'],
                              collate_fn=self.tdata.collate_fn,
                              sampler=DistributedSampler(
                                  dataset=self.tdata, shuffle=True))
    # Val split: deterministic, no augmentation.
    self.vdata = COCODataSets(
        img_root=self.data_cfg['val_img_root'],
        annotation_path=self.data_cfg['val_annotation_path'],
        img_size=self.data_cfg['img_size'],
        debug=self.data_cfg['debug'],
        augments=False,
        use_crowd=self.data_cfg['use_crowd'],
        remove_blank=False)
    self.vloader = DataLoader(dataset=self.vdata,
                              batch_size=self.data_cfg['batch_size'],
                              num_workers=self.data_cfg['num_workers'],
                              collate_fn=self.vdata.collate_fn,
                              sampler=DistributedSampler(
                                  dataset=self.vdata, shuffle=False))
    print("train_data: ", len(self.tdata), " | ",
          "val_data: ", len(self.vdata), " | ",
          "empty_data: ", self.tdata.empty_images_len)
    print("train_iter: ", len(self.tloader), " | ",
          "val_iter: ", len(self.vloader))
    model = YOLOv5(
        num_cls=self.model_cfg['num_cls'],
        anchors=self.model_cfg['anchors'],
        strides=self.model_cfg['strides'],
        scale_name=self.model_cfg['scale_name'],
    )
    self.best_map = 0.
    self.best_map50 = 0.
    optimizer = split_optimizer(model, self.optim_cfg)
    local_rank = dist.get_rank()
    self.local_rank = local_rank
    self.device = torch.device("cuda", local_rank)
    model.to(self.device)
    # Optional warm-start from a full-model checkpoint (non-strict so
    # head-size mismatches are tolerated); load_info reports mismatches.
    pretrain = self.model_cfg.get("pretrain", None)
    if pretrain:
        pretrain_weights = torch.load(pretrain, map_location=self.device)
        load_info = model.load_state_dict(pretrain_weights, strict=False)
        print("load_info ", load_info)
    self.scaler = amp.GradScaler(enabled=True)
    if self.optim_cfg['sync_bn']:
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
    self.model = nn.parallel.distributed.DistributedDataParallel(
        model, device_ids=[local_rank], output_device=local_rank)
    self.optimizer = optimizer
    self.ema = ModelEMA(self.model)
    self.creterion = YOLOv5LossOriginal(
        iou_type=self.hyper_params['iou_type'],
    )
    # bias_idx=2: param group 2 (biases) gets its own warm-up curve.
    self.lr_adjuster = EpochWarmUpCosineDecayLRAdjust(
        init_lr=self.optim_cfg['lr'],
        warm_up_epoch=self.optim_cfg['warm_up_epoch'],
        iter_per_epoch=len(self.tloader),
        epochs=self.optim_cfg['epochs'],
        alpha=self.optim_cfg['alpha'],
        gamma=self.optim_cfg['gamma'],
        bias_idx=2)
def __init__(self, cfg_path):
    """Build EfficientDet DDP training state (torch.cuda.amp) from a YAML config.

    cfg_path: path to the YAML config file.
    Side effects: sets CUDA_VISIBLE_DEVICES and joins the NCCL process
    group; must run under a torch.distributed launcher.
    """
    with open(cfg_path, 'r') as rf:
        self.cfg = yaml.safe_load(rf)
    self.data_cfg = self.cfg['data']
    self.model_cfg = self.cfg['model']
    self.optim_cfg = self.cfg['optim']
    self.hyper_params = self.cfg['hyper_params']
    self.val_cfg = self.cfg['val']
    # Echo config so each rank's log is self-describing.
    print(self.data_cfg)
    print(self.model_cfg)
    print(self.optim_cfg)
    print(self.hyper_params)
    print(self.val_cfg)
    os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
    dist.init_process_group(backend='nccl')
    # EfficientDet input resolution scales with the compound coefficient
    # (d0=512, d1=640, ... in steps of 128).
    img_size = int(self.model_cfg['compound_coef']) * 128 + 512
    self.tdata = COCODataSets(img_root=self.data_cfg['train_img_root'],
                              annotation_path=self.data_cfg['train_annotation_path'],
                              img_size=img_size,
                              debug=self.data_cfg['debug'],
                              use_crowd=self.data_cfg['use_crowd'],
                              augments=True,
                              remove_blank=self.data_cfg['remove_blank']
                              )
    self.tloader = DataLoader(dataset=self.tdata,
                              batch_size=self.data_cfg['batch_size'],
                              num_workers=self.data_cfg['num_workers'],
                              collate_fn=self.tdata.collate_fn,
                              sampler=DistributedSampler(dataset=self.tdata,
                                                         shuffle=True))
    self.vdata = COCODataSets(img_root=self.data_cfg['val_img_root'],
                              annotation_path=self.data_cfg['val_annotation_path'],
                              img_size=img_size,
                              debug=self.data_cfg['debug'],
                              use_crowd=self.data_cfg['use_crowd'],
                              augments=False,
                              remove_blank=False
                              )
    self.vloader = DataLoader(dataset=self.vdata,
                              batch_size=self.data_cfg['batch_size'],
                              num_workers=self.data_cfg['num_workers'],
                              collate_fn=self.vdata.collate_fn,
                              sampler=DistributedSampler(dataset=self.vdata,
                                                         shuffle=False))
    print("train_data: ", len(self.tdata), " | ",
          "val_data: ", len(self.vdata), " | ",
          "empty_data: ", self.tdata.empty_images_len)
    print("train_iter: ", len(self.tloader), " | ",
          "val_iter: ", len(self.vloader))
    model = EfficientDet(num_cls=self.model_cfg['num_cls'],
                         compound_coef=self.model_cfg['compound_coef']
                         )
    self.best_map = 0.
    self.best_map50 = 0.
    optimizer = split_optimizer(model, self.optim_cfg)
    local_rank = dist.get_rank()
    self.local_rank = local_rank
    self.device = torch.device("cuda", local_rank)
    model.to(self.device)
    self.scaler = amp.GradScaler(enabled=True)
    if self.optim_cfg['sync_bn']:
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
    self.model = nn.parallel.distributed.DistributedDataParallel(model,
                                                                 device_ids=[local_rank],
                                                                 output_device=local_rank)
    self.optimizer = optimizer
    self.ema = ModelEMA(self.model)
    self.creterion = RetinaLoss(iou_thresh=self.hyper_params['iou_thresh'],
                                ignore_thresh=self.hyper_params['ignore_thresh'],
                                alpha=self.hyper_params['alpha'],
                                gamma=self.hyper_params['gamma'],
                                iou_type=self.hyper_params['iou_type'],
                                coord_type=self.hyper_params['coord_type']
                                )
    self.lr_adjuster = WarmUpCosineDecayMultiStepLRAdjust(init_lr=self.optim_cfg['lr'],
                                                          milestones=self.optim_cfg['milestones'],
                                                          warm_up_epoch=self.optim_cfg['warm_up_epoch'],
                                                          iter_per_epoch=len(self.tloader),
                                                          epochs=self.optim_cfg['epochs'],
                                                          cosine_weights=self.optim_cfg['cosine_weights']
                                                          )
def __init__(self, cfg_path):
    """Build FCOS DDP training state (torch.cuda.amp) with TensorBoard logging.

    cfg_path: path to the YAML config file.
    Side effects: sets CUDA_VISIBLE_DEVICES, joins the NCCL process group
    (init_method='env://'), and on rank 0 opens a SummaryWriter at 'runs/'.
    """
    with open(cfg_path, 'r') as rf:
        self.cfg = yaml.safe_load(rf)
    self.data_cfg = self.cfg['data']
    self.model_cfg = self.cfg['model']
    self.optim_cfg = self.cfg['optim']
    self.hyper_params = self.cfg['hyper_params']
    self.val_cfg = self.cfg['val']
    # Echo config so each rank's log is self-describing.
    print(self.data_cfg)
    print(self.model_cfg)
    print(self.optim_cfg)
    print(self.hyper_params)
    print(self.val_cfg)
    os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
    dist.init_process_group(backend='nccl', init_method='env://')
    # Train split: augmented, optionally dropping box-less images.
    self.tdata = COCODataSets(
        img_root=self.data_cfg['train_img_root'],
        annotation_path=self.data_cfg['train_annotation_path'],
        img_size=self.data_cfg['img_size'],
        debug=self.data_cfg['debug'],
        use_crowd=self.data_cfg['use_crowd'],
        augments=True,
        remove_blank=self.data_cfg['remove_blank'])
    self.tloader = DataLoader(dataset=self.tdata,
                              batch_size=self.data_cfg['batch_size'],
                              num_workers=self.data_cfg['num_workers'],
                              collate_fn=self.tdata.collate_fn,
                              sampler=DistributedSampler(
                                  dataset=self.tdata, shuffle=True))
    # Val split: deterministic, no augmentation.
    self.vdata = COCODataSets(
        img_root=self.data_cfg['val_img_root'],
        annotation_path=self.data_cfg['val_annotation_path'],
        img_size=self.data_cfg['img_size'],
        debug=self.data_cfg['debug'],
        use_crowd=self.data_cfg['use_crowd'],
        augments=False,
        remove_blank=False)
    self.vloader = DataLoader(dataset=self.vdata,
                              batch_size=self.data_cfg['batch_size'],
                              num_workers=self.data_cfg['num_workers'],
                              collate_fn=self.vdata.collate_fn,
                              sampler=DistributedSampler(
                                  dataset=self.vdata, shuffle=False))
    print("train_data: ", len(self.tdata), " | ",
          "val_data: ", len(self.vdata), " | ",
          "empty_data: ", self.tdata.empty_images_len)
    print("train_iter: ", len(self.tloader), " | ",
          "val_iter: ", len(self.vloader))
    local_rank = dist.get_rank()
    self.local_rank = local_rank
    self.device = torch.device("cuda", local_rank)
    model = FCOS(
        num_cls=self.model_cfg['num_cls'],
        strides=self.model_cfg['strides'],
        backbone=self.model_cfg['backbone'],
    )
    optimizer = split_optimizer(model, self.optim_cfg)
    model.to(self.device)
    # Optional warm-start from a checkpoint's EMA weights (non-strict).
    pretrain = self.model_cfg.get('pretrain', None)
    if pretrain is not None:
        pretrained_weights = torch.load(pretrain, map_location=self.device)
        load_info = model.load_state_dict(pretrained_weights['ema'],
                                          strict=False)
        print('load info ', load_info)
    self.scaler = amp.GradScaler(enabled=True)
    if self.optim_cfg['sync_bn']:
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
    self.model = nn.parallel.distributed.DistributedDataParallel(
        model, device_ids=[local_rank], output_device=local_rank)
    self.optimizer = optimizer
    self.ema = ModelEMA(self.model)
    self.creterion = FCOSLoss(
        alpha=self.hyper_params['alpha'],
        gamma=self.hyper_params['gamma'],
        radius=self.hyper_params['radius'],
        layer_limits=self.hyper_params['layer_limits'],
        strides=self.model_cfg['strides'],
        iou_type=self.hyper_params['iou_type'])
    self.lr_adjuster = WarmUpCosineDecayMultiStepLRAdjust(
        init_lr=self.optim_cfg['lr'],
        milestones=self.optim_cfg['milestones'],
        warm_up_epoch=self.optim_cfg['warm_up_epoch'],
        iter_per_epoch=len(self.tloader),
        epochs=self.optim_cfg['epochs'],
        cosine_weights=self.optim_cfg['cosine_weights'])
    # TensorBoard writer only on rank 0; other ranks keep None.
    self.tb_writer = None
    if self.local_rank == 0:
        log_dir = 'runs/'
        print(
            'Start Tensorboard with "tensorboard --logdir %s", view at http://localhost:6006/'
            % log_dir)
        self.tb_writer = SummaryWriter(log_dir=log_dir)
    self.best_map = 0.
    self.best_map50 = 0.
class COCODDPApexProcessor(object):
    """DistributedDataParallel trainer for YOLOv5 on COCO.

    NOTE(review): despite the "Apex" in the name, this class uses
    torch.cuda.amp (autocast + GradScaler), not NVIDIA apex.
    Validation uses the EMA model and only rank 0 writes checkpoints.
    """

    def __init__(self, cfg_path):
        """Build datasets, loaders, model, optimizer, loss and LR schedule.

        cfg_path: path to the YAML config file.
        Side effects: sets CUDA_VISIBLE_DEVICES and joins the NCCL process
        group; must run under a torch.distributed launcher.
        """
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']
        self.model_cfg = self.cfg['model']
        self.optim_cfg = self.cfg['optim']
        self.hyper_params = self.cfg['hyper_params']
        self.val_cfg = self.cfg['val']
        # Echo config so each rank's log is self-describing.
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.hyper_params)
        print(self.val_cfg)
        os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
        dist.init_process_group(backend='nccl')
        self.tdata = COCODataSets(
            img_root=self.data_cfg['train_img_root'],
            annotation_path=self.data_cfg['train_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            augments=True,
            use_crowd=self.data_cfg['use_crowd'],
            remove_blank=self.data_cfg['remove_blank'])
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.tdata, shuffle=True))
        self.vdata = COCODataSets(
            img_root=self.data_cfg['val_img_root'],
            annotation_path=self.data_cfg['val_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            augments=False,
            use_crowd=self.data_cfg['use_crowd'],
            remove_blank=False)
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.vdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.vdata, shuffle=False))
        print("train_data: ", len(self.tdata), " | ",
              "val_data: ", len(self.vdata), " | ",
              "empty_data: ", self.tdata.empty_images_len)
        print("train_iter: ", len(self.tloader), " | ",
              "val_iter: ", len(self.vloader))
        model = YOLOv5(
            num_cls=self.model_cfg['num_cls'],
            anchors=self.model_cfg['anchors'],
            strides=self.model_cfg['strides'],
            scale_name=self.model_cfg['scale_name'],
        )
        self.best_map = 0.
        self.best_map50 = 0.
        optimizer = split_optimizer(model, self.optim_cfg)
        local_rank = dist.get_rank()
        self.local_rank = local_rank
        self.device = torch.device("cuda", local_rank)
        model.to(self.device)
        # Optional warm-start (non-strict, tolerates head mismatches).
        pretrain = self.model_cfg.get("pretrain", None)
        if pretrain:
            pretrain_weights = torch.load(pretrain, map_location=self.device)
            load_info = model.load_state_dict(pretrain_weights, strict=False)
            print("load_info ", load_info)
        self.scaler = amp.GradScaler(enabled=True)
        if self.optim_cfg['sync_bn']:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        self.model = nn.parallel.distributed.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank)
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)
        self.creterion = YOLOv5LossOriginal(
            iou_type=self.hyper_params['iou_type'],
        )
        # bias_idx=2: param group 2 (biases) gets its own warm-up curve.
        self.lr_adjuster = EpochWarmUpCosineDecayLRAdjust(
            init_lr=self.optim_cfg['lr'],
            warm_up_epoch=self.optim_cfg['warm_up_epoch'],
            iter_per_epoch=len(self.tloader),
            epochs=self.optim_cfg['epochs'],
            alpha=self.optim_cfg['alpha'],
            gamma=self.optim_cfg['gamma'],
            bias_idx=2)

    def train(self, epoch):
        """Run one AMP training epoch; logs weight-group (ulr) and bias-group (dlr) LRs."""
        self.model.train()
        if self.model_cfg['freeze_bn']:
            self.model.apply(freeze_bn)
        # Progress bar only on rank 0.
        if self.local_rank == 0:
            pbar = tqdm(self.tloader)
        else:
            pbar = self.tloader
        loss_list = [list(), list(), list(), list()]  # box/obj/cls/total
        ulr = 0
        dlr = 0
        match_num = 0
        for i, (img_tensor, targets_tensor, _) in enumerate(pbar):
            # Multi-scale training: resample the batch to a random size.
            if len(self.hyper_params['multi_scale']) > 2:
                target_size = np.random.choice(
                    self.hyper_params['multi_scale'])
                img_tensor = interpolate(img_tensor, mode='bilinear',
                                         size=target_size,
                                         align_corners=False)
            _, _, h, w = img_tensor.shape
            with torch.no_grad():
                img_tensor = img_tensor.to(self.device)
                # bs_idx,weights,label_idx,x1,y1,x2,y2
                # Convert normalized xyxy targets to xywh in place:
                # cols [5,6] become w,h; cols [3,4] become box centers.
                targets_tensor[:, [
                    5, 6
                ]] = targets_tensor[:, [5, 6]] - targets_tensor[:, [3, 4]]
                targets_tensor[:, [
                    3, 4
                ]] = targets_tensor[:, [3, 4]] + targets_tensor[:, [5, 6]] / 2.
                targets_tensor = targets_tensor.to(self.device)
            self.optimizer.zero_grad()
            # Forward under autocast (mixed precision).
            with amp.autocast(enabled=True):
                predicts, anchors = self.model(img_tensor)
                total_loss, detail_loss, total_num = self.creterion(
                    predicts, targets_tensor, anchors)
            self.scaler.scale(total_loss).backward()
            match_num += total_num
            # Per-iteration LR schedule before the scaled optimizer step.
            self.lr_adjuster(self.optimizer, i, epoch)
            ulr = self.optimizer.param_groups[0]['lr']
            dlr = self.optimizer.param_groups[2]['lr']
            self.scaler.step(self.optimizer)
            self.scaler.update()
            self.ema.update(self.model)
            loss_box, loss_obj, loss_cls, loss = detail_loss
            loss_list[0].append(loss_box.item())
            loss_list[1].append(loss_obj.item())
            loss_list[2].append(loss_cls.item())
            loss_list[3].append(loss.item())
            if self.local_rank == 0:
                pbar.set_description(
                    "epoch:{:2d}|match_num:{:4d}|size:{:3d}|loss:{:6.4f}|loss_box:{:6.4f}|loss_obj:{:6.4f}|loss_cls:{:6.4f}|ulr:{:8.6f},dlr:{:8.6f}"
                    .format(epoch + 1, int(total_num), h, loss.item(),
                            loss_box.item(), loss_obj.item(),
                            loss_cls.item(), ulr, dlr))
        self.ema.update_attr(self.model)
        mean_loss_list = [np.array(item).mean() for item in loss_list]
        print(
            "epoch:{:3d}|match_num:{:4d}|local:{:3d}|loss:{:6.4f}||loss_box:{:6.4f}|loss_obj:{:6.4f}|loss_cls:{:6.4f}|ulr:{:8.6f}|dlr:{:8.6f}"
            .format(epoch + 1, match_num, self.local_rank, mean_loss_list[3],
                    mean_loss_list[0], mean_loss_list[1], mean_loss_list[2],
                    ulr, dlr))

    @torch.no_grad()
    def val(self, epoch):
        """Evaluate the EMA model, report COCO mAP, checkpoint on rank 0."""
        predict_list = list()
        target_list = list()
        # NOTE(review): model.eval() is intentionally(?) disabled here —
        # evaluation goes through self.ema.ema; confirm its train/eval mode.
        # self.model.eval()
        if self.local_rank == 0:
            pbar = tqdm(self.vloader)
        else:
            pbar = self.vloader
        for img_tensor, targets_tensor, _ in pbar:
            _, _, h, w = img_tensor.shape
            # Scale normalized target boxes to pixel coordinates.
            targets_tensor[:, 3:] = targets_tensor[:, 3:] * torch.tensor(
                data=[w, h, w, h])
            img_tensor = img_tensor.to(self.device)
            targets_tensor = targets_tensor.to(self.device)
            predicts = self.ema.ema(img_tensor)
            predicts = non_max_suppression(
                predicts,
                conf_thresh=self.val_cfg['conf_thresh'],
                iou_thresh=self.val_cfg['iou_thresh'],
                max_det=self.val_cfg['max_det'],
            )
            for i, predict in enumerate(predicts):
                if predict is not None:
                    # Clip surviving boxes to the image bounds in place.
                    clip_coords(predict, (h, w))
                predict_list.append(predict)
                # Column 0 is the batch index; keep label + box columns.
                targets_sample = targets_tensor[targets_tensor[:, 0] == i][:,
                                                                           2:]
                target_list.append(targets_sample)
        mp, mr, map50, map = coco_map(predict_list, target_list)
        print(
            "epoch: {:2d}|local:{:d}|mp:{:6.4f}|mr:{:6.4f}|map50:{:6.4f}|map:{:6.4f}"
            .format(epoch + 1, self.local_rank, mp * 100, mr * 100,
                    map50 * 100, map * 100))
        last_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_last.pth".format(self.cfg['model_name']))
        best_map_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_best_map.pth".format(self.cfg['model_name']))
        best_map50_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_best_map50.pth".format(self.cfg['model_name']))
        # Only the EMA weights are checkpointed here.
        # model_static = self.model.module.state_dict() if is_parallel(self.model) else self.model.state_dict()
        ema_static = self.ema.ema.state_dict()
        cpkt = {
            "ema": ema_static,
            "map": map * 100,
            "epoch": epoch,
            "map50": map50 * 100
        }
        # Only rank 0 writes checkpoints.
        if self.local_rank != 0:
            return
        torch.save(cpkt, last_weight_path)
        if map > self.best_map:
            torch.save(cpkt, best_map_weight_path)
            self.best_map = map
        if map50 > self.best_map50:
            torch.save(cpkt, best_map50_weight_path)
            self.best_map50 = map50

    def run(self):
        """Train for the configured epochs, validating periodically, then
        tear down the process group and free cached CUDA memory."""
        for epoch in range(self.optim_cfg['epochs']):
            self.train(epoch)
            if (epoch + 1) % self.val_cfg['interval'] == 0:
                self.val(epoch)
        dist.destroy_process_group()
        torch.cuda.empty_cache()
def __init__(self, cfg_path):
    """Build single-GPU RetinaNet training state from a YAML config.

    cfg_path: path to the YAML config file.
    Unlike the DDP variants, this uses plain shuffled DataLoaders and pins
    everything to cuda:0 (local_rank fixed to 0).
    """
    with open(cfg_path, 'r') as rf:
        self.cfg = yaml.safe_load(rf)
    self.data_cfg = self.cfg['data']
    self.model_cfg = self.cfg['model']
    self.optim_cfg = self.cfg['optim']
    self.hyper_params = self.cfg['hyper_params']
    self.val_cfg = self.cfg['val']
    # Echo config so the log is self-describing.
    print(self.data_cfg)
    print(self.model_cfg)
    print(self.optim_cfg)
    print(self.hyper_params)
    print(self.val_cfg)
    # Train split: augmented, optionally dropping box-less images.
    self.tdata = COCODataSets(
        img_root=self.data_cfg['train_img_root'],
        annotation_path=self.data_cfg['train_annotation_path'],
        img_size=self.data_cfg['img_size'],
        debug=self.data_cfg['debug'],
        use_crowd=self.data_cfg['use_crowd'],
        augments=True,
        remove_blank=self.data_cfg['remove_blank'])
    self.tloader = DataLoader(dataset=self.tdata,
                              batch_size=self.data_cfg['batch_size'],
                              num_workers=self.data_cfg['num_workers'],
                              collate_fn=self.tdata.collate_fn,
                              shuffle=True)
    # Val split: deterministic, no augmentation.
    self.vdata = COCODataSets(
        img_root=self.data_cfg['val_img_root'],
        annotation_path=self.data_cfg['val_annotation_path'],
        img_size=self.data_cfg['img_size'],
        debug=self.data_cfg['debug'],
        use_crowd=self.data_cfg['use_crowd'],
        augments=False,
        remove_blank=False)
    self.vloader = DataLoader(dataset=self.vdata,
                              batch_size=self.data_cfg['batch_size'],
                              num_workers=self.data_cfg['num_workers'],
                              collate_fn=self.vdata.collate_fn,
                              shuffle=False)
    print("train_data: ", len(self.tdata), " | ",
          "val_data: ", len(self.vdata), " | ",
          "empty_data: ", self.tdata.empty_images_len)
    print("train_iter: ", len(self.tloader), " | ",
          "val_iter: ", len(self.vloader))
    model = RetinaNet(
        num_cls=self.model_cfg['num_cls'],
        anchor_sizes=self.model_cfg['anchor_sizes'],
        strides=self.model_cfg['strides'],
        backbone=self.model_cfg['backbone'],
    )
    if self.model_cfg.get("backbone_weight", None):
        weights = torch.load(self.model_cfg['backbone_weight'])
        # (sic) "load_backbone_weighs" is the model's API name.
        model.load_backbone_weighs(weights)
    self.best_map = 0.
    self.best_map50 = 0.
    optimizer = split_optimizer(model, self.optim_cfg)
    self.local_rank = 0
    self.device = torch.device("cuda:0")
    model.to(self.device)
    self.model = model
    self.optimizer = optimizer
    self.ema = ModelEMA(self.model)
    # NOTE(review): eval() on a config string is unsafe for untrusted YAML;
    # consider ast.literal_eval.
    beta = eval(self.hyper_params['beta']) if isinstance(self.hyper_params['beta'], str) \
        else self.hyper_params['beta']
    self.creterion = RetinaAnchorFreeLoss(
        alpha=self.hyper_params['alpha'],
        gamma=self.hyper_params['gamma'],
        beta=beta,
        top_k=self.hyper_params['top_k'],
        box_iou_thresh=self.hyper_params['box_iou_thresh'],
        box_reg_weight=self.hyper_params['box_reg_weight'])
    self.lr_adjuster = WarmUpCosineDecayMultiStepLRAdjust(
        init_lr=self.optim_cfg['lr'],
        milestones=self.optim_cfg['milestones'],
        warm_up_epoch=self.optim_cfg['warm_up_epoch'],
        iter_per_epoch=len(self.tloader),
        epochs=self.optim_cfg['epochs'],
        cosine_weights=self.optim_cfg['cosine_weights'])
class DDPMixProcessor(object):
    """DistributedDataParallel mixed-precision (torch.cuda.amp) trainer for
    FCOS on COCO, with TensorBoard logging on rank 0."""

    def __init__(self, cfg_path):
        """Build datasets, loaders, model, optimizer, loss and LR schedule.

        cfg_path: path to the YAML config file.
        Side effects: sets CUDA_VISIBLE_DEVICES, joins the NCCL process
        group (init_method='env://'), and on rank 0 opens a SummaryWriter.
        """
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']
        self.model_cfg = self.cfg['model']
        self.optim_cfg = self.cfg['optim']
        self.hyper_params = self.cfg['hyper_params']
        self.val_cfg = self.cfg['val']
        # Echo config so each rank's log is self-describing.
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.hyper_params)
        print(self.val_cfg)
        os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
        dist.init_process_group(backend='nccl', init_method='env://')
        # Train split: augmented, optionally dropping box-less images.
        self.tdata = COCODataSets(
            img_root=self.data_cfg['train_img_root'],
            annotation_path=self.data_cfg['train_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=True,
            remove_blank=self.data_cfg['remove_blank'])
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.tdata, shuffle=True))
        # Val split: deterministic, no augmentation.
        self.vdata = COCODataSets(
            img_root=self.data_cfg['val_img_root'],
            annotation_path=self.data_cfg['val_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=False,
            remove_blank=False)
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.vdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.vdata, shuffle=False))
        print("train_data: ", len(self.tdata), " | ",
              "val_data: ", len(self.vdata), " | ",
              "empty_data: ", self.tdata.empty_images_len)
        print("train_iter: ", len(self.tloader), " | ",
              "val_iter: ", len(self.vloader))
        local_rank = dist.get_rank()
        self.local_rank = local_rank
        self.device = torch.device("cuda", local_rank)
        model = FCOS(
            num_cls=self.model_cfg['num_cls'],
            strides=self.model_cfg['strides'],
            backbone=self.model_cfg['backbone'],
        )
        optimizer = split_optimizer(model, self.optim_cfg)
        model.to(self.device)
        # Optional warm-start from a checkpoint's EMA weights (non-strict).
        pretrain = self.model_cfg.get('pretrain', None)
        if pretrain is not None:
            pretrained_weights = torch.load(pretrain,
                                            map_location=self.device)
            load_info = model.load_state_dict(pretrained_weights['ema'],
                                              strict=False)
            print('load info ', load_info)
        self.scaler = amp.GradScaler(enabled=True)
        if self.optim_cfg['sync_bn']:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        self.model = nn.parallel.distributed.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank)
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)
        self.creterion = FCOSLoss(
            alpha=self.hyper_params['alpha'],
            gamma=self.hyper_params['gamma'],
            radius=self.hyper_params['radius'],
            layer_limits=self.hyper_params['layer_limits'],
            strides=self.model_cfg['strides'],
            iou_type=self.hyper_params['iou_type'])
        self.lr_adjuster = WarmUpCosineDecayMultiStepLRAdjust(
            init_lr=self.optim_cfg['lr'],
            milestones=self.optim_cfg['milestones'],
            warm_up_epoch=self.optim_cfg['warm_up_epoch'],
            iter_per_epoch=len(self.tloader),
            epochs=self.optim_cfg['epochs'],
            cosine_weights=self.optim_cfg['cosine_weights'])
        # TensorBoard writer only on rank 0; other ranks keep None.
        self.tb_writer = None
        if self.local_rank == 0:
            log_dir = 'runs/'
            print(
                'Start Tensorboard with "tensorboard --logdir %s", view at http://localhost:6006/'
                % log_dir)
            self.tb_writer = SummaryWriter(log_dir=log_dir)
        self.best_map = 0.
        self.best_map50 = 0.
def train(self, epoch): self.model.train() if self.model_cfg['freeze_bn']: self.model.apply(freeze_bn) if self.local_rank == 0: pbar = tqdm(self.tloader) else: pbar = self.tloader loss_list = [list(), list(), list(), list()] lr = 0 match_num = 0 for i, (img_tensor, targets_tensor, _) in enumerate(pbar): if len(self.hyper_params['multi_scale']) > 2: target_size = np.random.choice( self.hyper_params['multi_scale']) img_tensor = interpolate(img_tensor, mode='bilinear', size=target_size, align_corners=False) _, _, h, w = img_tensor.shape with torch.no_grad(): targets_tensor[:, 3:] = targets_tensor[:, 3:] * torch.tensor( data=[w, h, w, h]) img_tensor = img_tensor.to(self.device) targets_tensor = targets_tensor.to(self.device) self.optimizer.zero_grad() # Forward # 混合精度 with amp.autocast(enabled=True): cls_outputs, reg_outputs, center_outputs, grids = self.model( img_tensor) total_loss, detail_loss, total_num = self.creterion( cls_outputs, reg_outputs, center_outputs, grids, targets_tensor) match_num += total_num self.scaler.scale(total_loss).backward() self.lr_adjuster(self.optimizer, i, epoch) lr0 = self.optimizer.param_groups[0]['lr'] lr1 = self.optimizer.param_groups[1]['lr'] lr2 = self.optimizer.param_groups[2]['lr'] self.scaler.step(self.optimizer) self.scaler.update() self.ema.update(self.model) loss_cls, loss_reg, loss_center = detail_loss loss_list[0].append(total_loss.item()) loss_list[1].append(loss_cls.item()) loss_list[2].append(loss_reg.item()) loss_list[3].append(loss_center.item()) if self.local_rank == 0: pbar.set_description( "epoch:{:3d}|match_num:{:4d}|size:{:3d}|target_loss:{:6.4f}|loss_cls:{:6.4f}|loss_reg:{:6.4f}|loss_cen:{:6.4f}|lr:{:8.6f}" .format(epoch + 1, int(total_num), h, total_loss.item(), loss_cls.item(), loss_reg.item(), loss_center.item(), lr0)) if self.tb_writer: tags = [ 'train/total_loss', 'train/cls_loss', 'train/reg_loss', 'train/center_loss', # train loss 'x/lr0', 'x/lr1', 'x/lr2' ] # params for x, tag in zip([ total_loss.item(), 
loss_cls.item(), loss_reg.item(), loss_center.item(), lr0, lr1, lr2 ], tags): self.tb_writer.add_scalar( tag, x, i + 1 + epoch * len(self.tloader)) self.ema.update_attr(self.model) mean_loss_list = [np.array(item).mean() for item in loss_list] print( "epoch:{:3d}|match_num:{:4d}|local:{:3d}|target_loss:{:6.4f}|loss_cls:{:6.4f}|loss_reg:{:6.4f}|loss_cen:{:6.4f}|lr:{:8.6f}" .format(epoch + 1, int(match_num), self.local_rank, mean_loss_list[0], mean_loss_list[1], mean_loss_list[2], mean_loss_list[3], lr)) @torch.no_grad() def val(self, epoch): predict_list = list() target_list = list() self.model.eval() if self.local_rank == 0: pbar = tqdm(self.vloader) else: pbar = self.vloader for img_tensor, targets_tensor, _ in pbar: _, _, h, w = img_tensor.shape targets_tensor[:, 3:] = targets_tensor[:, 3:] * torch.tensor( data=[w, h, w, h]) img_tensor = img_tensor.to(self.device) targets_tensor = targets_tensor.to(self.device) predicts = self.ema.ema(img_tensor) for i in range(len(predicts)): predicts[i][:, [0, 2]] = predicts[i][:, [0, 2]].clamp(min=0, max=w) predicts[i][:, [1, 3]] = predicts[i][:, [1, 3]].clamp(min=0, max=h) predicts = non_max_suppression( predicts, conf_thresh=self.val_cfg['conf_thresh'], iou_thresh=self.val_cfg['iou_thresh'], max_det=self.val_cfg['max_det'], ) for i, predict in enumerate(predicts): predict_list.append(predict) targets_sample = targets_tensor[targets_tensor[:, 0] == i][:, 2:] target_list.append(targets_sample) mp, mr, map50, map = coco_map(predict_list, target_list) print( "epoch: {:2d}|local:{:d}|mp:{:6.4f}|mr:{:6.4f}|map50:{:6.4f}|map:{:6.4f}" .format(epoch + 1, self.local_rank, mp * 100, mr * 100, map50 * 100, map * 100)) last_weight_path = os.path.join( self.val_cfg['weight_path'], "{:s}_last.pth".format(self.cfg['model_name'])) best_map_weight_path = os.path.join( self.val_cfg['weight_path'], "{:s}_best_map.pth".format(self.cfg['model_name'])) best_map50_weight_path = os.path.join( self.val_cfg['weight_path'], 
"{:s}_best_map50.pth".format(self.cfg['model_name'])) ema_static = self.ema.ema.state_dict() cpkt = { "ema": ema_static, "map": map * 100, "epoch": epoch, "map50": map50 * 100 } if self.local_rank != 0: return if self.tb_writer: tags = [ 'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95' ] for x, tag in zip([mp, mr, map50, map], tags): self.tb_writer.add_scalar(tag, x, epoch + 1) torch.save(cpkt, last_weight_path) if map > self.best_map: torch.save(cpkt, best_map_weight_path) self.best_map = map if map50 > self.best_map50: torch.save(cpkt, best_map50_weight_path) self.best_map50 = map50 def run(self): for epoch in range(self.optim_cfg['epochs']): self.train(epoch) if (epoch + 1) % self.val_cfg['interval'] == 0: self.val(epoch) dist.destroy_process_group() torch.cuda.empty_cache()
class DDPApexProcessor(object):
    """DDP trainer for CenterNet on COCO-style data.

    NOTE(review): despite the "Apex" name, the visible code uses neither
    NVIDIA Apex nor torch.cuda.amp — training runs in full precision.
    """

    def __init__(self, cfg_path):
        """Build datasets, model, optimizer, GT generator, loss and LR schedule
        from the YAML config at *cfg_path*."""
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']
        self.model_cfg = self.cfg['model']
        self.optim_cfg = self.cfg['optim']
        self.hyper_params = self.cfg['hyper_params']
        self.val_cfg = self.cfg['val']
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.hyper_params)
        print(self.val_cfg)
        # Restrict visible GPUs before the NCCL process group is created.
        os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
        self.gpu_num = len(str(self.cfg['gpus']).split(","))
        dist.init_process_group(backend='nccl')
        # --- dataloaders -----------------------------------------------------
        self.tdata = COCODataSets(
            img_root=self.data_cfg['train_img_root'],
            annotation_path=self.data_cfg['train_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=True,
            remove_blank=self.data_cfg['remove_blank'])
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.tdata, shuffle=True))
        self.vdata = COCODataSets(
            img_root=self.data_cfg['val_img_root'],
            annotation_path=self.data_cfg['val_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=False,
            remove_blank=False)
        # Validation runs one image at a time (batch_size=1).
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=1,
                                  num_workers=1,
                                  collate_fn=self.vdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.vdata, shuffle=False))
        print("train_data: ", len(self.tdata), " | ", "val_data: ",
              len(self.vdata), " | ", "empty_data: ",
              self.tdata.empty_images_len)
        print("train_iter: ", len(self.tloader), " | ", "val_iter: ",
              len(self.vloader))
        # --- model / optimizer ------------------------------------------------
        model = CenterNet(num_cls=self.model_cfg['num_cls'],
                          PIXEL_MEAN=self.model_cfg['PIXEL_MEAN'],
                          PIXEL_STD=self.model_cfg['PIXEL_STD'],
                          backbone=self.model_cfg['backbone'],
                          cfg=self.model_cfg)
        self.best_map = 0.
        self.best_map50 = 0.
        optimizer = split_optimizer(model, self.optim_cfg)
        local_rank = dist.get_rank()
        self.local_rank = local_rank
        self.device = torch.device("cuda", local_rank)
        model.to(self.device)
        if self.optim_cfg['sync_bn']:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        self.model = nn.parallel.distributed.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank)
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)
        # Builds CenterNet ground-truth heatmaps / regression targets per batch.
        self.gt_generator = CenterNetGT(
            alpha=self.model_cfg['alpha'],
            beta=self.model_cfg['beta'],
            num_cls=self.model_cfg['num_cls'],
            wh_planes=self.model_cfg['wh_planes'],
            down_ratio=self.model_cfg['down_ratio'],
            wh_area_process=self.model_cfg['wh_area_process'])
        self.creterion = CenterNetLoss(
            hm_weight=self.hyper_params['hm_weight'],
            wh_weight=self.hyper_params['wh_weight'],
            down_ratio=self.model_cfg['down_ratio'])
        self.lr_adjuster = WarmUpCosineDecayMultiStepLRAdjust(
            init_lr=self.optim_cfg['lr'],
            milestones=self.optim_cfg['milestones'],
            warm_up_epoch=self.optim_cfg['warm_up_epoch'],
            iter_per_epoch=len(self.tloader),
            epochs=self.optim_cfg['epochs'],
            cosine_weights=self.optim_cfg['cosine_weights'])

    def train(self, epoch):
        """Run one full-precision training epoch; rank 0 drives the tqdm bar."""
        self.model.train()
        if self.model_cfg['freeze_bn']:
            self.model.apply(freeze_bn)
        if self.local_rank == 0:
            pbar = tqdm(self.tloader)
        else:
            pbar = self.tloader
        # [total, cls, wh] per-iteration losses for the epoch mean.
        loss_list = [list(), list(), list()]
        lr = 0
        for i, (img_tensor, targets_tensor, _) in enumerate(pbar):
            if len(self.hyper_params['multi_scale']) > 2:
                # Multi-scale training: resize the whole batch to a random size.
                target_size = np.random.choice(
                    self.hyper_params['multi_scale'])
                img_tensor = interpolate(img_tensor,
                                         mode='bilinear',
                                         size=target_size,
                                         align_corners=False)
            bs, _, h, w = img_tensor.shape
            with torch.no_grad():
                # Targets are normalized [0,1] boxes; scale to pixel coords.
                targets_tensor[:, 3:] = targets_tensor[:, 3:] * torch.tensor(
                    data=[w, h, w, h])
            img_tensor = img_tensor.to(device=self.device)
            # NOTE(review): targets_tensor is not moved to self.device here —
            # presumably gt_generator handles device placement; confirm.
            heatmaps, box_targets, reg_weights = self.gt_generator(
                targets_tensor, [h, w], bs)
            self.optimizer.zero_grad()
            cls_pred, wh_pred = self.model(img_tensor)
            total_loss, detail_loss = self.creterion(cls_pred, wh_pred,
                                                     heatmaps, box_targets,
                                                     reg_weights)
            total_loss.backward()
            # nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=self.optim_cfg['max_norm'],
            #                          norm_type=2)
            # Per-iteration warm-up/cosine LR schedule, applied before step.
            self.lr_adjuster(self.optimizer, i, epoch)
            lr = self.optimizer.param_groups[0]['lr']
            self.optimizer.step()
            self.ema.update(self.model)
            loss_cls, loss_wh = detail_loss
            loss_list[0].append(total_loss.item())
            loss_list[1].append(loss_cls.item())
            loss_list[2].append(loss_wh.item())
            if self.local_rank == 0:
                pbar.set_description(
                    "epoch:{:2d}|size:{:3d}|target_loss:{:6.4f}|loss_cls:{:6.4f}|loss_wh:{:6.4f}|lr:{:8.6f}"
                    .format(epoch + 1, h, total_loss.item(), loss_cls.item(),
                            loss_wh.item(), lr))
        self.ema.update_attr(self.model)
        mean_loss_list = [np.array(item).mean() for item in loss_list]
        print(
            "epoch:{:3d}|local:{:3d}|target_loss:{:6.4f}|loss_cls:{:6.4f}|loss_wh:{:6.4f}|lr:{:8.6f}"
            .format(epoch + 1, self.local_rank, mean_loss_list[0],
                    mean_loss_list[1], mean_loss_list[2], lr))

    @torch.no_grad()
    def val(self, epoch):
        """Evaluate the EMA model; metrics are all-reduced across ranks and
        averaged over self.gpu_num, then rank 0 writes the checkpoints."""
        predict_list = list()
        target_list = list()
        self.model.eval()
        self.ema.ema.eval()
        if self.local_rank == 0:
            pbar = tqdm(self.vloader)
        else:
            pbar = self.vloader
        for img_tensor, targets_tensor, _ in pbar:
            _, _, h, w = img_tensor.shape
            targets_tensor[:, 3:] = targets_tensor[:, 3:] * torch.tensor(
                data=[w, h, w, h])
            img_tensor = img_tensor.to(self.device)
            targets_tensor = targets_tensor.to(self.device)
            predicts = self.ema.ema(
                img_tensor
            )  # list(predict) predict.shape=[num_box,6] 6==>x1,y1,x2,y2,score,label
            for i, predict in enumerate(predicts):
                # Clip predicted boxes to the image frame.
                predict[:, [0, 2]] = predict[:, [0, 2]].clamp(min=0, max=w)
                predict[:, [1, 3]] = predict[:, [1, 3]].clamp(min=0, max=h)
                predict_list.append(predict)
                # Targets for image i of the batch (column 0 is the batch idx).
                targets_sample = targets_tensor[targets_tensor[:, 0] == i][:,
                                                                           2:]
                target_list.append(targets_sample)
        mp, mr, map50, map = coco_map(predict_list, target_list)
        # Average each metric over all participating GPUs.
        mp = reduce_sum(torch.tensor(mp,
                                     device=self.device)).item() / self.gpu_num
        mr = reduce_sum(torch.tensor(mr,
                                     device=self.device)).item() / self.gpu_num
        map50 = reduce_sum(torch.tensor(
            map50, device=self.device)).item() / self.gpu_num
        map = reduce_sum(torch.tensor(
            map, device=self.device)).item() / self.gpu_num
        print(
            "epoch: {:2d}|local:{:d}|mp:{:6.4f}|mr:{:6.4f}|map50:{:6.4f}|map:{:6.4f}"
            .format(epoch + 1, self.local_rank, mp * 100, mr * 100,
                    map50 * 100, map * 100))
        # NOTE(review): paths embed local_rank although only rank 0 ever
        # saves (see early return below) — confirm the prefix is intended.
        last_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:d}_{:s}_last.pth".format(self.local_rank,
                                        self.cfg['model_name']))
        best_map_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:d}_{:s}_best_map.pth".format(self.local_rank,
                                            self.cfg['model_name']))
        best_map50_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:d}_{:s}_best_map50.pth".format(self.local_rank,
                                              self.cfg['model_name']))
        ema_static = self.ema.ema.state_dict()
        cpkt = {
            "ema": ema_static,
            "map": map * 100,
            "epoch": epoch,
            "map50": map50 * 100
        }
        # Only rank 0 writes checkpoints.
        if self.local_rank != 0:
            return
        torch.save(cpkt, last_weight_path)
        if map > self.best_map:
            torch.save(cpkt, best_map_weight_path)
            self.best_map = map
        if map50 > self.best_map50:
            torch.save(cpkt, best_map50_weight_path)
            self.best_map50 = map50

    def run(self):
        """Train for the configured number of epochs, validating periodically.

        NOTE(review): unlike the sibling trainers, this run() does not call
        dist.destroy_process_group() on exit — confirm whether intentional.
        """
        for epoch in range(self.optim_cfg['epochs']):
            self.train(epoch)
            if (epoch + 1) % self.val_cfg['interval'] == 0:
                self.val(epoch)
def __init__(self, cfg_path):
    """Build the CenterNet DDP training state from the YAML config at *cfg_path*.

    NOTE(review): this constructor duplicates DDPApexProcessor.__init__
    above almost verbatim; the enclosing class header is outside this view.
    """
    with open(cfg_path, 'r') as rf:
        self.cfg = yaml.safe_load(rf)
    self.data_cfg = self.cfg['data']
    self.model_cfg = self.cfg['model']
    self.optim_cfg = self.cfg['optim']
    self.hyper_params = self.cfg['hyper_params']
    self.val_cfg = self.cfg['val']
    print(self.data_cfg)
    print(self.model_cfg)
    print(self.optim_cfg)
    print(self.hyper_params)
    print(self.val_cfg)
    # Restrict visible GPUs before the NCCL process group is created.
    os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
    self.gpu_num = len(str(self.cfg['gpus']).split(","))
    dist.init_process_group(backend='nccl')
    # --- dataloaders ---------------------------------------------------------
    self.tdata = COCODataSets(
        img_root=self.data_cfg['train_img_root'],
        annotation_path=self.data_cfg['train_annotation_path'],
        img_size=self.data_cfg['img_size'],
        debug=self.data_cfg['debug'],
        use_crowd=self.data_cfg['use_crowd'],
        augments=True,
        remove_blank=self.data_cfg['remove_blank'])
    self.tloader = DataLoader(dataset=self.tdata,
                              batch_size=self.data_cfg['batch_size'],
                              num_workers=self.data_cfg['num_workers'],
                              collate_fn=self.tdata.collate_fn,
                              sampler=DistributedSampler(
                                  dataset=self.tdata, shuffle=True))
    self.vdata = COCODataSets(
        img_root=self.data_cfg['val_img_root'],
        annotation_path=self.data_cfg['val_annotation_path'],
        img_size=self.data_cfg['img_size'],
        debug=self.data_cfg['debug'],
        use_crowd=self.data_cfg['use_crowd'],
        augments=False,
        remove_blank=False)
    # Validation runs one image at a time (batch_size=1).
    self.vloader = DataLoader(dataset=self.vdata,
                              batch_size=1,
                              num_workers=1,
                              collate_fn=self.vdata.collate_fn,
                              sampler=DistributedSampler(
                                  dataset=self.vdata, shuffle=False))
    print("train_data: ", len(self.tdata), " | ", "val_data: ",
          len(self.vdata), " | ", "empty_data: ",
          self.tdata.empty_images_len)
    print("train_iter: ", len(self.tloader), " | ", "val_iter: ",
          len(self.vloader))
    # --- model / optimizer ----------------------------------------------------
    model = CenterNet(num_cls=self.model_cfg['num_cls'],
                      PIXEL_MEAN=self.model_cfg['PIXEL_MEAN'],
                      PIXEL_STD=self.model_cfg['PIXEL_STD'],
                      backbone=self.model_cfg['backbone'],
                      cfg=self.model_cfg)
    self.best_map = 0.
    self.best_map50 = 0.
    optimizer = split_optimizer(model, self.optim_cfg)
    local_rank = dist.get_rank()
    self.local_rank = local_rank
    self.device = torch.device("cuda", local_rank)
    model.to(self.device)
    if self.optim_cfg['sync_bn']:
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
    self.model = nn.parallel.distributed.DistributedDataParallel(
        model, device_ids=[local_rank], output_device=local_rank)
    self.optimizer = optimizer
    self.ema = ModelEMA(self.model)
    # Builds CenterNet ground-truth heatmaps / regression targets per batch.
    self.gt_generator = CenterNetGT(
        alpha=self.model_cfg['alpha'],
        beta=self.model_cfg['beta'],
        num_cls=self.model_cfg['num_cls'],
        wh_planes=self.model_cfg['wh_planes'],
        down_ratio=self.model_cfg['down_ratio'],
        wh_area_process=self.model_cfg['wh_area_process'])
    self.creterion = CenterNetLoss(
        hm_weight=self.hyper_params['hm_weight'],
        wh_weight=self.hyper_params['wh_weight'],
        down_ratio=self.model_cfg['down_ratio'])
    self.lr_adjuster = WarmUpCosineDecayMultiStepLRAdjust(
        init_lr=self.optim_cfg['lr'],
        milestones=self.optim_cfg['milestones'],
        warm_up_epoch=self.optim_cfg['warm_up_epoch'],
        iter_per_epoch=len(self.tloader),
        epochs=self.optim_cfg['epochs'],
        cosine_weights=self.optim_cfg['cosine_weights'])
class DDPMixProcessor(object):
    """DDP mixed-precision (torch.cuda.amp) trainer for Faster R-CNN.

    The optimizer keeps separate parameter groups; group 0 ("ulr") and
    group 2 ("dlr") learning rates are logged independently.
    """

    def __init__(self, cfg_path):
        """Build datasets, model, optimizer and LR schedule from the YAML at *cfg_path*."""
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']
        self.model_cfg = self.cfg['model']
        self.optim_cfg = self.cfg['optim']
        self.val_cfg = self.cfg['val']
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.val_cfg)
        # Restrict visible GPUs before the NCCL process group is created.
        os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
        self.gpu_num = len(str(self.cfg['gpus']).split(","))
        dist.init_process_group(backend='nccl')
        # --- dataloaders -------------------------------------------------------
        self.tdata = COCODataSets(
            img_root=self.data_cfg['train_img_root'],
            annotation_path=self.data_cfg['train_annotation_path'],
            min_thresh=self.data_cfg['min_thresh'],
            max_thresh=self.data_cfg['max_thresh'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=True,
            remove_blank=self.data_cfg['remove_blank'])
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.tdata, shuffle=True))
        self.vdata = COCODataSets(
            img_root=self.data_cfg['val_img_root'],
            annotation_path=self.data_cfg['val_annotation_path'],
            min_thresh=self.data_cfg['min_thresh'],
            max_thresh=self.data_cfg['max_thresh'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=False,
            remove_blank=False)
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.vdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.vdata, shuffle=False))
        print("train_data: ", len(self.tdata), " | ", "val_data: ",
              len(self.vdata), " | ", "empty_data: ",
              self.tdata.empty_images_len)
        print("train_iter: ", len(self.tloader), " | ", "val_iter: ",
              len(self.vloader))
        # --- model set ----------------------------------------------------------
        model = FasterRCNN(cfg=self.model_cfg)
        self.scaler = amp.GradScaler(enabled=True)
        self.best_map = 0.
        self.best_map50 = 0.
        optimizer = split_optimizer(model, self.optim_cfg)
        local_rank = dist.get_rank()
        self.local_rank = local_rank
        self.device = torch.device("cuda", local_rank)
        model.to(self.device)
        if self.optim_cfg['sync_bn']:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        self.model = nn.parallel.distributed.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank)
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)
        self.lr_adjuster = IterWarmUpCosineDecayMultiStepLRAdjust(
            init_lr=self.optim_cfg['lr'],
            warm_up_epoch=self.optim_cfg['warm_up_epoch'],
            iter_per_epoch=len(self.tloader),
            epochs=self.optim_cfg['epochs'],
            alpha=self.optim_cfg['alpha'],
            gamma=self.optim_cfg['gamma'],
            bias_idx=2,
            milestones=self.optim_cfg['milestones'])

    def train(self, epoch):
        """Run one AMP training epoch; the RPN/ROI losses are logged separately."""
        self.model.train()
        if self.model_cfg['freeze_bn']:
            self.model.apply(freeze_bn)
        if self.local_rank == 0:
            pbar = tqdm(self.tloader)
        else:
            pbar = self.tloader
        # [total, rpn_cls, rpn_box, roi_cls, roi_box] per-iteration losses.
        loss_list = [list(), list(), list(), list(), list()]
        ulr = 0
        dlr = 0
        for i, (img_tensor, targets_tensor, _) in enumerate(pbar):
            _, _, h, w = img_tensor.shape
            with torch.no_grad():
                # Targets are normalized [0,1] boxes; scale to pixel coords.
                targets_tensor[:, 3:] = targets_tensor[:, 3:] * torch.tensor(
                    data=[w, h, w, h])
            img_tensor = img_tensor.to(self.device)
            targets_tensor = targets_tensor.to(self.device)
            self.optimizer.zero_grad()
            # Mixed-precision forward; the model returns a dict of losses.
            with amp.autocast(enabled=True):
                _, total_loss = self.model(img_tensor, None, targets_tensor)
                rpn_cls_loss = total_loss['rpn_cls_loss']
                rpn_box_loss = total_loss['rpn_box_loss']
                roi_cls_loss = total_loss['roi_cls_loss']
                roi_box_loss = total_loss['roi_box_loss']
                loss = rpn_cls_loss + rpn_box_loss + roi_cls_loss + roi_box_loss
            self.scaler.scale(loss).backward()
            # Per-iteration warm-up/cosine LR schedule, applied before step.
            self.lr_adjuster(self.optimizer, i, epoch)
            ulr = self.optimizer.param_groups[0]['lr']
            dlr = self.optimizer.param_groups[2]['lr']
            self.scaler.step(self.optimizer)
            self.scaler.update()
            self.ema.update(self.model)
            loss_list[0].append(loss.item())
            loss_list[1].append(rpn_cls_loss.item())
            loss_list[2].append(rpn_box_loss.item())
            loss_list[3].append(roi_cls_loss.item())
            loss_list[4].append(roi_box_loss.item())
            if self.local_rank == 0:
                pbar.set_description(
                    "epoch:{:2d}|loss:{:6.4f}|{:6.4f}|{:6.4f}|{:6.4f}|{:6.4f}|ulr:{:8.6f},dlr:{:8.6f}"
                    .format(epoch + 1, loss.item(), rpn_cls_loss.item(),
                            rpn_box_loss.item(), roi_cls_loss.item(),
                            roi_box_loss.item(), ulr, dlr))
        self.ema.update_attr(self.model)
        mean_loss_list = [np.array(item).mean() for item in loss_list]
        print(
            "epoch:{:3d}|local:{:3d}|loss:{:6.4f}|{:6.4f}|:{:6.4f}|{:6.4f}|{:6.4f}|ulr:{:8.6f}|dlr:{:8.6f}"
            .format(epoch + 1, self.local_rank, mean_loss_list[0],
                    mean_loss_list[1], mean_loss_list[2], mean_loss_list[3],
                    mean_loss_list[4], ulr, dlr))

    @torch.no_grad()
    def val(self, epoch):
        """Evaluate the EMA model; metrics are all-reduced across ranks and
        averaged over self.gpu_num, then rank 0 writes the checkpoints."""
        self.ema.ema.eval()
        predict_list = list()
        target_list = list()
        # self.model.eval()
        if self.local_rank == 0:
            pbar = tqdm(self.vloader)
        else:
            pbar = self.vloader
        for i, (img_tensor, targets_tensor, _) in enumerate(pbar):
            _, _, h, w = img_tensor.shape
            targets_tensor[:, 3:] = targets_tensor[:, 3:] * torch.tensor(
                data=[w, h, w, h])
            img_tensor = img_tensor.to(self.device)
            targets_tensor = targets_tensor.to(self.device)
            predicts, _ = self.ema.ema(img_tensor, None)
            # NOTE: the inner enumerate deliberately re-binds i to the
            # within-batch image index (column 0 of targets_tensor).
            for i, predict in enumerate(predicts):
                predict_list.append(predict)
                targets_sample = targets_tensor[targets_tensor[:, 0] == i][:,
                                                                           2:]
                target_list.append(targets_sample)
        mp, mr, map50, map = coco_map(predict_list, target_list)
        # Average each metric over all participating GPUs.
        mp = reduce_sum(torch.tensor(mp,
                                     device=self.device)).item() / self.gpu_num
        mr = reduce_sum(torch.tensor(mr,
                                     device=self.device)).item() / self.gpu_num
        map50 = reduce_sum(torch.tensor(
            map50, device=self.device)).item() / self.gpu_num
        map = reduce_sum(torch.tensor(
            map, device=self.device)).item() / self.gpu_num
        if self.local_rank == 0:
            print(
                "epoch: {:2d}|gpu_num:{:d}|mp:{:6.4f}|mr:{:6.4f}|map50:{:6.4f}|map:{:6.4f}"
                .format(epoch + 1, self.gpu_num, mp * 100, mr * 100,
                        map50 * 100, map * 100))
        last_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_last.pth".format(self.cfg['model_name']))
        best_map_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_best_map.pth".format(self.cfg['model_name']))
        best_map50_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_best_map50.pth".format(self.cfg['model_name']))
        ema_static = self.ema.ema.state_dict()
        cpkt = {
            "ema": ema_static,
            "map": map * 100,
            "epoch": epoch,
            "map50": map50 * 100
        }
        # Only rank 0 writes checkpoints.
        if self.local_rank != 0:
            return
        torch.save(cpkt, last_weight_path)
        if map > self.best_map:
            torch.save(cpkt, best_map_weight_path)
            self.best_map = map
        if map50 > self.best_map50:
            torch.save(cpkt, best_map50_weight_path)
            self.best_map50 = map50

    def run(self):
        """Train for the configured number of epochs, validating periodically."""
        for epoch in range(self.optim_cfg['epochs']):
            self.train(epoch)
            if (epoch + 1) % self.val_cfg['interval'] == 0:
                self.val(epoch)
        dist.destroy_process_group()
        torch.cuda.empty_cache()
def __init__(self, cfg_path):
    """Build the DETR DDP training state from the YAML config at *cfg_path*.

    NOTE(review): this constructor duplicates the DETR DDPMixProcessor
    __init__ below almost verbatim; the enclosing class header is outside
    this view.
    """
    with open(cfg_path, 'r') as rf:
        self.cfg = yaml.safe_load(rf)
    self.data_cfg = self.cfg['data']
    self.model_cfg = self.cfg['model']
    self.optim_cfg = self.cfg['optim']
    self.val_cfg = self.cfg['val']
    print(self.data_cfg)
    print(self.model_cfg)
    print(self.optim_cfg)
    print(self.val_cfg)
    # Restrict visible GPUs before the NCCL process group is created.
    os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
    self.gpu_num = len(str(self.cfg['gpus']).split(","))
    dist.init_process_group(backend='nccl')
    # --- dataloaders ---------------------------------------------------------
    self.tdata = COCODataSets(
        img_root=self.data_cfg['train_img_root'],
        annotation_path=self.data_cfg['train_annotation_path'],
        min_thresh=self.data_cfg['min_thresh'],
        max_thresh=self.data_cfg['max_thresh'],
        debug=self.data_cfg['debug'],
        use_crowd=self.data_cfg['use_crowd'],
        augments=True,
        remove_blank=self.data_cfg['remove_blank'])
    self.tloader = DataLoader(dataset=self.tdata,
                              batch_size=self.data_cfg['batch_size'],
                              num_workers=self.data_cfg['num_workers'],
                              collate_fn=self.tdata.collate_fn,
                              sampler=DistributedSampler(
                                  dataset=self.tdata, shuffle=True))
    self.vdata = COCODataSets(
        img_root=self.data_cfg['val_img_root'],
        annotation_path=self.data_cfg['val_annotation_path'],
        min_thresh=self.data_cfg['min_thresh'],
        max_thresh=self.data_cfg['max_thresh'],
        debug=self.data_cfg['debug'],
        use_crowd=self.data_cfg['use_crowd'],
        augments=False,
        remove_blank=False)
    self.vloader = DataLoader(dataset=self.vdata,
                              batch_size=self.data_cfg['batch_size'],
                              num_workers=self.data_cfg['num_workers'],
                              collate_fn=self.vdata.collate_fn,
                              sampler=DistributedSampler(
                                  dataset=self.vdata, shuffle=False))
    print("train_data: ", len(self.tdata), " | ", "val_data: ",
          len(self.vdata), " | ", "empty_data: ",
          self.tdata.empty_images_len)
    print("train_iter: ", len(self.tloader), " | ", "val_iter: ",
          len(self.vloader))
    # --- model / optimizer ----------------------------------------------------
    model = DETR(self.model_cfg)
    self.scaler = amp.GradScaler(enabled=True)
    self.best_map = 0.
    self.best_map50 = 0.
    # DETR convention: backbone parameters get their own (lower) LR.
    param_dicts = [
        {
            "params": [
                p for n, p in model.named_parameters()
                if "backbone" not in n and p.requires_grad
            ]
        },
        {
            "params": [
                p for n, p in model.named_parameters()
                if "backbone" in n and p.requires_grad
            ],
            "lr": self.optim_cfg['backbone_lr'],
        },
    ]
    optimizer = torch.optim.AdamW(
        param_dicts,
        lr=self.optim_cfg['lr'],
        weight_decay=self.optim_cfg['weight_decay'])
    local_rank = dist.get_rank()
    self.local_rank = local_rank
    self.device = torch.device("cuda", local_rank)
    model.to(self.device)
    self.model = nn.parallel.distributed.DistributedDataParallel(
        model, device_ids=[local_rank], output_device=local_rank)
    self.optimizer = optimizer
    self.ema = ModelEMA(self.model)
    self.lr_adjuster = torch.optim.lr_scheduler.StepLR(
        optimizer, self.optim_cfg['decay_steps'])
class DDPMixProcessor(object):
    """DDP mixed-precision (torch.cuda.amp) trainer for DETR.

    The optimizer is AdamW with two parameter groups (transformer vs.
    backbone LR, DETR convention); the schedule is a per-epoch StepLR.
    An EMA copy of the weights is maintained; checkpoints are written by
    rank 0 only.
    """

    def __init__(self, cfg_path):
        """Build datasets, model, optimizer and LR schedule from the YAML at *cfg_path*."""
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']
        self.model_cfg = self.cfg['model']
        self.optim_cfg = self.cfg['optim']
        self.val_cfg = self.cfg['val']
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.val_cfg)
        # Restrict visible GPUs before the NCCL process group is created.
        os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
        self.gpu_num = len(str(self.cfg['gpus']).split(","))
        dist.init_process_group(backend='nccl')
        self.tdata = COCODataSets(
            img_root=self.data_cfg['train_img_root'],
            annotation_path=self.data_cfg['train_annotation_path'],
            min_thresh=self.data_cfg['min_thresh'],
            max_thresh=self.data_cfg['max_thresh'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=True,
            remove_blank=self.data_cfg['remove_blank'])
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.tdata, shuffle=True))
        self.vdata = COCODataSets(
            img_root=self.data_cfg['val_img_root'],
            annotation_path=self.data_cfg['val_annotation_path'],
            min_thresh=self.data_cfg['min_thresh'],
            max_thresh=self.data_cfg['max_thresh'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=False,
            remove_blank=False)
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.vdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.vdata, shuffle=False))
        print("train_data: ", len(self.tdata), " | ", "val_data: ",
              len(self.vdata), " | ", "empty_data: ",
              self.tdata.empty_images_len)
        print("train_iter: ", len(self.tloader), " | ", "val_iter: ",
              len(self.vloader))
        model = DETR(self.model_cfg)
        self.scaler = amp.GradScaler(enabled=True)
        self.best_map = 0.
        self.best_map50 = 0.
        # DETR convention: backbone parameters get their own (lower) LR.
        param_dicts = [
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if "backbone" not in n and p.requires_grad
                ]
            },
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if "backbone" in n and p.requires_grad
                ],
                "lr": self.optim_cfg['backbone_lr'],
            },
        ]
        optimizer = torch.optim.AdamW(
            param_dicts,
            lr=self.optim_cfg['lr'],
            weight_decay=self.optim_cfg['weight_decay'])
        local_rank = dist.get_rank()
        self.local_rank = local_rank
        self.device = torch.device("cuda", local_rank)
        model.to(self.device)
        self.model = nn.parallel.distributed.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank)
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)
        self.lr_adjuster = torch.optim.lr_scheduler.StepLR(
            optimizer, self.optim_cfg['decay_steps'])

    def train(self, epoch):
        """Run one AMP training epoch.

        FIX: gradients must be unscaled before clipping — clipping the
        scaled gradients makes the 0.1 max-norm threshold meaningless (the
        scale factor is typically >> 1).  ``self.scaler.unscale_`` is now
        called before ``clip_grad_norm_`` as required by the PyTorch AMP
        gradient-clipping recipe.
        """
        self.model.train()
        if self.local_rank == 0:
            pbar = tqdm(self.tloader)
        else:
            pbar = self.tloader
        # [total, cls, dis, iou] per-iteration losses for the epoch mean.
        loss_list = [list(), list(), list(), list()]
        ulr = 0
        dlr = 0
        for i, (input_tensor, _) in enumerate(pbar):
            # NOTE(review): input_tensor is a project batch container whose
            # .to() is assumed to move data in place — confirm against its type.
            input_tensor.to(self.device)
            self.optimizer.zero_grad()
            # Mixed-precision forward; the model returns a dict of losses.
            with amp.autocast(enabled=True):
                _, total_loss = self.model(input_tensor)
                cls_loss = total_loss['cls_loss']
                dis_loss = total_loss['dis_loss']
                iou_loss = total_loss['iou_loss']
                loss = cls_loss + dis_loss + iou_loss
            self.scaler.scale(loss).backward()
            ulr = self.optimizer.param_groups[0]['lr']
            dlr = self.optimizer.param_groups[1]['lr']
            # Unscale gradients in place so the clip threshold applies to the
            # true gradient norms; scaler.step() will not unscale again.
            self.scaler.unscale_(self.optimizer)
            nn.utils.clip_grad_norm_(self.model.parameters(), 0.1)
            self.scaler.step(self.optimizer)
            self.scaler.update()
            self.ema.update(self.model)
            # Losses are summed over the 6 decoder layers; report the mean.
            loss_list[0].append(loss.item())
            loss_list[1].append(cls_loss.item() / 6)
            loss_list[2].append(dis_loss.item() / 6)
            loss_list[3].append(iou_loss.item() / 6)
            if self.local_rank == 0:
                pbar.set_description(
                    "epoch:{:2d}|loss:{:6.4f}|{:6.4f}|{:6.4f}|{:6.4f}|ulr:{:8.6f},dlr:{:8.6f}"
                    .format(epoch + 1, loss.item(),
                            cls_loss.item() / 6,
                            dis_loss.item() / 6,
                            iou_loss.item() / 6, ulr, dlr))
        # StepLR is stepped once per epoch.
        self.lr_adjuster.step()
        self.ema.update_attr(self.model)
        mean_loss_list = [np.array(item).mean() for item in loss_list]
        print(
            "epoch:{:3d}|local:{:3d}|loss:{:6.4f}|{:6.4f}|:{:6.4f}|{:6.4f}|ulr:{:8.6f}|dlr:{:8.6f}"
            .format(epoch + 1, self.local_rank, mean_loss_list[0],
                    mean_loss_list[1], mean_loss_list[2], mean_loss_list[3],
                    ulr, dlr))

    @torch.no_grad()
    def val(self, epoch):
        """Evaluate the DDP model; metrics are all-reduced across ranks and
        averaged over self.gpu_num, then rank 0 writes the checkpoints."""
        self.model.eval()
        self.ema.ema.eval()
        predict_list = list()
        target_list = list()
        # self.model.eval()
        if self.local_rank == 0:
            pbar = tqdm(self.vloader)
        else:
            pbar = self.vloader
        for i, (input_tensor, _) in enumerate(pbar):
            input_tensor.un_normalize_box()
            input_tensor.to(self.device)
            predicts, _ = self.model(input_tensor)
            # Re-bind i to the within-batch image index.
            for i, predict in enumerate(predicts):
                predict_list.append(predict)
                boxes = input_tensor.boxes[i]
                labels = input_tensor.labels[i]
                # Target rows are [label, x1, y1, x2, y2].
                target_list.append(
                    torch.cat([labels.float().unsqueeze(-1), boxes], dim=-1))
        mp, mr, map50, map = coco_map(predict_list, target_list)
        # Average each metric over all participating GPUs.
        mp = reduce_sum(torch.tensor(
            mp, device=self.device)).item() / self.gpu_num
        mr = reduce_sum(torch.tensor(
            mr, device=self.device)).item() / self.gpu_num
        map50 = reduce_sum(torch.tensor(
            map50, device=self.device)).item() / self.gpu_num
        map = reduce_sum(torch.tensor(
            map, device=self.device)).item() / self.gpu_num
        if self.local_rank == 0:
            print(
                "epoch: {:2d}|gpu_num:{:d}|mp:{:6.4f}|mr:{:6.4f}|map50:{:6.4f}|map:{:6.4f}"
                .format(epoch + 1, self.gpu_num, mp * 100, mr * 100,
                        map50 * 100, map * 100))
        last_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_last.pth".format(self.cfg['model_name']))
        best_map_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_best_map.pth".format(self.cfg['model_name']))
        best_map50_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_best_map50.pth".format(self.cfg['model_name']))
        ema_static = self.ema.ema.state_dict()
        cpkt = {
            "ema": ema_static,
            "map": map * 100,
            "epoch": epoch,
            "map50": map50 * 100,
            "model": self.model.module.state_dict()
        }
        # Only rank 0 writes checkpoints.
        if self.local_rank != 0:
            return
        torch.save(cpkt, last_weight_path)
        if map > self.best_map:
            torch.save(cpkt, best_map_weight_path)
            self.best_map = map
        if map50 > self.best_map50:
            torch.save(cpkt, best_map50_weight_path)
            self.best_map50 = map50

    def run(self):
        """Train for the configured number of epochs, validating periodically."""
        for epoch in range(self.optim_cfg['epochs']):
            self.train(epoch)
            if (epoch + 1) % self.val_cfg['interval'] == 0:
                self.val(epoch)
        dist.destroy_process_group()
        torch.cuda.empty_cache()
def __init__(self, cfg_path):
    """Build a single-GPU YOLOv5/BDD100K trainer from a YAML config file.

    Loads config sections, builds train/val datasets and plain (non-
    distributed) DataLoaders, constructs the YOLOv5 model, optimizer, AMP
    grad scaler, EMA, loss, and LR schedule, and prepares class-aware
    weighted-sampling state.

    NOTE(review): this looks like a method of a trainer class whose `class`
    header lies outside this chunk; its body duplicates
    COCODDPApexProcessor.__init__ below — confirm which copy is live.

    :param cfg_path: path to a YAML file with data/model/optim/
                     hyper_params/val sections.
    """
    with open(cfg_path, 'r') as rf:
        self.cfg = yaml.safe_load(rf)
    self.data_cfg = self.cfg['data']  # dataset params
    self.model_cfg = self.cfg['model']  # model params
    self.optim_cfg = self.cfg['optim']  # optim params
    self.hyper_params = self.cfg['hyper_params']  # other hyper params
    self.val_cfg = self.cfg['val']  # validation hyper params
    print(self.data_cfg)
    print(self.model_cfg)
    print(self.optim_cfg)
    print(self.hyper_params)
    print(self.val_cfg)
    os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']  # set available gpu
    ## load dataset ---------------------------------------------------------------------------------------
    # self.tdata = COCODataSets(img_root=self.data_cfg['train_img_root'],
    #                           annotation_path=self.data_cfg['train_annotation_path'],
    #                           img_size=self.data_cfg['img_size'],
    #                           debug=self.data_cfg['debug'],
    #                           augments=True,
    #                           remove_blank=self.data_cfg['remove_blank'],
    #                           image_weight = self.hyper_params['use_weight_sample']
    #                           )
    self.tdata = BDD100DataSets(
        img_root=self.data_cfg['train_img_root'],
        annotation_path=self.data_cfg['train_annotation_path'],
        img_size=self.data_cfg['img_size'],
        debug=self.data_cfg['debug'],
        augments=True,
        remove_blank=self.data_cfg['remove_blank'],
        image_weight=self.hyper_params['use_weight_sample'])
    # Plain loader (no DistributedSampler): this trainer is single-process.
    self.tloader = DataLoader(dataset=self.tdata,
                              batch_size=self.data_cfg['batch_size'],
                              num_workers=self.data_cfg['num_workers'],
                              collate_fn=self.tdata.collate_fn)
    # self.vdata = COCODataSets(img_root=self.data_cfg['val_img_root'],
    #                           annotation_path=self.data_cfg['val_annotation_path'],
    #                           img_size=self.data_cfg['img_size'],
    #                           debug=self.data_cfg['debug'],
    #                           augments=False,
    #                           remove_blank=False
    #                           )
    self.vdata = BDD100DataSets(
        img_root=self.data_cfg['val_img_root'],
        annotation_path=self.data_cfg['val_annotation_path'],
        img_size=self.data_cfg['img_size'],
        debug=self.data_cfg['debug'],
        augments=False,
        remove_blank=False)
    self.vloader = DataLoader(dataset=self.vdata,
                              batch_size=self.data_cfg['batch_size'],
                              num_workers=self.data_cfg['num_workers'],
                              collate_fn=self.vdata.collate_fn)
    print("train_data: ", len(self.tdata), " | ", "val_data: ",
          len(self.vdata), " | ", "empty_data: ",
          self.tdata.empty_images_len)
    print("train_iter: ", len(self.tloader), " | ", "val_iter: ",
          len(self.vloader))
    ### define model -------------------------------------------------------------------------------------
    model = YOLOv5(in_channels=3,
                   num_cls=self.model_cfg['num_cls'],
                   anchors=self.model_cfg['anchors'],
                   strides=self.model_cfg['strides'],
                   scale_name=self.model_cfg['scale_name'])
    ### check anchor -------------------------------------------------------------------------------------
    # check_anchors(self.tdata,model,self.hyper_params['anchor_t'],self.data_cfg['img_size'])
    ############------------------------------------------------------------------------------------------
    self.best_map = 0.
    self.best_map50 = 0.
    optimizer = split_optimizer(model, self.optim_cfg)
    self.device = torch.device('cuda:0')  # single fixed GPU
    model.to(self.device)
    pretrain = self.model_cfg.get('pretrain', None)
    if pretrain:
        # Resume from the EMA weights of a previous checkpoint; strict=False
        # tolerates missing/unexpected keys, and the result is printed.
        pretrained_weights = torch.load(pretrain, map_location=self.device)
        load_info = model.load_state_dict(pretrained_weights['ema'],
                                          strict=False)
        print('load info ', load_info)
    # Mixed-precision training via the native AMP API shipped with torch>=1.6.
    # NOTE(review): `amp` must resolve to torch.cuda.amp here
    # (GradScaler/autocast), while the apex-based class in this file calls
    # amp.initialize — verify the module-level imports disambiguate this.
    self.scaler = amp.GradScaler(enabled=True)
    self.model = model
    self.optimizer = optimizer
    self.ema = ModelEMA(self.model)
    self.creterion = YOLOv5LossOriginal(
        iou_type=self.hyper_params['iou_type'],
        fl_gamma=self.hyper_params['fl_gamma'],
        class_smoothing_eps=self.hyper_params['class_smoothing_eps'])
    self.lr_adjuster = WarmUpCosineDecayMultiStepLRAdjust(
        init_lr=self.optim_cfg['lr'],
        milestones=self.optim_cfg['milestones'],
        warm_up_epoch=self.optim_cfg['warm_up_epoch'],
        iter_per_epoch=len(self.tloader),
        epochs=self.optim_cfg['epochs'],
        cosine_weights=self.optim_cfg['cosine_weights'])
    ## for class-aware weighted sampling ---------------------------------------------------------------------
    self.class_weights = labels_to_class_weights(
        self.tdata.labels,
        nc=self.model_cfg['num_cls']).to(self.device) if \
        self.hyper_params['use_weight_sample'] else None
    self.maps = np.zeros(self.model_cfg['num_cls'])  # mAP per class
class COCODDPApexProcessor(object):
    """Single-GPU YOLOv5 trainer (BDD100K data) with native-AMP training,
    EMA weights, class-aware weighted sampling, and NMS-based validation.

    NOTE(review): despite the DDP/Apex name, this class never calls
    dist.init_process_group, uses a fixed 'cuda:0' device, plain DataLoaders
    (no DistributedSampler), and torch-native amp.GradScaler/autocast —
    confirm the name is historical.
    """

    def __init__(self, cfg_path):
        """Build the trainer from a YAML config file.

        :param cfg_path: path to a YAML file with data/model/optim/
                         hyper_params/val sections.
        """
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']  # dataset params
        self.model_cfg = self.cfg['model']  # model params
        self.optim_cfg = self.cfg['optim']  # optim params
        self.hyper_params = self.cfg['hyper_params']  # other hyper params
        self.val_cfg = self.cfg['val']  # validation hyper params
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.hyper_params)
        print(self.val_cfg)
        os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']  # set available gpu
        ## load dataset ---------------------------------------------------------------------------------------
        # self.tdata = COCODataSets(img_root=self.data_cfg['train_img_root'],
        #                           annotation_path=self.data_cfg['train_annotation_path'],
        #                           img_size=self.data_cfg['img_size'],
        #                           debug=self.data_cfg['debug'],
        #                           augments=True,
        #                           remove_blank=self.data_cfg['remove_blank'],
        #                           image_weight = self.hyper_params['use_weight_sample']
        #                           )
        self.tdata = BDD100DataSets(
            img_root=self.data_cfg['train_img_root'],
            annotation_path=self.data_cfg['train_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            augments=True,
            remove_blank=self.data_cfg['remove_blank'],
            image_weight=self.hyper_params['use_weight_sample'])
        # Plain loader (no DistributedSampler): single-process training.
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collate_fn)
        # self.vdata = COCODataSets(img_root=self.data_cfg['val_img_root'],
        #                           annotation_path=self.data_cfg['val_annotation_path'],
        #                           img_size=self.data_cfg['img_size'],
        #                           debug=self.data_cfg['debug'],
        #                           augments=False,
        #                           remove_blank=False
        #                           )
        self.vdata = BDD100DataSets(
            img_root=self.data_cfg['val_img_root'],
            annotation_path=self.data_cfg['val_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            augments=False,
            remove_blank=False)
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.vdata.collate_fn)
        print("train_data: ", len(self.tdata), " | ", "val_data: ",
              len(self.vdata), " | ", "empty_data: ",
              self.tdata.empty_images_len)
        print("train_iter: ", len(self.tloader), " | ", "val_iter: ",
              len(self.vloader))
        ### define model -------------------------------------------------------------------------------------
        model = YOLOv5(in_channels=3,
                       num_cls=self.model_cfg['num_cls'],
                       anchors=self.model_cfg['anchors'],
                       strides=self.model_cfg['strides'],
                       scale_name=self.model_cfg['scale_name'])
        ### check anchor -------------------------------------------------------------------------------------
        # check_anchors(self.tdata,model,self.hyper_params['anchor_t'],self.data_cfg['img_size'])
        ############------------------------------------------------------------------------------------------
        self.best_map = 0.
        self.best_map50 = 0.
        optimizer = split_optimizer(model, self.optim_cfg)
        self.device = torch.device('cuda:0')  # single fixed GPU
        model.to(self.device)
        pretrain = self.model_cfg.get('pretrain', None)
        if pretrain:
            # Resume from the EMA weights of a previous checkpoint;
            # strict=False tolerates missing/unexpected keys.
            pretrained_weights = torch.load(pretrain,
                                            map_location=self.device)
            load_info = model.load_state_dict(pretrained_weights['ema'],
                                              strict=False)
            print('load info ', load_info)
        # Mixed-precision training via the native AMP API of torch>=1.6.
        self.scaler = amp.GradScaler(enabled=True)
        self.model = model
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)
        self.creterion = YOLOv5LossOriginal(
            iou_type=self.hyper_params['iou_type'],
            fl_gamma=self.hyper_params['fl_gamma'],
            class_smoothing_eps=self.hyper_params['class_smoothing_eps'])
        self.lr_adjuster = WarmUpCosineDecayMultiStepLRAdjust(
            init_lr=self.optim_cfg['lr'],
            milestones=self.optim_cfg['milestones'],
            warm_up_epoch=self.optim_cfg['warm_up_epoch'],
            iter_per_epoch=len(self.tloader),
            epochs=self.optim_cfg['epochs'],
            cosine_weights=self.optim_cfg['cosine_weights'])
        ## for class-aware weighted sampling ---------------------------------------------------------------------
        self.class_weights = labels_to_class_weights(
            self.tdata.labels,
            nc=self.model_cfg['num_cls']).to(self.device) if \
            self.hyper_params['use_weight_sample'] else None
        self.maps = np.zeros(self.model_cfg['num_cls'])  # mAP per class

    def train(self, epoch):
        """Train one epoch with native-AMP forward/backward, per-iteration
        LR adjustment, and EMA weight updates.

        :param epoch: zero-based epoch index (reported as ``epoch + 1``).
        """
        self.model.train()
        if self.model_cfg['freeze_bn']:
            self.model.apply(freeze_bn)
        if self.hyper_params['use_weight_sample']:
            # Re-weight classes by how poorly they scored last validation
            # (low per-class mAP -> higher weight), then resample image
            # indices for this epoch accordingly.
            cw = self.class_weights.cpu().numpy() * (
                1 - self.maps)**2  # class weight
            iw = labels_to_image_weights(self.tdata.labels,
                                         nc=self.model_cfg['num_cls'],
                                         class_weights=cw)  # image weight
            self.tdata.indices = random.choices(
                range(len(self.tdata)), weights=iw,
                k=len(self.tdata))  # rand weighted idx
        pbar = tqdm(self.tloader)
        loss_list = [list(), list(), list(),
                     list()]  # loss_box, loss_obj, loss_cls, loss
        lr = 0
        match_num = 0
        for i, (img_tensor, targets_tensor, _, _) in enumerate(pbar):
            '''
            img_tensor: [bs,3,h,w]
            targets_tensor: [bs,7] (bs_idx,weights,label_idx,x1,y1,x2,y2), box annotations have been normalized
            '''
            # Multi-scale training: resize the whole batch to a randomly
            # chosen size from the configured list.
            if len(self.hyper_params['multi_scale']) >= 2:
                target_size = np.random.choice(
                    self.hyper_params['multi_scale'])
                img_tensor = interpolate(img_tensor,
                                         mode='bilinear',
                                         size=target_size,
                                         align_corners=False)
            _, _, h, w = img_tensor.shape
            with torch.no_grad():
                img_tensor = img_tensor.to(self.device)
                # bs_idx,weights,label_idx,x1,y1,x2,y2
                # In-place (x1,y1,x2,y2) -> (cx,cy,w,h): first wh = br - tl,
                # then center = tl + wh/2. Statement order matters here.
                targets_tensor[:, [
                    5, 6
                ]] = targets_tensor[:, [5, 6]] - targets_tensor[:, [3, 4]]
                targets_tensor[:, [
                    3, 4
                ]] = targets_tensor[:, [3, 4]] + targets_tensor[:, [5, 6]] / 2.
                targets_tensor = targets_tensor.to(self.device)
            self.optimizer.zero_grad()
            # Forward pass under native-AMP autocast (mixed precision).
            with amp.autocast(True):
                '''
                predicts(list): len=nl, predicts[i].shape=(bs,3,ny,nx,85)
                normalized_anchor(torch.Tensor): shape=[3,3,2]
                '''
                predicts, anchors = self.model(img_tensor)
                # Total loss plus (box, obj, cls, total) detail and the
                # number of matched anchors; loss scaled by batch_size.
                total_loss, detail_loss, total_num = self.creterion(
                    predicts, targets_tensor, anchors)
            # Scaled backward, per-iteration LR schedule, scaler step/update,
            # then EMA update — keep this ordering intact.
            self.scaler.scale(total_loss).backward()
            match_num += total_num
            # nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=self.optim_cfg['max_norm'],norm_type=2)
            self.lr_adjuster(self.optimizer, i, epoch)
            lr = self.optimizer.param_groups[0]['lr']
            self.scaler.step(self.optimizer)
            self.scaler.update()
            self.ema.update(self.model)
            loss_box, loss_obj, loss_cls, loss = detail_loss
            loss_list[0].append(loss_box.item())
            loss_list[1].append(loss_obj.item())
            loss_list[2].append(loss_cls.item())
            loss_list[3].append(loss.item())
            pbar.set_description(
                "epoch:{:2d}|match_num:{:4d}|size:{:3d}|loss:{:6.4f}|loss_box:{:6.4f}|loss_obj:{:6.4f}|loss_cls:{:6.4f}|lr:{:8.6f}"
                .format(epoch + 1, int(total_num), h, loss.item(),
                        loss_box.item(), loss_obj.item(), loss_cls.item(),
                        lr))
        self.ema.update_attr(self.model)
        mean_loss_list = [np.array(item).mean() for item in loss_list]
        print(
            "epoch:{:3d}|match_num:{:4d}|local:{:3d}|loss:{:6.4f}||loss_box:{:6.4f}|loss_obj:{:6.4f}|loss_cls:{:6.4f}|lr:{:8.6f}"
            .format(epoch + 1, match_num, 0, mean_loss_list[3],
                    mean_loss_list[0], mean_loss_list[1], mean_loss_list[2],
                    lr))

    @torch.no_grad()
    def val(self, epoch):
        """Validate one epoch with the EMA model + NMS, update per-class
        mAPs used for weighted sampling, and save checkpoints.

        NOTE(review): unlike DDPApexProcessor.val, neither self.model.eval()
        nor self.ema.ema.eval() is called here — confirm the EMA model is
        already in eval mode.

        :param epoch: zero-based epoch index (reported as ``epoch + 1``).
        """
        predict_list = list()
        target_list = list()
        # self.model.eval()
        pbar = tqdm(self.vloader)
        path_list, shape_list = [], []
        for img_tensor, targets_tensor, imgIDs, shapes in pbar:
            _, _, h, w = img_tensor.shape
            # De-normalize box coordinates to the current pixel size.
            targets_tensor[:, 3:] = targets_tensor[:, 3:] * torch.tensor(
                data=[w, h, w, h])
            img_tensor = img_tensor.to(self.device)
            targets_tensor = targets_tensor.to(self.device)
            # Inference runs on the EMA weights, not the live model.
            predicts = self.ema.ema(img_tensor)
            # predicts after nms is a list(len=bs), its element has shape=[N,6] 6==>(x1, y1, x2, y2, conf, cls)
            predicts = non_max_suppression(
                predicts,
                conf_thresh=self.val_cfg['conf_thresh'],
                iou_thresh=self.val_cfg['iou_thresh'],
                max_det=self.val_cfg['max_det'])
            for i, predict in enumerate(predicts):
                # NOTE(review): nesting reconstructed — only the coordinate
                # clipping is assumed to be guarded by the None check, so
                # predict/target lists stay index-aligned. Confirm.
                if predict is not None:
                    clip_coords(predict, (h, w))
                predict_list.append(predict)
                # Targets for image i: rows (cls_id, x1, y1, x2, y2).
                targets_sample = targets_tensor[targets_tensor[:, 0] ==
                                                i][:, 2:]
                target_list.append(targets_sample)
                path_list.append(imgIDs[i])
                shape_list.append(shapes[i])
        # after above code block, predict_list(len=len(dataset)), its element shape = [n,6] 6==>(x1,y1,x2,y2,score,cls_id)
        # target_list(len=len(dataset)), its element shape = [m, 5] 5==>(cls_id,x1,y1,x2,y2)
        # self.maps (per-class mAP) feeds next epoch's weighted sampling.
        mp, mr, map50, map, self.maps = coco_map(predict_list, target_list,
                                                 path_list, shape_list,
                                                 self.data_cfg['img_size'])
        print(
            "epoch: {:2d}|local:{:d}|mp:{:6.4f}|mr:{:6.4f}|map50:{:6.4f}|map:{:6.4f}"
            .format(epoch + 1, 0, mp * 100, mr * 100, map50 * 100,
                    map * 100))
        last_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_last.pth".format(self.cfg['model_name']))
        best_map_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_best_map.pth".format(self.cfg['model_name']))
        best_map50_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_best_map50.pth".format(self.cfg['model_name']))
        # model_static = self.model.module.state_dict() if is_parallel(self.model) else self.model.state_dict()
        # Checkpoint stores only the EMA weights (no raw model state here).
        ema_static = self.ema.ema.state_dict()
        cpkt = {
            "ema": ema_static,
            "map": map * 100,
            "epoch": epoch,
            "map50": map50 * 100
        }
        torch.save(cpkt, last_weight_path)
        if map > self.best_map:
            torch.save(cpkt, best_map_weight_path)
            self.best_map = map
        if map50 > self.best_map50:
            torch.save(cpkt, best_map50_weight_path)
            self.best_map50 = map50

    def run(self):
        """Train for optim['epochs'] epochs, validating every
        val['interval'] epochs."""
        for epoch in range(self.optim_cfg['epochs']):
            self.train(epoch)
            if (epoch + 1) % self.val_cfg['interval'] == 0:
                self.val(epoch)