Example #1
 def __init__(self, cfg_path):
     with open(cfg_path, 'r') as rf:
         self.cfg = yaml.safe_load(rf)
     self.data_cfg = self.cfg['data']
     self.model_cfg = self.cfg['model']
     self.optim_cfg = self.cfg['optim']
     self.val_cfg = self.cfg['val']
     print(self.data_cfg)
     print(self.model_cfg)
     print(self.optim_cfg)
     print(self.val_cfg)
     os.environ['CUDA_VISIBLE_DEVICES'] = str(self.cfg['gpus'])
     self.gpu_num = len(str(self.cfg['gpus']).split(","))
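     # no init_method is given, so init_process_group falls back to env://,
     # reading MASTER_ADDR/MASTER_PORT/RANK/WORLD_SIZE set by the launcher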
     dist.init_process_group(backend='nccl')
     self.tdata = CustomerDataSets(json_path=self.data_cfg['train_json_path'],
                                   debug=self.data_cfg['debug'],
                                   augment=True,
                                   )
     self.tloader = DataLoader(dataset=self.tdata,
                               batch_size=self.data_cfg['batch_size'],
                               num_workers=self.data_cfg['num_workers'],
                               collate_fn=self.tdata.collate_fn,
                               sampler=DistributedSampler(dataset=self.tdata, shuffle=True))
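     # shuffling is delegated to the DistributedSampler (the DataLoader's own
     # shuffle flag must stay off); note that set_epoch() has to be called on
     # the sampler each epoch, or every epoch sees the same shuffle order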
     self.vdata = CustomerDataSets(json_path=self.data_cfg['val_json_path'],
                                   debug=self.data_cfg['debug'],
                                   augment=False,
                                   )
     self.vloader = DataLoader(dataset=self.vdata,
                               batch_size=self.data_cfg['batch_size'],
                               num_workers=self.data_cfg['num_workers'],
                               collate_fn=self.vdata.collate_fn,
                               sampler=DistributedSampler(dataset=self.vdata, shuffle=False))
     print("train_data: ", len(self.tdata), " | ",
           "val_data: ", len(self.vdata))
     print("train_iter: ", len(self.tloader), " | ",
           "val_iter: ", len(self.vloader))
     if self.cfg['model_name'] == "v4":
         net = YOLOv4
     elif self.cfg['model_name'] == "v5":
         net = YOLOv5
     else:
         raise NotImplementedError("{:s} not supported yet".format(self.cfg['model_name']))
     model = net(num_cls=self.model_cfg['num_cls'],
                 anchors=self.model_cfg['anchors'],
                 strides=self.model_cfg['strides'],
                 scale_name=self.model_cfg['scale_name'],
                 )
     self.best_map = 0.
     optimizer = split_optimizer(model, self.optim_cfg)
     local_rank = dist.get_rank()
     self.local_rank = local_rank
     self.device = torch.device("cuda", local_rank)
     model.to(self.device)
     self.scaler = amp.GradScaler(enabled=True)
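     # SyncBatchNorm conversion must happen before the DDP wrap below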
     if self.optim_cfg['sync_bn']:
         model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
     self.model = nn.parallel.DistributedDataParallel(model,
                                                      device_ids=[local_rank],
                                                      output_device=local_rank)
     self.optimizer = optimizer
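     # ModelEMA presumably tracks an exponential moving average of the
     # weights (as in the YOLOv5 reference code) for use at evaluation time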
     self.ema = ModelEMA(self.model)
     self.lr_adjuster = IterWarmUpCosineDecayMultiStepLRAdjust(init_lr=self.optim_cfg['lr'],
                                                               warm_up_epoch=self.optim_cfg['warm_up_epoch'],
                                                               iter_per_epoch=len(self.tloader),
                                                               epochs=self.optim_cfg['epochs'],
                                                               alpha=self.optim_cfg['alpha'],
                                                               gamma=self.optim_cfg['gamma'],
                                                               bias_idx=2,
                                                               milestones=self.optim_cfg['milestones']
                                                               )
     self.obj_logger = AverageLogger()
     self.iou_logger = AverageLogger()
     self.loss_logger = AverageLogger()
     self.map_logger = AverageLogger()
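For context, the keys this __init__ reads imply a YAML config shaped roughly as below. This is a minimal sketch assembled from the accesses above; every value is an illustrative assumption (the 'val' section in particular is a guess, since the snippet stores it without unpacking it), not the original project's settings.

 import yaml

 # Config sketch inferred from the keys accessed in Example #1.
 # All values are illustrative assumptions.
 cfg = {
     'model_name': 'v5',  # 'v4' or 'v5', per the branch above
     'gpus': '0,1',
     'data': {
         'train_json_path': 'data/train.json',  # hypothetical paths
         'val_json_path': 'data/val.json',
         'debug': False,
         'batch_size': 8,
         'num_workers': 4,
     },
     'model': {
         'num_cls': 80,
         'anchors': [[10, 13, 16, 30, 33, 23],
                     [30, 61, 62, 45, 59, 119],
                     [116, 90, 156, 198, 373, 326]],  # YOLOv5-style defaults
         'strides': [8, 16, 32],
         'scale_name': 's',
     },
     'optim': {
         'lr': 0.01,
         'warm_up_epoch': 1,
         'epochs': 100,
         'alpha': 0.1,
         'gamma': 0.1,
         'milestones': [60, 80],
         'sync_bn': False,
     },
     'val': {'interval': 1},  # hypothetical
 }

 with open('config.yaml', 'w') as wf:
     yaml.safe_dump(cfg, wf)

Example #2 below reads the same top-level layout, but with COCO-style data keys (train_img_root, train_annotation_path, max_thresh, use_crowd, remove_blank) and an extra optim['amp'] flag.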
Example #2
 def __init__(self, cfg_path):
     with open(cfg_path, 'r') as rf:
         self.cfg = yaml.safe_load(rf)
     self.data_cfg = self.cfg['data']
     self.model_cfg = self.cfg['model']
     self.optim_cfg = self.cfg['optim']
     self.val_cfg = self.cfg['val']
     print(self.data_cfg)
     print(self.model_cfg)
     print(self.optim_cfg)
     print(self.val_cfg)
     os.environ['CUDA_VISIBLE_DEVICES'] = str(self.cfg['gpus'])
     self.gpu_num = len(str(self.cfg['gpus']).split(','))
     dist.init_process_group(backend='nccl')
     self.tdata = COCODataSets(img_root=self.data_cfg['train_img_root'],
                               annotation_path=self.data_cfg['train_annotation_path'],
                               max_thresh=self.data_cfg['max_thresh'],
                               debug=self.data_cfg['debug'],
                               use_crowd=self.data_cfg['use_crowd'],
                               augments=True,
                               remove_blank=self.data_cfg['remove_blank']
                               )
     self.tloader = DataLoader(dataset=self.tdata,
                               batch_size=self.data_cfg['batch_size'],
                               num_workers=self.data_cfg['num_workers'],
                               collate_fn=self.tdata.collect_fn,
                               sampler=DistributedSampler(dataset=self.tdata, shuffle=True))
     self.vdata = COCODataSets(img_root=self.data_cfg['val_img_root'],
                               annotation_path=self.data_cfg['val_annotation_path'],
                               max_thresh=self.data_cfg['max_thresh'],
                               debug=self.data_cfg['debug'],
                               use_crowd=self.data_cfg['use_crowd'],
                               augments=False,
                               remove_blank=False
                               )
     self.vloader = DataLoader(dataset=self.vdata,
                               batch_size=self.data_cfg['batch_size'],
                               num_workers=self.data_cfg['num_workers'],
                               collate_fn=self.vdata.collect_fn,
                               sampler=DistributedSampler(dataset=self.vdata, shuffle=False))
     print("train_data: ", len(self.tdata), " | ",
           "val_data: ", len(self.vdata), " | ",
           "empty_data: ", self.tdata.empty_images_len)
     print("train_iter: ", len(self.tloader), " | ",
           "val_iter: ", len(self.vloader))
     model = MaskRCNN(**self.model_cfg)
     self.best_map = 0.
     optimizer = split_optimizer(model, self.optim_cfg)
     local_rank = dist.get_rank()
     self.local_rank = local_rank
     self.device = torch.device("cuda", local_rank)
     model.to(self.device)
     if self.optim_cfg['sync_bn']:
         model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
     self.model = nn.parallel.DistributedDataParallel(model,
                                                      device_ids=[local_rank],
                                                      output_device=local_rank)
     self.scaler = amp.GradScaler(enabled=True) if self.optim_cfg['amp'] else None
     self.optimizer = optimizer
     self.ema = ModelEMA(self.model)
     self.lr_adjuster = IterWarmUpCosineDecayMultiStepLRAdjust(init_lr=self.optim_cfg['lr'],
                                                               milestones=self.optim_cfg['milestones'],
                                                               warm_up_epoch=self.optim_cfg['warm_up_epoch'],
                                                               iter_per_epoch=len(self.tloader),
                                                               epochs=self.optim_cfg['epochs'],
                                                               )
     self.cls_loss = AverageLogger()
     self.box_loss = AverageLogger()
     self.mask_loss = AverageLogger()
     self.loss = AverageLogger()
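Example #2 only creates the GradScaler when optim['amp'] is set, leaving self.scaler as None otherwise. Below is a minimal sketch of how such an optional scaler is typically consumed in a training step; the train_step helper and its arguments are hypothetical, not part of the original class.

 from torch.cuda import amp

 def train_step(model, optimizer, scaler, imgs, targets):
     # Hypothetical helper showing the usual pattern for an optional
     # GradScaler: autocast + scaled backward when AMP is on, a plain
     # fp32 step otherwise.
     optimizer.zero_grad()
     with amp.autocast(enabled=scaler is not None):
         loss = model(imgs, targets)  # assumes the model returns a scalar loss
     if scaler is not None:
         scaler.scale(loss).backward()
         scaler.step(optimizer)  # unscales gradients before stepping
         scaler.update()
     else:
         loss.backward()
         optimizer.step()
     return loss.detach()

Both examples also call dist.init_process_group with no explicit init_method, so they expect to run under PyTorch's distributed launcher, e.g. python -m torch.distributed.launch --nproc_per_node=2 train.py (the train.py entry point is hypothetical).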