class DDPApexProcessor(object):
    def __init__(self, cfg_path):
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']
        self.model_cfg = self.cfg['model']
        self.optim_cfg = self.cfg['optim']
        self.hyper_params = self.cfg['hyper_params']
        self.val_cfg = self.cfg['val']
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.hyper_params)
        print(self.val_cfg)
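        # Restrict visible GPUs to those listed in the config, then join the NCCL
        # process group; rank and world size come from the environment variables
        # set by the distributed launcher.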
        os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
        dist.init_process_group(backend='nccl')
        self.tdata = COCODataSets(
            img_root=self.data_cfg['train_img_root'],
            annotation_path=self.data_cfg['train_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=True,
            remove_blank=self.data_cfg['remove_blank'])
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.tdata, shuffle=True))
        self.vdata = COCODataSets(
            img_root=self.data_cfg['val_img_root'],
            annotation_path=self.data_cfg['val_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=False,
            remove_blank=False)
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.vdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.vdata, shuffle=False))
        print("train_data: ", len(self.tdata), " | ", "val_data: ",
              len(self.vdata), " | ", "empty_data: ",
              self.tdata.empty_images_len)
        print("train_iter: ", len(self.tloader), " | ", "val_iter: ",
              len(self.vloader))
        model = RetinaNet(
            num_cls=self.model_cfg['num_cls'],
            anchor_sizes=self.model_cfg['anchor_sizes'],
            strides=self.model_cfg['strides'],
            backbone=self.model_cfg['backbone'],
        )
        if self.model_cfg.get("backbone_weight", None):
            weights = torch.load(self.model_cfg['backbone_weight'])
            model.load_backbone_weighs(weights)
        self.best_map = 0.
        self.best_map50 = 0.
        optimizer = split_optimizer(model, self.optim_cfg)
        local_rank = dist.get_rank()
        self.local_rank = local_rank
        self.device = torch.device("cuda", local_rank)
        model.to(self.device)
        model, optimizer = amp.initialize(model,
                                          optimizer,
                                          opt_level='O1',
                                          verbosity=0)
        if self.optim_cfg['sync_bn']:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        self.model = nn.parallel.distributed.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank)
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)

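        # 'beta' may arrive from the YAML as a string expression (e.g. a tuple);
        # evaluate it in that case, otherwise use the numeric value as-is.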
        beta = eval(self.hyper_params['beta']) if isinstance(self.hyper_params['beta'], str) \
            else self.hyper_params['beta']

        self.creterion = RetinaAnchorFreeLoss(
            alpha=self.hyper_params['alpha'],
            gamma=self.hyper_params['gamma'],
            beta=beta,
            top_k=self.hyper_params['top_k'],
            box_iou_thresh=self.hyper_params['box_iou_thresh'],
            box_reg_weight=self.hyper_params['box_reg_weight'])
        self.lr_adjuster = WarmUpCosineDecayMultiStepLRAdjust(
            init_lr=self.optim_cfg['lr'],
            milestones=self.optim_cfg['milestones'],
            warm_up_epoch=self.optim_cfg['warm_up_epoch'],
            iter_per_epoch=len(self.tloader),
            epochs=self.optim_cfg['epochs'],
            cosine_weights=self.optim_cfg['cosine_weights'])

    def train(self, epoch):
        self.model.train()
        if self.model_cfg['freeze_bn']:
            self.model.apply(freeze_bn)
        if self.local_rank == 0:
            pbar = tqdm(self.tloader)
        else:
            pbar = self.tloader
        loss_list = [list(), list(), list()]
        lr = 0
        match_num = 0
        for i, (img_tensor, targets_tensor, _) in enumerate(pbar):
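            # Multi-scale training: pick a random target size for this batch and
            # bilinearly resize the whole batch to it.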
            if len(self.hyper_params['multi_scale']) > 2:
                target_size = np.random.choice(
                    self.hyper_params['multi_scale'])
                img_tensor = interpolate(img_tensor,
                                         mode='bilinear',
                                         size=target_size,
                                         align_corners=False)
            _, _, h, w = img_tensor.shape
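            # Box targets are stored normalized to [0, 1]; rescale columns 3:
            # (the box coordinates) to pixels for the current input resolution.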
            with torch.no_grad():
                targets_tensor[:, 3:] = targets_tensor[:, 3:] * torch.tensor(
                    data=[w, h, w, h])
                img_tensor = img_tensor.to(self.device)
                targets_tensor = targets_tensor.to(self.device)
            self.optimizer.zero_grad()
            cls_predicts, reg_predicts, anchors = self.model(img_tensor)
            total_loss, detail_loss, total_num = self.creterion(
                cls_predicts, reg_predicts, anchors, targets_tensor)
            match_num += total_num
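            # Apex O1 mixed precision: scale the loss before backward so fp16
            # gradients do not underflow, then clip gradients on the fp32 master
            # parameters.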
            with amp.scale_loss(total_loss, self.optimizer) as scaled_loss:
                scaled_loss.backward()
            nn.utils.clip_grad_norm_(amp.master_params(self.optimizer),
                                     max_norm=self.optim_cfg['max_norm'],
                                     norm_type=2)
            self.lr_adjuster(self.optimizer, i, epoch)
            lr = self.optimizer.param_groups[0]['lr']
            self.optimizer.step()
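            # Maintain an exponential moving average of the weights; the EMA copy
            # is saved alongside the raw model at validation time.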
            self.ema.update(self.model)
            loss_cls, loss_reg = detail_loss
            loss_list[0].append(total_loss.item())
            loss_list[1].append(loss_cls.item())
            loss_list[2].append(loss_reg.item())
            if self.local_rank == 0:
                pbar.set_description(
                    "epoch:{:2d}|match_num:{:4d}|size:{:3d}|total_loss:{:6.4f}|neg_cls:{:6.4f}|pos_reg:{:6.4f}|lr:{:8.6f}"
                    .format(epoch + 1, total_num, h, total_loss.item(),
                            loss_cls.item(), loss_reg.item(), lr))
        self.ema.update_attr(self.model)
        mean_loss_list = [np.array(item).mean() for item in loss_list]
        print(
            "epoch:{:3d}|match_num:{:4d}|local:{:3d}|total_loss:{:6.4f}|neg_cls:{:6.4f}|pos_reg:{:6.4f}|lr:{:8.6f}"
            .format(epoch + 1, match_num, self.local_rank, mean_loss_list[0],
                    mean_loss_list[1], mean_loss_list[2], lr))

    @torch.no_grad()
    def val(self, epoch):
        predict_list = list()
        target_list = list()
        self.model.eval()
        if self.local_rank == 0:
            pbar = tqdm(self.vloader)
        else:
            pbar = self.vloader
        for img_tensor, targets_tensor, _ in pbar:
            _, _, h, w = img_tensor.shape
            targets_tensor[:, 3:] = targets_tensor[:, 3:] * torch.tensor(
                data=[w, h, w, h])
            img_tensor = img_tensor.to(self.device)
            targets_tensor = targets_tensor.to(self.device)
            predicts = self.model(img_tensor)

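            # Clip predicted boxes to the image bounds, then run NMS with the
            # configured confidence/IoU thresholds.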
            for i in range(len(predicts)):
                predicts[i][:, [0, 2]] = predicts[i][:, [0, 2]].clamp(min=0, max=w)
                predicts[i][:, [1, 3]] = predicts[i][:, [1, 3]].clamp(min=0, max=h)
            predicts = non_max_suppression(
                predicts,
                conf_thresh=self.val_cfg['conf_thresh'],
                iou_thresh=self.val_cfg['iou_thresh'],
                max_det=self.val_cfg['max_det'],
            )
            for i, predict in enumerate(predicts):
                predict_list.append(predict)
                targets_sample = targets_tensor[targets_tensor[:, 0] == i][:, 2:]
                target_list.append(targets_sample)
        mp, mr, map50, map = coco_map(predict_list, target_list)
        print(
            "epoch: {:2d}|local:{:d}|mp:{:6.4f}|mr:{:6.4f}|map50:{:6.4f}|map:{:6.4f}"
            .format(epoch + 1, self.local_rank, mp * 100, mr * 100,
                    map50 * 100, map * 100))
        last_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:d}_{:s}_last.pth".format(self.local_rank,
                                        self.cfg['model_name']))
        best_map_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:d}_{:s}_best_map.pth".format(self.local_rank,
                                            self.cfg['model_name']))
        best_map50_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:d}_{:s}_best_map50.pth".format(self.local_rank,
                                              self.cfg['model_name']))
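        # The checkpoint bundles the raw and EMA weights with the metrics used for
        # best-model selection; only rank 0 writes it to disk.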
        model_static = self.model.module.state_dict() if is_parallel(
            self.model) else self.model.state_dict()

        ema_static = self.ema.ema.state_dict()
        cpkt = {
            "ori": model_static,
            "ema": ema_static,
            "map": map * 100,
            "epoch": epoch,
            "map50": map50 * 100
        }
        if self.local_rank != 0:
            return
        torch.save(cpkt, last_weight_path)
        if map > self.best_map:
            torch.save(cpkt, best_map_weight_path)
            self.best_map = map
        if map50 > self.best_map50:
            torch.save(cpkt, best_map50_weight_path)
            self.best_map50 = map50

    def run(self):
        for epoch in range(self.optim_cfg['epochs']):
            self.train(epoch)
            if (epoch + 1) % self.val_cfg['interval'] == 0:
                self.val(epoch)
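

# A minimal launch sketch for the processor above (illustrative only: the entry
# script name, config path, and CLI flags are assumptions, not part of the
# original code). Each GPU process builds one processor and calls run();
# dist.init_process_group(backend='nccl') picks up its rank and world size from
# the environment variables set by the launcher.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    # torch.distributed.launch injects --local_rank; accept it even though the
    # processor derives its rank from dist.get_rank().
    parser.add_argument('--local_rank', type=int, default=0)
    parser.add_argument('--config', type=str, default='config/retinanet.yaml')
    args = parser.parse_args()
    DDPApexProcessor(cfg_path=args.config).run()
# Launched with, for example:
#   python -m torch.distributed.launch --nproc_per_node=2 train.py --config config/retinanet.yaml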

# Example 4: EfficientDet DDP trainer

    def __init__(self, cfg_path):
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']
        self.model_cfg = self.cfg['model']
        self.optim_cfg = self.cfg['optim']
        self.hyper_params = self.cfg['hyper_params']
        self.val_cfg = self.cfg['val']
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.hyper_params)
        print(self.val_cfg)
        os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
        dist.init_process_group(backend='nccl')
        img_size = int(self.model_cfg['compound_coef']) * 128 + 512
        self.tdata = COCODataSets(img_root=self.data_cfg['train_img_root'],
                                  annotation_path=self.data_cfg['train_annotation_path'],
                                  img_size=img_size,
                                  debug=self.data_cfg['debug'],
                                  use_crowd=self.data_cfg['use_crowd'],
                                  augments=True,
                                  remove_blank=self.data_cfg['remove_blank']
                                  )
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collate_fn,
                                  sampler=DistributedSampler(dataset=self.tdata, shuffle=True))
        self.vdata = COCODataSets(img_root=self.data_cfg['val_img_root'],
                                  annotation_path=self.data_cfg['val_annotation_path'],
                                  img_size=img_size,
                                  debug=self.data_cfg['debug'],
                                  use_crowd=self.data_cfg['use_crowd'],
                                  augments=False,
                                  remove_blank=False
                                  )
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.vdata.collate_fn,
                                  sampler=DistributedSampler(dataset=self.vdata, shuffle=False))
        print("train_data: ", len(self.tdata), " | ",
              "val_data: ", len(self.vdata), " | ",
              "empty_data: ", self.tdata.empty_images_len)
        print("train_iter: ", len(self.tloader), " | ",
              "val_iter: ", len(self.vloader))
        model = EfficientDet(num_cls=self.model_cfg['num_cls'],
                             compound_coef=self.model_cfg['compound_coef']
                             )
        self.best_map = 0.
        self.best_map50 = 0.
        optimizer = split_optimizer(model, self.optim_cfg)
        local_rank = dist.get_rank()
        self.local_rank = local_rank
        self.device = torch.device("cuda", local_rank)
        model.to(self.device)
        self.scaler = amp.GradScaler(enabled=True)
        if self.optim_cfg['sync_bn']:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        self.model = nn.parallel.distributed.DistributedDataParallel(model,
                                                                     device_ids=[local_rank],
                                                                     output_device=local_rank)
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)

        self.creterion = RetinaLoss(iou_thresh=self.hyper_params['iou_thresh'],
                                    ignore_thresh=self.hyper_params['ignore_thresh'],
                                    alpha=self.hyper_params['alpha'],
                                    gamma=self.hyper_params['gamma'],
                                    iou_type=self.hyper_params['iou_type'],
                                    coord_type=self.hyper_params['coord_type']
                                    )
        self.lr_adjuster = WarmUpCosineDecayMultiStepLRAdjust(init_lr=self.optim_cfg['lr'],
                                                              milestones=self.optim_cfg['milestones'],
                                                              warm_up_epoch=self.optim_cfg['warm_up_epoch'],
                                                              iter_per_epoch=len(self.tloader),
                                                              epochs=self.optim_cfg['epochs'],
                                                              cosine_weights=self.optim_cfg['cosine_weights']
                                                              )
class COCODDPApexProcessor(object):
    def __init__(self, cfg_path):
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']
        self.model_cfg = self.cfg['model']
        self.optim_cfg = self.cfg['optim']
        self.hyper_params = self.cfg['hyper_params']
        self.val_cfg = self.cfg['val']
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.hyper_params)
        print(self.val_cfg)
        os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
        dist.init_process_group(backend='nccl')
        self.tdata = COCODataSets(
            img_root=self.data_cfg['train_img_root'],
            annotation_path=self.data_cfg['train_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            augments=True,
            use_crowd=self.data_cfg['use_crowd'],
            remove_blank=self.data_cfg['remove_blank'])
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.tdata, shuffle=True))
        self.vdata = COCODataSets(
            img_root=self.data_cfg['val_img_root'],
            annotation_path=self.data_cfg['val_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            augments=False,
            use_crowd=self.data_cfg['use_crowd'],
            remove_blank=False)
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.vdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.vdata, shuffle=False))
        print("train_data: ", len(self.tdata), " | ", "val_data: ",
              len(self.vdata), " | ", "empty_data: ",
              self.tdata.empty_images_len)
        print("train_iter: ", len(self.tloader), " | ", "val_iter: ",
              len(self.vloader))
        model = YOLOv5(
            num_cls=self.model_cfg['num_cls'],
            anchors=self.model_cfg['anchors'],
            strides=self.model_cfg['strides'],
            scale_name=self.model_cfg['scale_name'],
        )
        self.best_map = 0.
        self.best_map50 = 0.
        optimizer = split_optimizer(model, self.optim_cfg)
        local_rank = dist.get_rank()
        self.local_rank = local_rank
        self.device = torch.device("cuda", local_rank)
        model.to(self.device)
        pretrain = self.model_cfg.get("pretrain", None)
        if pretrain:
            pretrain_weights = torch.load(pretrain, map_location=self.device)
            load_info = model.load_state_dict(pretrain_weights, strict=False)
            print("load_info ", load_info)
        self.scaler = amp.GradScaler(enabled=True)
        if self.optim_cfg['sync_bn']:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        self.model = nn.parallel.distributed.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank)
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)

        self.creterion = YOLOv5LossOriginal(
            iou_type=self.hyper_params['iou_type'], )
        self.lr_adjuster = EpochWarmUpCosineDecayLRAdjust(
            init_lr=self.optim_cfg['lr'],
            warm_up_epoch=self.optim_cfg['warm_up_epoch'],
            iter_per_epoch=len(self.tloader),
            epochs=self.optim_cfg['epochs'],
            alpha=self.optim_cfg['alpha'],
            gamma=self.optim_cfg['gamma'],
            bias_idx=2)

    def train(self, epoch):
        self.model.train()
        if self.model_cfg['freeze_bn']:
            self.model.apply(freeze_bn)
        if self.local_rank == 0:
            pbar = tqdm(self.tloader)
        else:
            pbar = self.tloader
        loss_list = [list(), list(), list(), list()]
        ulr = 0
        dlr = 0
        match_num = 0
        for i, (img_tensor, targets_tensor, _) in enumerate(pbar):
            if len(self.hyper_params['multi_scale']) > 2:
                target_size = np.random.choice(
                    self.hyper_params['multi_scale'])
                img_tensor = interpolate(img_tensor,
                                         mode='bilinear',
                                         size=target_size,
                                         align_corners=False)
            _, _, h, w = img_tensor.shape
            with torch.no_grad():
                img_tensor = img_tensor.to(self.device)
                # bs_idx, weights, label_idx, x1, y1, x2, y2
                # Convert the box columns in place from corner form (x1, y1, x2, y2)
                # to center form (cx, cy, w, h) expected by the YOLOv5 loss.
                targets_tensor[:, [5, 6]] = targets_tensor[:, [5, 6]] - targets_tensor[:, [3, 4]]
                targets_tensor[:, [3, 4]] = targets_tensor[:, [3, 4]] + targets_tensor[:, [5, 6]] / 2.
                targets_tensor = targets_tensor.to(self.device)
            self.optimizer.zero_grad()
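            # Native AMP: run the forward pass under autocast, scale the loss for
            # an fp16-safe backward, then step the optimizer through the GradScaler.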
            with amp.autocast(enabled=True):
                predicts, anchors = self.model(img_tensor)
                total_loss, detail_loss, total_num = self.creterion(
                    predicts, targets_tensor, anchors)
            self.scaler.scale(total_loss).backward()
            match_num += total_num
            self.lr_adjuster(self.optimizer, i, epoch)
            ulr = self.optimizer.param_groups[0]['lr']
            dlr = self.optimizer.param_groups[2]['lr']
            self.scaler.step(self.optimizer)
            self.scaler.update()
            self.ema.update(self.model)
            loss_box, loss_obj, loss_cls, loss = detail_loss
            loss_list[0].append(loss_box.item())
            loss_list[1].append(loss_obj.item())
            loss_list[2].append(loss_cls.item())
            loss_list[3].append(loss.item())
            if self.local_rank == 0:
                pbar.set_description(
                    "epoch:{:2d}|match_num:{:4d}|size:{:3d}|loss:{:6.4f}|loss_box:{:6.4f}|loss_obj:{:6.4f}|loss_cls:{:6.4f}|ulr:{:8.6f},dlr:{:8.6f}"
                    .format(epoch + 1, int(total_num), h, loss.item(),
                            loss_box.item(), loss_obj.item(), loss_cls.item(),
                            ulr, dlr))
        self.ema.update_attr(self.model)
        mean_loss_list = [np.array(item).mean() for item in loss_list]
        print(
            "epoch:{:3d}|match_num:{:4d}|local:{:3d}|loss:{:6.4f}||loss_box:{:6.4f}|loss_obj:{:6.4f}|loss_cls:{:6.4f}|ulr:{:8.6f}|dlr:{:8.6f}"
            .format(epoch + 1, match_num, self.local_rank, mean_loss_list[3],
                    mean_loss_list[0], mean_loss_list[1], mean_loss_list[2],
                    ulr, dlr))

    @torch.no_grad()
    def val(self, epoch):
        predict_list = list()
        target_list = list()
        # self.model.eval()
        if self.local_rank == 0:
            pbar = tqdm(self.vloader)
        else:
            pbar = self.vloader
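        # Validation runs on the EMA weights (self.ema.ema) rather than the raw
        # DDP-wrapped model.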
        for img_tensor, targets_tensor, _ in pbar:
            _, _, h, w = img_tensor.shape
            targets_tensor[:, 3:] = targets_tensor[:, 3:] * torch.tensor(
                data=[w, h, w, h])
            img_tensor = img_tensor.to(self.device)
            targets_tensor = targets_tensor.to(self.device)
            predicts = self.ema.ema(img_tensor)
            predicts = non_max_suppression(
                predicts,
                conf_thresh=self.val_cfg['conf_thresh'],
                iou_thresh=self.val_cfg['iou_thresh'],
                max_det=self.val_cfg['max_det'],
            )
            for i, predict in enumerate(predicts):
                if predict is not None:
                    clip_coords(predict, (h, w))
                predict_list.append(predict)
                targets_sample = targets_tensor[targets_tensor[:, 0] == i][:, 2:]
                target_list.append(targets_sample)
        mp, mr, map50, map = coco_map(predict_list, target_list)
        print(
            "epoch: {:2d}|local:{:d}|mp:{:6.4f}|mr:{:6.4f}|map50:{:6.4f}|map:{:6.4f}"
            .format(epoch + 1, self.local_rank, mp * 100, mr * 100,
                    map50 * 100, map * 100))
        last_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_last.pth".format(self.cfg['model_name']))
        best_map_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_best_map.pth".format(self.cfg['model_name']))
        best_map50_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_best_map50.pth".format(self.cfg['model_name']))
        # model_static = self.model.module.state_dict() if is_parallel(self.model) else self.model.state_dict()

        ema_static = self.ema.ema.state_dict()
        cpkt = {
            "ema": ema_static,
            "map": map * 100,
            "epoch": epoch,
            "map50": map50 * 100
        }
        if self.local_rank != 0:
            return
        torch.save(cpkt, last_weight_path)
        if map > self.best_map:
            torch.save(cpkt, best_map_weight_path)
            self.best_map = map
        if map50 > self.best_map50:
            torch.save(cpkt, best_map50_weight_path)
            self.best_map50 = map50

    def run(self):
        for epoch in range(self.optim_cfg['epochs']):
            self.train(epoch)
            if (epoch + 1) % self.val_cfg['interval'] == 0:
                self.val(epoch)
        dist.destroy_process_group()
        torch.cuda.empty_cache()
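
    # Single-GPU variant: plain shuffled DataLoaders instead of DistributedSampler,
    # a fixed cuda:0 device, and no process group or DDP wrapper.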
    def __init__(self, cfg_path):
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']
        self.model_cfg = self.cfg['model']
        self.optim_cfg = self.cfg['optim']
        self.hyper_params = self.cfg['hyper_params']
        self.val_cfg = self.cfg['val']
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.hyper_params)
        print(self.val_cfg)

        self.tdata = COCODataSets(
            img_root=self.data_cfg['train_img_root'],
            annotation_path=self.data_cfg['train_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=True,
            remove_blank=self.data_cfg['remove_blank'])
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collate_fn,
                                  shuffle=True)
        self.vdata = COCODataSets(
            img_root=self.data_cfg['val_img_root'],
            annotation_path=self.data_cfg['val_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=False,
            remove_blank=False)
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.vdata.collate_fn,
                                  shuffle=False)
        print("train_data: ", len(self.tdata), " | ", "val_data: ",
              len(self.vdata), " | ", "empty_data: ",
              self.tdata.empty_images_len)
        print("train_iter: ", len(self.tloader), " | ", "val_iter: ",
              len(self.vloader))
        model = RetinaNet(
            num_cls=self.model_cfg['num_cls'],
            anchor_sizes=self.model_cfg['anchor_sizes'],
            strides=self.model_cfg['strides'],
            backbone=self.model_cfg['backbone'],
        )
        if self.model_cfg.get("backbone_weight", None):
            weights = torch.load(self.model_cfg['backbone_weight'])
            model.load_backbone_weighs(weights)
        self.best_map = 0.
        self.best_map50 = 0.
        optimizer = split_optimizer(model, self.optim_cfg)

        self.local_rank = 0
        self.device = torch.device("cuda:0")
        model.to(self.device)

        self.model = model
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)

        beta = eval(self.hyper_params['beta']) if isinstance(self.hyper_params['beta'], str) \
            else self.hyper_params['beta']

        self.creterion = RetinaAnchorFreeLoss(
            alpha=self.hyper_params['alpha'],
            gamma=self.hyper_params['gamma'],
            beta=beta,
            top_k=self.hyper_params['top_k'],
            box_iou_thresh=self.hyper_params['box_iou_thresh'],
            box_reg_weight=self.hyper_params['box_reg_weight'])
        self.lr_adjuster = WarmUpCosineDecayMultiStepLRAdjust(
            init_lr=self.optim_cfg['lr'],
            milestones=self.optim_cfg['milestones'],
            warm_up_epoch=self.optim_cfg['warm_up_epoch'],
            iter_per_epoch=len(self.tloader),
            epochs=self.optim_cfg['epochs'],
            cosine_weights=self.optim_cfg['cosine_weights'])

# Example 8: FCOS DDP trainer with native mixed precision (torch.cuda.amp)

class DDPMixProcessor(object):
    def __init__(self, cfg_path):
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']
        self.model_cfg = self.cfg['model']
        self.optim_cfg = self.cfg['optim']
        self.hyper_params = self.cfg['hyper_params']
        self.val_cfg = self.cfg['val']
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.hyper_params)
        print(self.val_cfg)

        os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
        dist.init_process_group(backend='nccl', init_method='env://')

        self.tdata = COCODataSets(
            img_root=self.data_cfg['train_img_root'],
            annotation_path=self.data_cfg['train_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=True,
            remove_blank=self.data_cfg['remove_blank'])
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.tdata, shuffle=True))
        self.vdata = COCODataSets(
            img_root=self.data_cfg['val_img_root'],
            annotation_path=self.data_cfg['val_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=False,
            remove_blank=False)
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.vdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.vdata, shuffle=False))
        print("train_data: ", len(self.tdata), " | ", "val_data: ",
              len(self.vdata), " | ", "empty_data: ",
              self.tdata.empty_images_len)
        print("train_iter: ", len(self.tloader), " | ", "val_iter: ",
              len(self.vloader))

        local_rank = dist.get_rank()
        self.local_rank = local_rank
        self.device = torch.device("cuda", local_rank)

        model = FCOS(
            num_cls=self.model_cfg['num_cls'],
            strides=self.model_cfg['strides'],
            backbone=self.model_cfg['backbone'],
        )
        optimizer = split_optimizer(model, self.optim_cfg)
        model.to(self.device)
        pretrain = self.model_cfg.get('pretrain', None)
        if pretrain is not None:
            pretrained_weights = torch.load(pretrain, map_location=self.device)
            load_info = model.load_state_dict(pretrained_weights['ema'],
                                              strict=False)
            print('load info ', load_info)

        self.scaler = amp.GradScaler(enabled=True)
        if self.optim_cfg['sync_bn']:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        self.model = nn.parallel.distributed.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank)
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)

        self.creterion = FCOSLoss(
            alpha=self.hyper_params['alpha'],
            gamma=self.hyper_params['gamma'],
            radius=self.hyper_params['radius'],
            layer_limits=self.hyper_params['layer_limits'],
            strides=self.model_cfg['strides'],
            iou_type=self.hyper_params['iou_type'])
        self.lr_adjuster = WarmUpCosineDecayMultiStepLRAdjust(
            init_lr=self.optim_cfg['lr'],
            milestones=self.optim_cfg['milestones'],
            warm_up_epoch=self.optim_cfg['warm_up_epoch'],
            iter_per_epoch=len(self.tloader),
            epochs=self.optim_cfg['epochs'],
            cosine_weights=self.optim_cfg['cosine_weights'])

        self.tb_writer = None
        if self.local_rank == 0:
            log_dir = 'runs/'
            print(
                'Start Tensorboard with "tensorboard --logdir %s", view at http://localhost:6006/'
                % log_dir)
            self.tb_writer = SummaryWriter(log_dir=log_dir)

        self.best_map = 0.
        self.best_map50 = 0.

    def train(self, epoch):
        self.model.train()
        if self.model_cfg['freeze_bn']:
            self.model.apply(freeze_bn)
        if self.local_rank == 0:
            pbar = tqdm(self.tloader)
        else:
            pbar = self.tloader
        loss_list = [list(), list(), list(), list()]
        lr0 = 0
        match_num = 0
        for i, (img_tensor, targets_tensor, _) in enumerate(pbar):
            if len(self.hyper_params['multi_scale']) > 2:
                target_size = np.random.choice(
                    self.hyper_params['multi_scale'])
                img_tensor = interpolate(img_tensor,
                                         mode='bilinear',
                                         size=target_size,
                                         align_corners=False)
            _, _, h, w = img_tensor.shape
            with torch.no_grad():
                targets_tensor[:, 3:] = targets_tensor[:, 3:] * torch.tensor(
                    data=[w, h, w, h])
                img_tensor = img_tensor.to(self.device)
                targets_tensor = targets_tensor.to(self.device)
            self.optimizer.zero_grad()
            # Forward pass under autocast (mixed precision)
            with amp.autocast(enabled=True):
                cls_outputs, reg_outputs, center_outputs, grids = self.model(
                    img_tensor)
                total_loss, detail_loss, total_num = self.creterion(
                    cls_outputs, reg_outputs, center_outputs, grids,
                    targets_tensor)
            match_num += total_num
            self.scaler.scale(total_loss).backward()
            self.lr_adjuster(self.optimizer, i, epoch)
            lr0 = self.optimizer.param_groups[0]['lr']
            lr1 = self.optimizer.param_groups[1]['lr']
            lr2 = self.optimizer.param_groups[2]['lr']
            self.scaler.step(self.optimizer)
            self.scaler.update()
            self.ema.update(self.model)

            loss_cls, loss_reg, loss_center = detail_loss
            loss_list[0].append(total_loss.item())
            loss_list[1].append(loss_cls.item())
            loss_list[2].append(loss_reg.item())
            loss_list[3].append(loss_center.item())
            if self.local_rank == 0:
                pbar.set_description(
                    "epoch:{:3d}|match_num:{:4d}|size:{:3d}|target_loss:{:6.4f}|loss_cls:{:6.4f}|loss_reg:{:6.4f}|loss_cen:{:6.4f}|lr:{:8.6f}"
                    .format(epoch + 1, int(total_num), h, total_loss.item(),
                            loss_cls.item(), loss_reg.item(),
                            loss_center.item(), lr0))
            if self.tb_writer:
                tags = [
                    'train/total_loss',
                    'train/cls_loss',
                    'train/reg_loss',
                    'train/center_loss',  # train loss
                    'x/lr0',
                    'x/lr1',
                    'x/lr2'
                ]  # params
                for x, tag in zip([
                        total_loss.item(),
                        loss_cls.item(),
                        loss_reg.item(),
                        loss_center.item(), lr0, lr1, lr2
                ], tags):
                    self.tb_writer.add_scalar(
                        tag, x, i + 1 + epoch * len(self.tloader))

        self.ema.update_attr(self.model)
        mean_loss_list = [np.array(item).mean() for item in loss_list]
        print(
            "epoch:{:3d}|match_num:{:4d}|local:{:3d}|target_loss:{:6.4f}|loss_cls:{:6.4f}|loss_reg:{:6.4f}|loss_cen:{:6.4f}|lr:{:8.6f}"
            .format(epoch + 1, int(match_num), self.local_rank,
                    mean_loss_list[0], mean_loss_list[1], mean_loss_list[2],
                    mean_loss_list[3], lr))

    @torch.no_grad()
    def val(self, epoch):
        predict_list = list()
        target_list = list()
        self.model.eval()
        if self.local_rank == 0:
            pbar = tqdm(self.vloader)
        else:
            pbar = self.vloader
        for img_tensor, targets_tensor, _ in pbar:
            _, _, h, w = img_tensor.shape
            targets_tensor[:, 3:] = targets_tensor[:, 3:] * torch.tensor(
                data=[w, h, w, h])
            img_tensor = img_tensor.to(self.device)
            targets_tensor = targets_tensor.to(self.device)
            predicts = self.ema.ema(img_tensor)

            for i in range(len(predicts)):
                predicts[i][:, [0, 2]] = predicts[i][:, [0, 2]].clamp(min=0, max=w)
                predicts[i][:, [1, 3]] = predicts[i][:, [1, 3]].clamp(min=0, max=h)
            predicts = non_max_suppression(
                predicts,
                conf_thresh=self.val_cfg['conf_thresh'],
                iou_thresh=self.val_cfg['iou_thresh'],
                max_det=self.val_cfg['max_det'],
            )
            for i, predict in enumerate(predicts):
                predict_list.append(predict)
                targets_sample = targets_tensor[targets_tensor[:, 0] == i][:, 2:]
                target_list.append(targets_sample)
        mp, mr, map50, map = coco_map(predict_list, target_list)
        print(
            "epoch: {:2d}|local:{:d}|mp:{:6.4f}|mr:{:6.4f}|map50:{:6.4f}|map:{:6.4f}"
            .format(epoch + 1, self.local_rank, mp * 100, mr * 100,
                    map50 * 100, map * 100))
        last_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_last.pth".format(self.cfg['model_name']))
        best_map_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_best_map.pth".format(self.cfg['model_name']))
        best_map50_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_best_map50.pth".format(self.cfg['model_name']))

        ema_static = self.ema.ema.state_dict()
        cpkt = {
            "ema": ema_static,
            "map": map * 100,
            "epoch": epoch,
            "map50": map50 * 100
        }
        if self.local_rank != 0:
            return

        if self.tb_writer:
            tags = [
                'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5',
                'metrics/mAP_0.5:0.95'
            ]
            for x, tag in zip([mp, mr, map50, map], tags):
                self.tb_writer.add_scalar(tag, x, epoch + 1)

        torch.save(cpkt, last_weight_path)
        if map > self.best_map:
            torch.save(cpkt, best_map_weight_path)
            self.best_map = map
        if map50 > self.best_map50:
            torch.save(cpkt, best_map50_weight_path)
            self.best_map50 = map50

    def run(self):
        for epoch in range(self.optim_cfg['epochs']):
            self.train(epoch)
            if (epoch + 1) % self.val_cfg['interval'] == 0:
                self.val(epoch)
        dist.destroy_process_group()
        torch.cuda.empty_cache()

# Example 9: CenterNet DDP trainer

class DDPApexProcessor(object):
    def __init__(self, cfg_path):
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']
        self.model_cfg = self.cfg['model']
        self.optim_cfg = self.cfg['optim']
        self.hyper_params = self.cfg['hyper_params']
        self.val_cfg = self.cfg['val']
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.hyper_params)
        print(self.val_cfg)
        os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
        self.gpu_num = len(str(self.cfg['gpus']).split(","))
        dist.init_process_group(backend='nccl')
        ###########################################################################################
        self.tdata = COCODataSets(
            img_root=self.data_cfg['train_img_root'],
            annotation_path=self.data_cfg['train_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=True,
            remove_blank=self.data_cfg['remove_blank'])
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.tdata, shuffle=True))
        self.vdata = COCODataSets(
            img_root=self.data_cfg['val_img_root'],
            annotation_path=self.data_cfg['val_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=False,
            remove_blank=False)
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=1,
                                  num_workers=1,
                                  collate_fn=self.vdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.vdata, shuffle=False))
        print("train_data: ", len(self.tdata), " | ", "val_data: ",
              len(self.vdata), " | ", "empty_data: ",
              self.tdata.empty_images_len)
        print("train_iter: ", len(self.tloader), " | ", "val_iter: ",
              len(self.vloader))
        ############################################################################################
        model = CenterNet(num_cls=self.model_cfg['num_cls'],
                          PIXEL_MEAN=self.model_cfg['PIXEL_MEAN'],
                          PIXEL_STD=self.model_cfg['PIXEL_STD'],
                          backbone=self.model_cfg['backbone'],
                          cfg=self.model_cfg)

        self.best_map = 0.
        self.best_map50 = 0.
        optimizer = split_optimizer(model, self.optim_cfg)
        local_rank = dist.get_rank()
        self.local_rank = local_rank
        self.device = torch.device("cuda", local_rank)
        model.to(self.device)
        if self.optim_cfg['sync_bn']:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        self.model = nn.parallel.distributed.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank)
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)

        self.gt_generator = CenterNetGT(
            alpha=self.model_cfg['alpha'],
            beta=self.model_cfg['beta'],
            num_cls=self.model_cfg['num_cls'],
            wh_planes=self.model_cfg['wh_planes'],
            down_ratio=self.model_cfg['down_ratio'],
            wh_area_process=self.model_cfg['wh_area_process'])

        self.creterion = CenterNetLoss(
            hm_weight=self.hyper_params['hm_weight'],
            wh_weight=self.hyper_params['wh_weight'],
            down_ratio=self.model_cfg['down_ratio'])

        self.lr_adjuster = WarmUpCosineDecayMultiStepLRAdjust(
            init_lr=self.optim_cfg['lr'],
            milestones=self.optim_cfg['milestones'],
            warm_up_epoch=self.optim_cfg['warm_up_epoch'],
            iter_per_epoch=len(self.tloader),
            epochs=self.optim_cfg['epochs'],
            cosine_weights=self.optim_cfg['cosine_weights'])

    def train(self, epoch):
        self.model.train()
        if self.model_cfg['freeze_bn']:
            self.model.apply(freeze_bn)
        if self.local_rank == 0:
            pbar = tqdm(self.tloader)
        else:
            pbar = self.tloader

        loss_list = [list(), list(), list()]
        lr = 0

        for i, (img_tensor, targets_tensor, _) in enumerate(pbar):
            if len(self.hyper_params['multi_scale']) > 2:
                target_size = np.random.choice(
                    self.hyper_params['multi_scale'])
                img_tensor = interpolate(img_tensor,
                                         mode='bilinear',
                                         size=target_size,
                                         align_corners=False)
            bs, _, h, w = img_tensor.shape
            with torch.no_grad():
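                # boxes in targets_tensor are normalized; scale them to pixel coordinates of the
                # current (possibly multi-scale) input before building the CenterNet targets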
                targets_tensor[:, 3:] = targets_tensor[:, 3:] * torch.tensor(
                    data=[w, h, w, h])
                img_tensor = img_tensor.to(device=self.device)
                heatmaps, box_targets, reg_weights = self.gt_generator(
                    targets_tensor, [h, w], bs)

            self.optimizer.zero_grad()
            cls_pred, wh_pred = self.model(img_tensor)
            total_loss, detail_loss = self.creterion(cls_pred, wh_pred,
                                                     heatmaps, box_targets,
                                                     reg_weights)
            total_loss.backward()
            # nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=self.optim_cfg['max_norm'],
            #                          norm_type=2)
            self.lr_adjuster(self.optimizer, i, epoch)
            lr = self.optimizer.param_groups[0]['lr']
            self.optimizer.step()
            self.ema.update(self.model)
            loss_cls, loss_wh = detail_loss
            loss_list[0].append(total_loss.item())
            loss_list[1].append(loss_cls.item())
            loss_list[2].append(loss_wh.item())
            if self.local_rank == 0:
                pbar.set_description(
                    "epoch:{:2d}|size:{:3d}|target_loss:{:6.4f}|loss_cls:{:6.4f}|loss_wh:{:6.4f}|lr:{:8.6f}"
                    .format(epoch + 1, h, total_loss.item(), loss_cls.item(),
                            loss_wh.item(), lr))
        self.ema.update_attr(self.model)
        mean_loss_list = [np.array(item).mean() for item in loss_list]
        print(
            "epoch:{:3d}|local:{:3d}|target_loss:{:6.4f}|loss_cls:{:6.4f}|loss_wh:{:6.4f}|lr:{:8.6f}"
            .format(epoch + 1, self.local_rank, mean_loss_list[0],
                    mean_loss_list[1], mean_loss_list[2], lr))

    @torch.no_grad()
    def val(self, epoch):
        predict_list = list()
        target_list = list()
        self.model.eval()
        self.ema.ema.eval()
        if self.local_rank == 0:
            pbar = tqdm(self.vloader)
        else:
            pbar = self.vloader
        for img_tensor, targets_tensor, _ in pbar:
            _, _, h, w = img_tensor.shape
            targets_tensor[:, 3:] = targets_tensor[:, 3:] * torch.tensor(
                data=[w, h, w, h])
            img_tensor = img_tensor.to(self.device)
            targets_tensor = targets_tensor.to(self.device)
            predicts = self.ema.ema(
                img_tensor
            )  # list of per-image predictions; predict.shape = [num_box, 6] ==> (x1, y1, x2, y2, score, label)
            for i, predict in enumerate(predicts):
                predict[:, [0, 2]] = predict[:, [0, 2]].clamp(min=0, max=w)
                predict[:, [1, 3]] = predict[:, [1, 3]].clamp(min=0, max=h)
                predict_list.append(predict)
                targets_sample = targets_tensor[targets_tensor[:, 0] == i][:, 2:]
                target_list.append(targets_sample)
        mp, mr, map50, map = coco_map(predict_list, target_list)
        mp = reduce_sum(torch.tensor(mp,
                                     device=self.device)).item() / self.gpu_num
        mr = reduce_sum(torch.tensor(mr,
                                     device=self.device)).item() / self.gpu_num
        map50 = reduce_sum(torch.tensor(
            map50, device=self.device)).item() / self.gpu_num
        map = reduce_sum(torch.tensor(
            map, device=self.device)).item() / self.gpu_num
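        # reduce_sum is a project helper (not shown here); presumably it all-reduces the metric
        # across ranks so that the division by gpu_num yields the mean over GPUs, e.g.:
        #
        #     def reduce_sum(tensor):
        #         dist.all_reduce(tensor, op=dist.ReduceOp.SUM)
        #         return tensor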
        print(
            "epoch: {:2d}|local:{:d}|mp:{:6.4f}|mr:{:6.4f}|map50:{:6.4f}|map:{:6.4f}"
            .format(epoch + 1, self.local_rank, mp * 100, mr * 100,
                    map50 * 100, map * 100))
        last_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:d}_{:s}_last.pth".format(self.local_rank,
                                        self.cfg['model_name']))
        best_map_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:d}_{:s}_best_map.pth".format(self.local_rank,
                                            self.cfg['model_name']))
        best_map50_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:d}_{:s}_best_map50.pth".format(self.local_rank,
                                              self.cfg['model_name']))

        ema_static = self.ema.ema.state_dict()
        cpkt = {
            "ema": ema_static,
            "map": map * 100,
            "epoch": epoch,
            "map50": map50 * 100
        }
        if self.local_rank != 0:
            return
        torch.save(cpkt, last_weight_path)
        if map > self.best_map:
            torch.save(cpkt, best_map_weight_path)
            self.best_map = map
        if map50 > self.best_map50:
            torch.save(cpkt, best_map50_weight_path)
            self.best_map50 = map50

    def run(self):
        for epoch in range(self.optim_cfg['epochs']):
            self.train(epoch)
            if (epoch + 1) % self.val_cfg['interval'] == 0:
                self.val(epoch)
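# ModelEMA above is a project helper (not shown in these examples). A minimal sketch of a
# YOLOv5-style exponential moving average wrapper, assuming a fixed decay; the real helper may
# also ramp the decay over updates and copy non-parameter attributes in update_attr().
import copy

import torch


class ModelEMASketch(object):
    def __init__(self, model, decay=0.9999):
        # keep an eval-mode copy of the (unwrapped) model whose weights are smoothed over time
        base = model.module if hasattr(model, 'module') else model
        self.ema = copy.deepcopy(base).eval()
        self.decay = decay
        for p in self.ema.parameters():
            p.requires_grad_(False)

    def update(self, model):
        # blend the current weights into the EMA copy: ema = decay * ema + (1 - decay) * model
        msd = (model.module if hasattr(model, 'module') else model).state_dict()
        with torch.no_grad():
            for k, v in self.ema.state_dict().items():
                if v.dtype.is_floating_point:
                    v.mul_(self.decay).add_(msd[k].detach(), alpha=1. - self.decay)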
Example #10
    def __init__(self, cfg_path):
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']
        self.model_cfg = self.cfg['model']
        self.optim_cfg = self.cfg['optim']
        self.hyper_params = self.cfg['hyper_params']
        self.val_cfg = self.cfg['val']
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.hyper_params)
        print(self.val_cfg)
        os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
        self.gpu_num = len(str(self.cfg['gpus']).split(","))
        dist.init_process_group(backend='nccl')
        ###########################################################################################
        self.tdata = COCODataSets(
            img_root=self.data_cfg['train_img_root'],
            annotation_path=self.data_cfg['train_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=True,
            remove_blank=self.data_cfg['remove_blank'])
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.tdata, shuffle=True))
        self.vdata = COCODataSets(
            img_root=self.data_cfg['val_img_root'],
            annotation_path=self.data_cfg['val_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=False,
            remove_blank=False)
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=1,
                                  num_workers=1,
                                  collate_fn=self.vdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.vdata, shuffle=False))
        print("train_data: ", len(self.tdata), " | ", "val_data: ",
              len(self.vdata), " | ", "empty_data: ",
              self.tdata.empty_images_len)
        print("train_iter: ", len(self.tloader), " | ", "val_iter: ",
              len(self.vloader))
        ############################################################################################
        model = CenterNet(num_cls=self.model_cfg['num_cls'],
                          PIXEL_MEAN=self.model_cfg['PIXEL_MEAN'],
                          PIXEL_STD=self.model_cfg['PIXEL_STD'],
                          backbone=self.model_cfg['backbone'],
                          cfg=self.model_cfg)

        self.best_map = 0.
        self.best_map50 = 0.
        optimizer = split_optimizer(model, self.optim_cfg)
        local_rank = dist.get_rank()
        self.local_rank = local_rank
        self.device = torch.device("cuda", local_rank)
        model.to(self.device)
        if self.optim_cfg['sync_bn']:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        self.model = nn.parallel.distributed.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank)
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)

        self.gt_generator = CenterNetGT(
            alpha=self.model_cfg['alpha'],
            beta=self.model_cfg['beta'],
            num_cls=self.model_cfg['num_cls'],
            wh_planes=self.model_cfg['wh_planes'],
            down_ratio=self.model_cfg['down_ratio'],
            wh_area_process=self.model_cfg['wh_area_process'])

        self.creterion = CenterNetLoss(
            hm_weight=self.hyper_params['hm_weight'],
            wh_weight=self.hyper_params['wh_weight'],
            down_ratio=self.model_cfg['down_ratio'])

        self.lr_adjuster = WarmUpCosineDecayMultiStepLRAdjust(
            init_lr=self.optim_cfg['lr'],
            milestones=self.optim_cfg['milestones'],
            warm_up_epoch=self.optim_cfg['warm_up_epoch'],
            iter_per_epoch=len(self.tloader),
            epochs=self.optim_cfg['epochs'],
            cosine_weights=self.optim_cfg['cosine_weights'])
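# WarmUpCosineDecayMultiStepLRAdjust is a project helper whose implementation is not shown in
# these examples. A rough sketch of the kind of schedule its name suggests (linear warm-up
# followed by cosine decay), ignoring the milestones/cosine_weights handling; names and
# parameters here are illustrative only.
import math


def warmup_cosine_lr(init_lr, step, warm_up_steps, total_steps):
    # linear warm-up from ~0 to init_lr, then a half-cosine decay towards 0
    if step < warm_up_steps:
        return init_lr * (step + 1) / warm_up_steps
    progress = (step - warm_up_steps) / max(1, total_steps - warm_up_steps)
    return 0.5 * init_lr * (1. + math.cos(math.pi * progress))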
Example #11
class DDPMixProcessor(object):
    def __init__(self, cfg_path):
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']
        self.model_cfg = self.cfg['model']
        self.optim_cfg = self.cfg['optim']
        self.val_cfg = self.cfg['val']
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.val_cfg)
        os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
        self.gpu_num = len(str(self.cfg['gpus']).split(","))
        dist.init_process_group(backend='nccl')
        ### dataloader -------------------------------------------------------------------------------------------------
        self.tdata = COCODataSets(
            img_root=self.data_cfg['train_img_root'],
            annotation_path=self.data_cfg['train_annotation_path'],
            min_thresh=self.data_cfg['min_thresh'],
            max_thresh=self.data_cfg['max_thresh'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=True,
            remove_blank=self.data_cfg['remove_blank'])
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.tdata, shuffle=True))
        self.vdata = COCODataSets(
            img_root=self.data_cfg['val_img_root'],
            annotation_path=self.data_cfg['val_annotation_path'],
            min_thresh=self.data_cfg['min_thresh'],
            max_thresh=self.data_cfg['max_thresh'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=False,
            remove_blank=False)
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.vdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.vdata, shuffle=False))
        print("train_data: ", len(self.tdata), " | ", "val_data: ",
              len(self.vdata), " | ", "empty_data: ",
              self.tdata.empty_images_len)
        print("train_iter: ", len(self.tloader), " | ", "val_iter: ",
              len(self.vloader))

        ##### model set ------------------------------------------------------------------------------------------------
        model = FasterRCNN(cfg=self.model_cfg)
        self.scaler = amp.GradScaler(enabled=True)
        self.best_map = 0.
        self.best_map50 = 0.
        optimizer = split_optimizer(model, self.optim_cfg)
        local_rank = dist.get_rank()
        self.local_rank = local_rank
        self.device = torch.device("cuda", local_rank)
        model.to(self.device)
        if self.optim_cfg['sync_bn']:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        self.model = nn.parallel.distributed.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank)
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)
        self.lr_adjuster = IterWarmUpCosineDecayMultiStepLRAdjust(
            init_lr=self.optim_cfg['lr'],
            warm_up_epoch=self.optim_cfg['warm_up_epoch'],
            iter_per_epoch=len(self.tloader),
            epochs=self.optim_cfg['epochs'],
            alpha=self.optim_cfg['alpha'],
            gamma=self.optim_cfg['gamma'],
            bias_idx=2,
            milestones=self.optim_cfg['milestones'])

    def train(self, epoch):
        self.model.train()
        if self.model_cfg['freeze_bn']:
            self.model.apply(freeze_bn)
        if self.local_rank == 0:
            pbar = tqdm(self.tloader)
        else:
            pbar = self.tloader
        loss_list = [list(), list(), list(), list(), list()]
        ulr = 0
        dlr = 0
        for i, (img_tensor, targets_tensor, _) in enumerate(pbar):
            _, _, h, w = img_tensor.shape
            with torch.no_grad():
                targets_tensor[:, 3:] = targets_tensor[:, 3:] * torch.tensor(
                    data=[w, h, w, h])
                img_tensor = img_tensor.to(self.device)
                targets_tensor = targets_tensor.to(self.device)
            self.optimizer.zero_grad()
            with amp.autocast(enabled=True):
                _, total_loss = self.model(img_tensor, None, targets_tensor)
                rpn_cls_loss = total_loss['rpn_cls_loss']
                rpn_box_loss = total_loss['rpn_box_loss']
                roi_cls_loss = total_loss['roi_cls_loss']
                roi_box_loss = total_loss['roi_box_loss']
                loss = rpn_cls_loss + rpn_box_loss + roi_cls_loss + roi_box_loss
            self.scaler.scale(loss).backward()
            self.lr_adjuster(self.optimizer, i, epoch)
            ulr = self.optimizer.param_groups[0]['lr']
            dlr = self.optimizer.param_groups[2]['lr']
            self.scaler.step(self.optimizer)
            self.scaler.update()
            self.ema.update(self.model)
            loss_list[0].append(loss.item())
            loss_list[1].append(rpn_cls_loss.item())
            loss_list[2].append(rpn_box_loss.item())
            loss_list[3].append(roi_cls_loss.item())
            loss_list[4].append(roi_box_loss.item())
            if self.local_rank == 0:
                pbar.set_description(
                    "epoch:{:2d}|loss:{:6.4f}|{:6.4f}|{:6.4f}|{:6.4f}|{:6.4f}|ulr:{:8.6f},dlr:{:8.6f}"
                    .format(epoch + 1, loss.item(), rpn_cls_loss.item(),
                            rpn_box_loss.item(), roi_cls_loss.item(),
                            roi_box_loss.item(), ulr, dlr))
        self.ema.update_attr(self.model)
        mean_loss_list = [np.array(item).mean() for item in loss_list]
        print(
            "epoch:{:3d}|local:{:3d}|loss:{:6.4f}|{:6.4f}|:{:6.4f}|{:6.4f}|{:6.4f}|ulr:{:8.6f}|dlr:{:8.6f}"
            .format(epoch + 1, self.local_rank, mean_loss_list[0],
                    mean_loss_list[1], mean_loss_list[2], mean_loss_list[3],
                    mean_loss_list[4], ulr, dlr))

    @torch.no_grad()
    def val(self, epoch):
        self.ema.ema.eval()
        predict_list = list()
        target_list = list()
        # self.model.eval()
        if self.local_rank == 0:
            pbar = tqdm(self.vloader)
        else:
            pbar = self.vloader
        for i, (img_tensor, targets_tensor, _) in enumerate(pbar):
            _, _, h, w = img_tensor.shape
            targets_tensor[:, 3:] = targets_tensor[:, 3:] * torch.tensor(
                data=[w, h, w, h])
            img_tensor = img_tensor.to(self.device)
            targets_tensor = targets_tensor.to(self.device)
            predicts, _ = self.ema.ema(img_tensor, None)
            for i, predict in enumerate(predicts):
                predict_list.append(predict)
                targets_sample = targets_tensor[targets_tensor[:, 0] == i][:, 2:]
                target_list.append(targets_sample)
        mp, mr, map50, map = coco_map(predict_list, target_list)
        mp = reduce_sum(torch.tensor(mp,
                                     device=self.device)).item() / self.gpu_num
        mr = reduce_sum(torch.tensor(mr,
                                     device=self.device)).item() / self.gpu_num
        map50 = reduce_sum(torch.tensor(
            map50, device=self.device)).item() / self.gpu_num
        map = reduce_sum(torch.tensor(
            map, device=self.device)).item() / self.gpu_num
        if self.local_rank == 0:
            print(
                "epoch: {:2d}|gpu_num:{:d}|mp:{:6.4f}|mr:{:6.4f}|map50:{:6.4f}|map:{:6.4f}"
                .format(epoch + 1, self.gpu_num, mp * 100, mr * 100,
                        map50 * 100, map * 100))
        last_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_last.pth".format(self.cfg['model_name']))
        best_map_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_best_map.pth".format(self.cfg['model_name']))
        best_map50_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_best_map50.pth".format(self.cfg['model_name']))
        ema_static = self.ema.ema.state_dict()
        cpkt = {
            "ema": ema_static,
            "map": map * 100,
            "epoch": epoch,
            "map50": map50 * 100
        }
        if self.local_rank != 0:
            return
        torch.save(cpkt, last_weight_path)
        if map > self.best_map:
            torch.save(cpkt, best_map_weight_path)
            self.best_map = map
        if map50 > self.best_map50:
            torch.save(cpkt, best_map50_weight_path)
            self.best_map50 = map50

    def run(self):
        for epoch in range(self.optim_cfg['epochs']):
            self.train(epoch)
            if (epoch + 1) % self.val_cfg['interval'] == 0:
                self.val(epoch)
        dist.destroy_process_group()
        torch.cuda.empty_cache()
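# Note (not part of the original example): DistributedSampler reshuffles between epochs only
# when sampler.set_epoch(epoch) is called before iterating the loader; none of these train()
# loops do so. A sketch of where such a call would go:
#
#     def train(self, epoch):
#         self.tloader.sampler.set_epoch(epoch)
#         self.model.train()
#         ...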
Example #12
    def __init__(self, cfg_path):
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']
        self.model_cfg = self.cfg['model']
        self.optim_cfg = self.cfg['optim']
        self.val_cfg = self.cfg['val']
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.val_cfg)
        os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
        self.gpu_num = len(str(self.cfg['gpus']).split(","))
        dist.init_process_group(backend='nccl')
        self.tdata = COCODataSets(
            img_root=self.data_cfg['train_img_root'],
            annotation_path=self.data_cfg['train_annotation_path'],
            min_thresh=self.data_cfg['min_thresh'],
            max_thresh=self.data_cfg['max_thresh'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=True,
            remove_blank=self.data_cfg['remove_blank'])
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.tdata, shuffle=True))
        self.vdata = COCODataSets(
            img_root=self.data_cfg['val_img_root'],
            annotation_path=self.data_cfg['val_annotation_path'],
            min_thresh=self.data_cfg['min_thresh'],
            max_thresh=self.data_cfg['max_thresh'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=False,
            remove_blank=False)
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.vdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.vdata, shuffle=False))
        print("train_data: ", len(self.tdata), " | ", "val_data: ",
              len(self.vdata), " | ", "empty_data: ",
              self.tdata.empty_images_len)
        print("train_iter: ", len(self.tloader), " | ", "val_iter: ",
              len(self.vloader))
        model = DETR(self.model_cfg)
        self.scaler = amp.GradScaler(enabled=True)
        self.best_map = 0.
        self.best_map50 = 0.
        param_dicts = [
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if "backbone" not in n and p.requires_grad
                ]
            },
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if "backbone" in n and p.requires_grad
                ],
                "lr":
                self.optim_cfg['backbone_lr'],
            },
        ]
        optimizer = torch.optim.AdamW(
            param_dicts,
            lr=self.optim_cfg['lr'],
            weight_decay=self.optim_cfg['weight_decay'])
        local_rank = dist.get_rank()
        self.local_rank = local_rank
        self.device = torch.device("cuda", local_rank)
        model.to(self.device)
        self.model = nn.parallel.distributed.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank)
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)
        self.lr_adjuster = torch.optim.lr_scheduler.StepLR(
            optimizer, self.optim_cfg['decay_steps'])
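# Sketch (hypothetical, not part of the example): after the AdamW construction above, param
# group 0 holds the non-backbone (transformer/head) parameters at optim_cfg['lr'] and group 1
# holds the backbone parameters at optim_cfg['backbone_lr'], which is what the two learning
# rates a training loop logs would correspond to. One could verify the split with:
#
#     for idx, group in enumerate(optimizer.param_groups):
#         print(idx, len(group['params']), group['lr'])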
Example #13
class DDPMixProcessor(object):
    def __init__(self, cfg_path):
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']
        self.model_cfg = self.cfg['model']
        self.optim_cfg = self.cfg['optim']
        self.val_cfg = self.cfg['val']
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.val_cfg)
        os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
        self.gpu_num = len(str(self.cfg['gpus']).split(","))
        dist.init_process_group(backend='nccl')
        self.tdata = COCODataSets(
            img_root=self.data_cfg['train_img_root'],
            annotation_path=self.data_cfg['train_annotation_path'],
            min_thresh=self.data_cfg['min_thresh'],
            max_thresh=self.data_cfg['max_thresh'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=True,
            remove_blank=self.data_cfg['remove_blank'])
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.tdata, shuffle=True))
        self.vdata = COCODataSets(
            img_root=self.data_cfg['val_img_root'],
            annotation_path=self.data_cfg['val_annotation_path'],
            min_thresh=self.data_cfg['min_thresh'],
            max_thresh=self.data_cfg['max_thresh'],
            debug=self.data_cfg['debug'],
            use_crowd=self.data_cfg['use_crowd'],
            augments=False,
            remove_blank=False)
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.vdata.collate_fn,
                                  sampler=DistributedSampler(
                                      dataset=self.vdata, shuffle=False))
        print("train_data: ", len(self.tdata), " | ", "val_data: ",
              len(self.vdata), " | ", "empty_data: ",
              self.tdata.empty_images_len)
        print("train_iter: ", len(self.tloader), " | ", "val_iter: ",
              len(self.vloader))
        model = DETR(self.model_cfg)
        self.scaler = amp.GradScaler(enabled=True)
        self.best_map = 0.
        self.best_map50 = 0.
        param_dicts = [
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if "backbone" not in n and p.requires_grad
                ]
            },
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if "backbone" in n and p.requires_grad
                ],
                "lr":
                self.optim_cfg['backbone_lr'],
            },
        ]
        optimizer = torch.optim.AdamW(
            param_dicts,
            lr=self.optim_cfg['lr'],
            weight_decay=self.optim_cfg['weight_decay'])
        local_rank = dist.get_rank()
        self.local_rank = local_rank
        self.device = torch.device("cuda", local_rank)
        model.to(self.device)
        self.model = nn.parallel.distributed.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank)
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)
        self.lr_adjuster = torch.optim.lr_scheduler.StepLR(
            optimizer, self.optim_cfg['decay_steps'])

    def train(self, epoch):
        self.model.train()
        if self.local_rank == 0:
            pbar = tqdm(self.tloader)
        else:
            pbar = self.tloader
        loss_list = [list(), list(), list(), list()]
        ulr = 0
        dlr = 0
        for i, (input_tensor, _) in enumerate(pbar):
            input_tensor.to(self.device)
            self.optimizer.zero_grad()
            with amp.autocast(enabled=True):
                _, total_loss = self.model(input_tensor)
                cls_loss = total_loss['cls_loss']
                dis_loss = total_loss['dis_loss']
                iou_loss = total_loss['iou_loss']
                loss = cls_loss + dis_loss + iou_loss
            self.scaler.scale(loss).backward()
            ulr = self.optimizer.param_groups[0]['lr']
            dlr = self.optimizer.param_groups[1]['lr']
            nn.utils.clip_grad_norm_(self.model.parameters(), 0.1)
            self.scaler.step(self.optimizer)
            self.scaler.update()
            self.ema.update(self.model)
            loss_list[0].append(loss.item())
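            # the division by 6 below presumably averages the reported losses over DETR's six
            # decoder layers (auxiliary losses); this is an assumption, not stated in the example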
            loss_list[1].append(cls_loss.item() / 6)
            loss_list[2].append(dis_loss.item() / 6)
            loss_list[3].append(iou_loss.item() / 6)
            if self.local_rank == 0:
                pbar.set_description(
                    "epoch:{:2d}|loss:{:6.4f}|{:6.4f}|{:6.4f}|{:6.4f}|ulr:{:8.6f},dlr:{:8.6f}"
                    .format(epoch + 1, loss.item(),
                            cls_loss.item() / 6,
                            dis_loss.item() / 6,
                            iou_loss.item() / 6, ulr, dlr))
        self.lr_adjuster.step()
        self.ema.update_attr(self.model)
        mean_loss_list = [np.array(item).mean() for item in loss_list]
        print(
            "epoch:{:3d}|local:{:3d}|loss:{:6.4f}|{:6.4f}|:{:6.4f}|{:6.4f}|ulr:{:8.6f}|dlr:{:8.6f}"
            .format(epoch + 1, self.local_rank, mean_loss_list[0],
                    mean_loss_list[1], mean_loss_list[2], mean_loss_list[3],
                    ulr, dlr))

    @torch.no_grad()
    def val(self, epoch):
        self.model.eval()
        self.ema.ema.eval()
        predict_list = list()
        target_list = list()
        # self.model.eval()
        if self.local_rank == 0:
            pbar = tqdm(self.vloader)
        else:
            pbar = self.vloader
        for i, (input_tensor, _) in enumerate(pbar):
            input_tensor.un_normalize_box()
            input_tensor.to(self.device)
            predicts, _ = self.model(input_tensor)
            for i, predict in enumerate(predicts):
                predict_list.append(predict)
                boxes = input_tensor.boxes[i]
                labels = input_tensor.labels[i]
                target_list.append(
                    torch.cat([labels.float().unsqueeze(-1), boxes], dim=-1))
        mp, mr, map50, map = coco_map(predict_list, target_list)
        mp = reduce_sum(torch.tensor(mp,
                                     device=self.device)).item() / self.gpu_num
        mr = reduce_sum(torch.tensor(mr,
                                     device=self.device)).item() / self.gpu_num
        map50 = reduce_sum(torch.tensor(
            map50, device=self.device)).item() / self.gpu_num
        map = reduce_sum(torch.tensor(
            map, device=self.device)).item() / self.gpu_num
        if self.local_rank == 0:
            print(
                "epoch: {:2d}|gpu_num:{:d}|mp:{:6.4f}|mr:{:6.4f}|map50:{:6.4f}|map:{:6.4f}"
                .format(epoch + 1, self.gpu_num, mp * 100, mr * 100,
                        map50 * 100, map * 100))
        last_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_last.pth".format(self.cfg['model_name']))
        best_map_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_best_map.pth".format(self.cfg['model_name']))
        best_map50_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_best_map50.pth".format(self.cfg['model_name']))
        ema_static = self.ema.ema.state_dict()
        cpkt = {
            "ema": ema_static,
            "map": map * 100,
            "epoch": epoch,
            "map50": map50 * 100,
            "model": self.model.module.state_dict()
        }
        if self.local_rank != 0:
            return
        torch.save(cpkt, last_weight_path)
        if map > self.best_map:
            torch.save(cpkt, best_map_weight_path)
            self.best_map = map
        if map50 > self.best_map50:
            torch.save(cpkt, best_map50_weight_path)
            self.best_map50 = map50

    def run(self):
        for epoch in range(self.optim_cfg['epochs']):
            self.train(epoch)
            if (epoch + 1) % self.val_cfg['interval'] == 0:
                self.val(epoch)
        dist.destroy_process_group()
        torch.cuda.empty_cache()
class COCODDPApexProcessor(object):
    def __init__(self, cfg_path):
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']  # dataset params
        self.model_cfg = self.cfg['model']  # model params
        self.optim_cfg = self.cfg['optim']  # optim params
        self.hyper_params = self.cfg['hyper_params']  # other hyper params
        self.val_cfg = self.cfg['val']  # validation hyper params
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.hyper_params)
        print(self.val_cfg)

        os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg[
            'gpus']  # set available GPUs

        ## load dataset ---------------------------------------------------------------------------------------
        # self.tdata = COCODataSets(img_root=self.data_cfg['train_img_root'],
        #                           annotation_path=self.data_cfg['train_annotation_path'],
        #                           img_size=self.data_cfg['img_size'],
        #                           debug=self.data_cfg['debug'],
        #                           augments=True,
        #                           remove_blank=self.data_cfg['remove_blank'],
        #                             image_weight = self.hyper_params['use_weight_sample']
        #                           )
        self.tdata = BDD100DataSets(
            img_root=self.data_cfg['train_img_root'],
            annotation_path=self.data_cfg['train_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            augments=True,
            remove_blank=self.data_cfg['remove_blank'],
            image_weight=self.hyper_params['use_weight_sample'])
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collate_fn)

        # self.vdata = COCODataSets(img_root=self.data_cfg['val_img_root'],
        #                           annotation_path=self.data_cfg['val_annotation_path'],
        #                           img_size=self.data_cfg['img_size'],
        #                           debug=self.data_cfg['debug'],
        #                           augments=False,
        #                           remove_blank=False
        #                           )
        self.vdata = BDD100DataSets(
            img_root=self.data_cfg['val_img_root'],
            annotation_path=self.data_cfg['val_annotation_path'],
            img_size=self.data_cfg['img_size'],
            debug=self.data_cfg['debug'],
            augments=False,
            remove_blank=False)
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.vdata.collate_fn)

        print("train_data: ", len(self.tdata), " | ", "val_data: ",
              len(self.vdata), " | ", "empty_data: ",
              self.tdata.empty_images_len)
        print("train_iter: ", len(self.tloader), " | ", "val_iter: ",
              len(self.vloader))

        ### define model -------------------------------------------------------------------------------------
        model = YOLOv5(in_channels=3,
                       num_cls=self.model_cfg['num_cls'],
                       anchors=self.model_cfg['anchors'],
                       strides=self.model_cfg['strides'],
                       scale_name=self.model_cfg['scale_name'])

        ### check anchor -------------------------------------------------------------------------------------
        # check_anchors(self.tdata,model,self.hyper_params['anchor_t'],self.data_cfg['img_size'])

        ############------------------------------------------------------------------------------------------
        self.best_map = 0.
        self.best_map50 = 0.

        optimizer = split_optimizer(model, self.optim_cfg)

        self.device = torch.device('cuda:0')
        model.to(self.device)
        pretrain = self.model_cfg.get('pretrain', None)
        if pretrain:
            pretrained_weights = torch.load(pretrain, map_location=self.device)
            load_info = model.load_state_dict(pretrained_weights['ema'],
                                              strict=False)
            print('load info ', load_info)

        # set up mixed-precision training with the native AMP API shipped with torch 1.6
        self.scaler = amp.GradScaler(enabled=True)

        self.model = model
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)

        self.creterion = YOLOv5LossOriginal(
            iou_type=self.hyper_params['iou_type'],
            fl_gamma=self.hyper_params['fl_gamma'],
            class_smoothing_eps=self.hyper_params['class_smoothing_eps'])

        self.lr_adjuster = WarmUpCosineDecayMultiStepLRAdjust(
            init_lr=self.optim_cfg['lr'],
            milestones=self.optim_cfg['milestones'],
            warm_up_epoch=self.optim_cfg['warm_up_epoch'],
            iter_per_epoch=len(self.tloader),
            epochs=self.optim_cfg['epochs'],
            cosine_weights=self.optim_cfg['cosine_weights'])

        ## for class-aware weighted sampling ---------------------------------------------------------------------
        if self.hyper_params['use_weight_sample']:
            self.class_weights = labels_to_class_weights(
                self.tdata.labels,
                nc=self.model_cfg['num_cls']).to(self.device)
        else:
            self.class_weights = None
        self.maps = np.zeros(self.model_cfg['num_cls'])  # mAP per class

    def train(self, epoch):
        self.model.train()
        if self.model_cfg['freeze_bn']:
            self.model.apply(freeze_bn)

        if self.hyper_params['use_weight_sample']:
            cw = self.class_weights.cpu().numpy() * (
                1 - self.maps)**2  # class weight
            iw = labels_to_image_weights(self.tdata.labels,
                                         nc=self.model_cfg['num_cls'],
                                         class_weights=cw)  # image weight
            self.tdata.indices = random.choices(
                range(len(self.tdata)), weights=iw,
                k=len(self.tdata))  # rand weighted idx

        pbar = tqdm(self.tloader)

        loss_list = [list(), list(), list(),
                     list()]  # loss_box, loss_obj, loss_cls, loss
        lr = 0
        match_num = 0

        for i, (img_tensor, targets_tensor, _, _) in enumerate(pbar):
            '''
            img_tensor: [bs,3,h,w]
            targets_tensor:  [bs,7] (bs_idx,weights,label_idx,x1,y1,x2,y2), box annotations have been normalized
            '''
            if len(self.hyper_params['multi_scale']) >= 2:
                target_size = np.random.choice(
                    self.hyper_params['multi_scale'])
                img_tensor = interpolate(img_tensor,
                                         mode='bilinear',
                                         size=target_size,
                                         align_corners=False)
            _, _, h, w = img_tensor.shape
            with torch.no_grad():
                img_tensor = img_tensor.to(self.device)
                # bs_idx,weights,label_idx,x1,y1,x2,y2
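                # convert normalized corner boxes (x1, y1, x2, y2) into center form (cx, cy, w, h):
                # columns 5,6 become width/height, then columns 3,4 become the box centre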
                targets_tensor[:, [
                    5, 6
                ]] = targets_tensor[:, [5, 6]] - targets_tensor[:, [3, 4]]
                targets_tensor[:, [
                    3, 4
                ]] = targets_tensor[:, [3, 4]] + targets_tensor[:, [5, 6]] / 2.
                targets_tensor = targets_tensor.to(self.device)

            self.optimizer.zero_grad()
            # Forward
            # mixed-precision forward pass
            with amp.autocast(True):
                '''
                predicts(list): len=nl, predicts[i].shape=(bs,3,ny,nx,85)
                normalized_anchor(torch.Tensor): shape=[3,3,2]
                '''
                predicts, anchors = self.model(img_tensor)
                # compute the losses: classification loss, objectness loss and box regression loss
                # total_loss is the overall loss; detail_loss is a tuple of
                # (loss_box, loss_obj, loss_cls, loss), matching the unpacking below
                total_loss, detail_loss, total_num = self.creterion(
                    predicts, targets_tensor,
                    anchors)  # loss scaled by batch_size

            self.scaler.scale(total_loss).backward()
            match_num += total_num
            # nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=self.optim_cfg['max_norm'],norm_type=2)
            self.lr_adjuster(self.optimizer, i, epoch)
            lr = self.optimizer.param_groups[0]['lr']
            self.scaler.step(self.optimizer)
            self.scaler.update()
            self.ema.update(self.model)

            loss_box, loss_obj, loss_cls, loss = detail_loss
            loss_list[0].append(loss_box.item())
            loss_list[1].append(loss_obj.item())
            loss_list[2].append(loss_cls.item())
            loss_list[3].append(loss.item())
            pbar.set_description(
                "epoch:{:2d}|match_num:{:4d}|size:{:3d}|loss:{:6.4f}|loss_box:{:6.4f}|loss_obj:{:6.4f}|loss_cls:{:6.4f}|lr:{:8.6f}"
                .format(epoch + 1, int(total_num), h, loss.item(),
                        loss_box.item(), loss_obj.item(), loss_cls.item(), lr))

        self.ema.update_attr(self.model)
        mean_loss_list = [np.array(item).mean() for item in loss_list]
        print(
            "epoch:{:3d}|match_num:{:4d}|local:{:3d}|loss:{:6.4f}||loss_box:{:6.4f}|loss_obj:{:6.4f}|loss_cls:{:6.4f}|lr:{:8.6f}"
            .format(epoch + 1, match_num, 0, mean_loss_list[3],
                    mean_loss_list[0], mean_loss_list[1], mean_loss_list[2],
                    lr))

    @torch.no_grad()
    def val(self, epoch):
        predict_list = list()
        target_list = list()
        # self.model.eval()

        pbar = tqdm(self.vloader)

        path_list, shape_list = [], []
        for img_tensor, targets_tensor, imgIDs, shapes in pbar:
            _, _, h, w = img_tensor.shape
            targets_tensor[:, 3:] = targets_tensor[:, 3:] * torch.tensor(
                data=[w, h, w, h])
            img_tensor = img_tensor.to(self.device)
            targets_tensor = targets_tensor.to(self.device)
            predicts = self.ema.ema(img_tensor)
            #  predicts after nms is a list(len=bs), its element has shape=[N,6]  6==>(x1, y1, x2, y2, conf, cls)
            predicts = non_max_suppression(
                predicts,
                conf_thresh=self.val_cfg['conf_thresh'],
                iou_thresh=self.val_cfg['iou_thresh'],
                max_det=self.val_cfg['max_det'])

            for i, predict in enumerate(predicts):
                if predict is not None:
                    clip_coords(predict, (h, w))
                predict_list.append(predict)
                targets_sample = targets_tensor[targets_tensor[:, 0] == i][:, 2:]
                target_list.append(targets_sample)
                path_list.append(imgIDs[i])
                shape_list.append(shapes[i])
            # after above code block, predict_list(len=len(dataset)), its element shape = [n,6]  6==>(x1,y1,x2,y2,score,cls_id)
            #                         target_list(len=len(dataset)), its element shape = [m, 5] 5==>(cls_id,x1,y1,x2,y2)
        mp, mr, map50, map, self.maps = coco_map(predict_list, target_list,
                                                 path_list, shape_list,
                                                 self.data_cfg['img_size'])
        print(
            "epoch: {:2d}|local:{:d}|mp:{:6.4f}|mr:{:6.4f}|map50:{:6.4f}|map:{:6.4f}"
            .format(epoch + 1, 0, mp * 100, mr * 100, map50 * 100, map * 100))
        last_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_last.pth".format(self.cfg['model_name']))
        best_map_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_best_map.pth".format(self.cfg['model_name']))
        best_map50_weight_path = os.path.join(
            self.val_cfg['weight_path'],
            "{:s}_best_map50.pth".format(self.cfg['model_name']))
        # model_static = self.model.module.state_dict() if is_parallel(self.model) else self.model.state_dict()

        ema_static = self.ema.ema.state_dict()
        cpkt = {
            "ema": ema_static,
            "map": map * 100,
            "epoch": epoch,
            "map50": map50 * 100
        }

        torch.save(cpkt, last_weight_path)
        if map > self.best_map:
            torch.save(cpkt, best_map_weight_path)
            self.best_map = map
        if map50 > self.best_map50:
            torch.save(cpkt, best_map50_weight_path)
            self.best_map50 = map50

    def run(self):
        for epoch in range(self.optim_cfg['epochs']):
            self.train(epoch)
            if (epoch + 1) % self.val_cfg['interval'] == 0:
                self.val(epoch)
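# Usage sketch (not part of the original example): despite its name, this class never calls
# dist.init_process_group or wraps the model in DistributedDataParallel, so it runs as a single
# process on cuda:0; the YAML path below is hypothetical.
if __name__ == '__main__':
    COCODDPApexProcessor('yolov5_bdd100k.yaml').run()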