def eval_model(weight_path="weights/gfocalV2_resnet50_best_map.pth", device="cuda:0"): from pycocotools.coco import COCO device = torch.device(device) with open("config/gfocal.yaml", 'r') as rf: cfg = yaml.safe_load(rf) net = GFocal(**{ **cfg['model'], 'pretrained': False, "nms_iou_thresh": 0.6 }) net.load_state_dict(torch.load(weight_path, map_location="cpu")['ema']) net.to(device) net.eval().half() data_cfg = cfg['data'] basic_transform = RandScaleToMax(max_threshes=[data_cfg['max_thresh']]) coco = COCO(data_cfg['val_annotation_path']) coco_predict_list = list() time_logger = AverageLogger() pbar = tqdm(coco.imgs.keys()) for img_id in pbar: file_name = coco.imgs[img_id]['file_name'] img_path = os.path.join(data_cfg['val_img_root'], file_name) img = cv.imread(img_path) h, w, _ = img.shape img, ratio, (left, top) = basic_transform.make_border( img, max_thresh=data_cfg['max_thresh'], border_val=(103, 116, 123)) img_inp = (img[:, :, ::-1] / 255.0 - np.array(rgb_mean)) / np.array(rgb_std) img_inp = torch.from_numpy(img_inp).unsqueeze(0).permute( 0, 3, 1, 2).contiguous().float().to(device).half() tic = time.time() predict = net(img_inp)["predicts"][0] duration = time.time() - tic time_logger.update(duration) pbar.set_description("fps:{:4.2f}".format(1 / time_logger.avg())) if predict is None: continue predict[:, [0, 2]] = ((predict[:, [0, 2]] - left) / ratio).clamp(min=0, max=w) predict[:, [1, 3]] = ((predict[:, [1, 3]] - top) / ratio).clamp(min=0, max=h) box = predict.cpu().numpy() coco_box = box[:, :4] coco_box[:, 2:] = coco_box[:, 2:] - coco_box[:, :2] for p, b in zip(box.tolist(), coco_box.tolist()): coco_predict_list.append({ 'image_id': img_id, 'category_id': coco_ids[int(p[5])], 'bbox': [round(x, 3) for x in b], 'score': round(p[4], 5) }) with open("predicts.json", 'w') as file: json.dump(coco_predict_list, file) coco_eavl(anno_path=data_cfg['val_annotation_path'], pred_path="predicts.json")
def eval_model(weight_path="weights/faster_rcnn_resnet50_last.pth", device="cuda:4"): from pycocotools.coco import COCO device = torch.device(device) with open("config/faster.yaml", 'r') as rf: cfg = yaml.safe_load(rf) net = FasterRCNN(**{**cfg['model'], 'pretrained': False}) net.load_state_dict(torch.load(weight_path, map_location="cpu")['ema']) net.to(device) net.eval() data_cfg = cfg['data'] basic_transform = RandScaleMinMax(min_threshes=[640], max_thresh=data_cfg['max_thresh']) coco = COCO(data_cfg['val_annotation_path']) coco_predict_list = list() time_logger = AverageLogger() pbar = tqdm(coco.imgs.keys()) for img_id in pbar: file_name = coco.imgs[img_id]['file_name'] img_path = os.path.join(data_cfg['val_img_root'], file_name) img = cv.imread(img_path) h, w, _ = img.shape img, ratio = basic_transform.scale_img(img, min_thresh=640) h_, w_ = img.shape[:2] padding_size = max(h_, w_) img_inp = np.ones((padding_size, padding_size, 3)) * np.array( (103, 116, 123)) img_inp[:h_, :w_, :] = img img_inp = (img_inp[:, :, ::-1] / 255.0 - np.array(rgb_mean)) / np.array(rgb_std) img_inp = torch.from_numpy(img_inp).unsqueeze(0).permute( 0, 3, 1, 2).contiguous().float().to(device) tic = time.time() predict = net(img_inp, valid_size=[(padding_size, padding_size)])[0] duration = time.time() - tic time_logger.update(duration) pbar.set_description("fps:{:4.2f}".format(1 / time_logger.avg())) if predict is None: continue predict[:, [0, 2]] = (predict[:, [0, 2]] / ratio).clamp(min=0, max=w) predict[:, [1, 3]] = (predict[:, [1, 3]] / ratio).clamp(min=0, max=h) box = predict.cpu().numpy() coco_box = box[:, :4] coco_box[:, 2:] = coco_box[:, 2:] - coco_box[:, :2] for p, b in zip(box.tolist(), coco_box.tolist()): coco_predict_list.append({ 'image_id': img_id, 'category_id': coco_ids[int(p[5])], 'bbox': [round(x, 3) for x in b], 'score': round(p[4], 5) }) with open("predicts.json", 'w') as file: json.dump(coco_predict_list, file) coco_eavl(anno_path=data_cfg['val_annotation_path'], pred_path="predicts.json")
class DDPMixSolver(object):
    def __init__(self, cfg_path):
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']
        self.model_cfg = self.cfg['model']
        self.optim_cfg = self.cfg['optim']
        self.val_cfg = self.cfg['val']
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.val_cfg)
        os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
        self.gpu_num = len(str(self.cfg['gpus']).split(","))
        dist.init_process_group(backend='nccl')
        self.tdata = CustomerDataSets(json_path=self.data_cfg['train_json_path'],
                                      debug=self.data_cfg['debug'],
                                      augment=True)
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collate_fn,
                                  sampler=DistributedSampler(dataset=self.tdata, shuffle=True))
        self.vdata = CustomerDataSets(json_path=self.data_cfg['val_json_path'],
                                      debug=self.data_cfg['debug'],
                                      augment=False)
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.vdata.collate_fn,
                                  sampler=DistributedSampler(dataset=self.vdata, shuffle=False))
        print("train_data: ", len(self.tdata), " | ", "val_data: ", len(self.vdata))
        print("train_iter: ", len(self.tloader), " | ", "val_iter: ", len(self.vloader))
        if self.cfg['model_name'] == "v4":
            net = YOLOv4
        elif self.cfg['model_name'] == "v5":
            net = YOLOv5
        else:
            raise NotImplementedError("{:s} not supported yet".format(self.cfg['model_name']))
        model = net(num_cls=self.model_cfg['num_cls'],
                    anchors=self.model_cfg['anchors'],
                    strides=self.model_cfg['strides'],
                    scale_name=self.model_cfg['scale_name'])
        self.best_map = 0.
        optimizer = split_optimizer(model, self.optim_cfg)
        local_rank = dist.get_rank()
        self.local_rank = local_rank
        self.device = torch.device("cuda", local_rank)
        model.to(self.device)
        self.scaler = amp.GradScaler(enabled=True)
        if self.optim_cfg['sync_bn']:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        self.model = nn.parallel.distributed.DistributedDataParallel(model,
                                                                     device_ids=[local_rank],
                                                                     output_device=local_rank)
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)
        self.lr_adjuster = IterWarmUpCosineDecayMultiStepLRAdjust(init_lr=self.optim_cfg['lr'],
                                                                  warm_up_epoch=self.optim_cfg['warm_up_epoch'],
                                                                  iter_per_epoch=len(self.tloader),
                                                                  epochs=self.optim_cfg['epochs'],
                                                                  alpha=self.optim_cfg['alpha'],
                                                                  gamma=self.optim_cfg['gamma'],
                                                                  bias_idx=2,
                                                                  milestones=self.optim_cfg['milestones'])
        self.obj_logger = AverageLogger()
        self.iou_logger = AverageLogger()
        self.loss_logger = AverageLogger()
        self.map_logger = AverageLogger()

    def train(self, epoch):
        self.obj_logger.reset()
        self.iou_logger.reset()
        self.loss_logger.reset()
        self.model.train()
        if self.local_rank == 0:
            pbar = tqdm(self.tloader)
        else:
            pbar = self.tloader
        for i, (img_tensor, targets_tensor) in enumerate(pbar):
            with torch.no_grad():
                # multi-scale training: randomly rescale the whole batch
                if len(self.data_cfg['multi_scale']) > 2:
                    target_size = np.random.choice(self.data_cfg['multi_scale'])
                    img_tensor = interpolate(img_tensor, mode='bilinear',
                                             size=target_size, align_corners=False)
                _, _, h, w = img_tensor.shape
                img_tensor = img_tensor.to(self.device)
                targets_tensor = targets_tensor.to(self.device)
            self.optimizer.zero_grad()
            with amp.autocast(enabled=True):
                ret = self.model(img_tensor, targets_tensor)
                obj_loss = ret['obj_loss']
                iou_loss = ret['iou_loss']
                loss = obj_loss + iou_loss
            self.scaler.scale(loss).backward()
            self.lr_adjuster(self.optimizer, i, epoch)
            # learning rates of param groups 0 and 2 (bias group, cf. bias_idx=2) for logging
            ulr = self.optimizer.param_groups[0]['lr']
            dlr = self.optimizer.param_groups[2]['lr']
            self.scaler.step(self.optimizer)
            self.scaler.update()
            self.ema.update(self.model)
            self.obj_logger.update(obj_loss.item())
            self.iou_logger.update(iou_loss.item())
            self.loss_logger.update(loss.item())
            if self.local_rank == 0:
                pbar.set_description(
                    "epoch:{:2d}|size:{:3d}|loss:{:6.4f}|obj_loss:{:6.4f}|iou_loss:{:6.4f}|ulr:{:8.6f},dlr:{:8.6f}".format(
                        epoch + 1, h, self.loss_logger.avg(), obj_loss.item(), iou_loss.item(), ulr, dlr))
        self.ema.update_attr(self.model)
        print("epoch:{:3d}|local:{:3d}|loss:{:6.4f}|obj_loss:{:6.4f}|iou_loss:{:6.4f}".format(
            epoch + 1, self.local_rank, self.loss_logger.avg(),
            self.obj_logger.avg(), self.iou_logger.avg()))

    @torch.no_grad()
    def val(self, epoch):
        self.model.eval()
        self.ema.ema.eval()
        predict_list = list()
        target_list = list()
        if self.local_rank == 0:
            pbar = tqdm(self.vloader)
        else:
            pbar = self.vloader
        for img_tensor, targets_tensor in pbar:
            _, _, h, w = img_tensor.shape
            # denormalize targets and convert cx,cy,w,h -> x1,y1,x2,y2
            targets_tensor[:, 1:] = targets_tensor[:, 1:] * torch.tensor(data=[w, h, w, h])
            targets_tensor[:, [1, 2]] = targets_tensor[:, [1, 2]] - targets_tensor[:, [3, 4]] * 0.5
            targets_tensor[:, [3, 4]] = targets_tensor[:, [1, 2]] + targets_tensor[:, [3, 4]]
            img_tensor = img_tensor.to(self.device)
            targets_tensor = targets_tensor.to(self.device)
            predicts = self.ema.ema(img_tensor)['predicts']
            for i, pred in enumerate(predicts):
                if pred is not None:
                    pred = torch.cat([pred, torch.zeros_like(pred[..., [0]])], dim=-1)
                predict_list.append(pred)
                targets_sample = targets_tensor[targets_tensor[:, 0] == i][:, 1:]
                targets_sample = torch.cat([torch.zeros_like(targets_sample[..., [0]]), targets_sample], dim=-1)
                target_list.append(targets_sample)
        mp, mr, map50, map = coco_map(predict_list, target_list)
        mp = reduce_sum(torch.tensor(mp, device=self.device)).item() / self.gpu_num
        mr = reduce_sum(torch.tensor(mr, device=self.device)).item() / self.gpu_num
        map50 = reduce_sum(torch.tensor(map50, device=self.device)).item() / self.gpu_num
        map = reduce_sum(torch.tensor(map, device=self.device)).item() / self.gpu_num
        if self.local_rank == 0:
            print("epoch: {:2d}|gpu_num:{:d}|mp:{:6.4f}|mr:{:6.4f}|map50:{:6.4f}|map:{:6.4f}"
                  .format(epoch + 1, self.gpu_num, mp * 100, mr * 100, map50 * 100, map * 100))
        last_weight_path = os.path.join(self.val_cfg['weight_path'],
                                        "{:s}_{:s}_last.pth".format(self.cfg['model_name'],
                                                                    self.model_cfg['scale_name']))
        best_map_weight_path = os.path.join(self.val_cfg['weight_path'],
                                            "{:s}_{:s}_best_map.pth".format(self.cfg['model_name'],
                                                                            self.model_cfg['scale_name']))
        ema_static = self.ema.ema.state_dict()
        cpkt = {
            "ema": ema_static,
            "map": map * 100,
            "epoch": epoch,
        }
        if self.local_rank != 0:
            return
        torch.save(cpkt, last_weight_path)
        if map > self.best_map:
            torch.save(cpkt, best_map_weight_path)
            self.best_map = map

    def run(self):
        for epoch in range(self.optim_cfg['epochs']):
            self.train(epoch)
            if (epoch + 1) % self.val_cfg['interval'] == 0:
                self.val(epoch)
        dist.destroy_process_group()
        torch.cuda.empty_cache()
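# Illustrative sketch: how a DDP training run built around this solver is
# typically started. The script/config names (train_yolo.py, config/yolov5.yaml)
# are assumptions, not paths confirmed by this repo; the solver itself creates
# the NCCL process group and reads its rank from the launcher's environment,
# e.g.  torchrun --nproc_per_node=2 train_yolo.py
if __name__ == "__main__":
    solver = DDPMixSolver(cfg_path="config/yolov5.yaml")
    solver.run()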
def visualize_model(weight_path="weights/solov2_resnet50_last.pth", device="cuda:0"): from pycocotools.coco import COCO device = torch.device(device) with open("config/solov2.yaml", 'r') as rf: cfg = yaml.safe_load(rf) # "box_score_thresh": 0.8 net = SOLOv2(**{**cfg['model'], 'pretrained': False, }) net.load_state_dict(torch.load(weight_path, map_location="cpu")['ema']) net.to(device) net.eval() data_cfg = cfg['data'] basic_transform = RandScaleMinMax(min_threshes=[640], max_thresh=data_cfg['max_thresh']) coco = COCO(data_cfg['val_annotation_path']) coco_predict_list = list() time_logger = AverageLogger() pbar = tqdm(coco.imgs.keys()) i = 0 for img_id in pbar: file_name = coco.imgs[img_id]['file_name'] img_path = os.path.join(data_cfg['val_img_root'], file_name) img = cv.imread(img_path) # ori_img = img.copy() h, w, _ = img.shape img, ratio = basic_transform.scale_img(img, min_thresh=640) h_, w_ = img.shape[:2] padding_size = make_divisible(max(h_, w_), 64) img_inp = np.ones((padding_size, padding_size, 3)) * np.array((103, 116, 123)) img_inp[:h_, :w_, :] = img img_inp = (img_inp[:, :, ::-1] / 255.0 - np.array(rgb_mean)) / np.array(rgb_std) img_inp = torch.from_numpy(img_inp).unsqueeze(0).permute(0, 3, 1, 2).contiguous().float().to(device) tic = time.time() predict = net(img_inp, valid_size=[(w_, h_)])['predicts'][0] duration = time.time() - tic time_logger.update(duration) pbar.set_description("fps:{:4.2f}".format(1 / time_logger.avg())) box, mask = predict if len(box) == 0: continue box = box.cpu().numpy() mask = (mask.cpu().numpy()).astype(np.uint8) if len(mask.shape) == 2: mask = mask[None, ...] mask = mask.transpose(1, 2, 0) mask = cv.resize(mask, dsize=(w, h)) if len(mask.shape) == 2: mask = mask[..., None] mask = mask.transpose(2, 0, 1) # mask = mask.cpu().numpy() for p, m in zip(box.tolist(), mask): coco_predict_list.append({'image_id': img_id, 'category_id': coco_ids[int(p[5])], # 'bbox': [round(x, 3) for x in b], 'score': round(p[4], 5), 'segmentation': maskUtils.encode(np.asfortranarray(m))}) # box_seg_info = BoxSegInfo(img=ori_img, shape=(w, h), boxes=box[:, :4], labels=box[:, -1], mask=mask) # ret_img = box_seg_info.draw_mask(colors, coco_names, boxes=False) # import uuid # file_name = str(uuid.uuid4()).replace("-", "") # cv.imwrite("{:s}.jpg".format(file_name), ret_img) # i += 1 # if i == 20: # break coco_eavl(anno_path=data_cfg['val_annotation_path'], pred_path=coco_predict_list, type="segm")
class DDPMixSolver(object):
    def __init__(self, cfg_path):
        with open(cfg_path, 'r') as rf:
            self.cfg = yaml.safe_load(rf)
        self.data_cfg = self.cfg['data']
        self.model_cfg = self.cfg['model']
        self.optim_cfg = self.cfg['optim']
        self.val_cfg = self.cfg['val']
        print(self.data_cfg)
        print(self.model_cfg)
        print(self.optim_cfg)
        print(self.val_cfg)
        os.environ['CUDA_VISIBLE_DEVICES'] = self.cfg['gpus']
        self.gpu_num = len(self.cfg['gpus'].split(','))
        dist.init_process_group(backend='nccl')
        self.tdata = COCODataSets(img_root=self.data_cfg['train_img_root'],
                                  annotation_path=self.data_cfg['train_annotation_path'],
                                  max_thresh=self.data_cfg['max_thresh'],
                                  debug=self.data_cfg['debug'],
                                  use_crowd=self.data_cfg['use_crowd'],
                                  augments=True,
                                  remove_blank=self.data_cfg['remove_blank'])
        self.tloader = DataLoader(dataset=self.tdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.tdata.collect_fn,
                                  sampler=DistributedSampler(dataset=self.tdata, shuffle=True))
        self.vdata = COCODataSets(img_root=self.data_cfg['val_img_root'],
                                  annotation_path=self.data_cfg['val_annotation_path'],
                                  max_thresh=self.data_cfg['max_thresh'],
                                  debug=self.data_cfg['debug'],
                                  use_crowd=self.data_cfg['use_crowd'],
                                  augments=False,
                                  remove_blank=False)
        self.vloader = DataLoader(dataset=self.vdata,
                                  batch_size=self.data_cfg['batch_size'],
                                  num_workers=self.data_cfg['num_workers'],
                                  collate_fn=self.vdata.collect_fn,
                                  sampler=DistributedSampler(dataset=self.vdata, shuffle=False))
        print("train_data: ", len(self.tdata), " | ",
              "val_data: ", len(self.vdata), " | ",
              "empty_data: ", self.tdata.empty_images_len)
        print("train_iter: ", len(self.tloader), " | ", "val_iter: ", len(self.vloader))
        model = SparseRCNN(**self.model_cfg)
        self.best_map = 0.
        optimizer = split_optimizer_v2(model, self.optim_cfg)
        local_rank = dist.get_rank()
        self.local_rank = local_rank
        self.device = torch.device("cuda", local_rank)
        model.to(self.device)
        if self.optim_cfg['sync_bn']:
            model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
        self.model = nn.parallel.distributed.DistributedDataParallel(model,
                                                                     device_ids=[local_rank],
                                                                     output_device=local_rank)
        self.scaler = amp.GradScaler(enabled=True) if self.optim_cfg['amp'] else None
        self.optimizer = optimizer
        self.ema = ModelEMA(self.model)
        self.lr_adjuster = IterWarmUpMultiStepDecay(init_lr=self.optim_cfg['lr'],
                                                    milestones=self.optim_cfg['milestones'],
                                                    warm_up_iter=self.optim_cfg['warm_up_iter'],
                                                    iter_per_epoch=len(self.tloader),
                                                    epochs=self.optim_cfg['epochs'],
                                                    alpha=self.optim_cfg['alpha'],
                                                    warm_up_factor=self.optim_cfg['warm_up_factor'])
        self.cls_loss_logger = AverageLogger()
        self.l1_loss_logger = AverageLogger()
        self.iou_loss_logger = AverageLogger()
        self.match_num_logger = AverageLogger()
        self.loss_logger = AverageLogger()
        # if self.local_rank == 0:
        #     print(self.model)

    def train(self, epoch):
        self.loss_logger.reset()
        self.cls_loss_logger.reset()
        self.l1_loss_logger.reset()
        self.iou_loss_logger.reset()
        self.match_num_logger.reset()
        self.model.train()
        if self.local_rank == 0:
            pbar = tqdm(self.tloader)
        else:
            pbar = self.tloader
        for i, (img_tensor, targets_tensor, batch_len) in enumerate(pbar):
            _, _, h, w = img_tensor.shape
            with torch.no_grad():
                img_tensor = img_tensor.to(self.device)
                targets_tensor = targets_tensor.to(self.device)
            self.optimizer.zero_grad()
            if self.scaler is not None:
                # AMP path: forward in autocast, then scale the loss through the GradScaler
                with amp.autocast(enabled=True):
                    out = self.model(img_tensor,
                                     targets={"target": targets_tensor, "batch_len": batch_len})
                    cls_loss = out['cls_loss']
                    l1_loss = out['l1_loss']
                    iou_loss = out['iou_loss']
                    match_num = out['match_num']
                    loss = cls_loss + l1_loss + iou_loss
                self.scaler.scale(loss).backward()
                self.lr_adjuster(self.optimizer, i, epoch)
                self.scaler.step(self.optimizer)
                self.scaler.update()
            else:
                out = self.model(img_tensor,
                                 targets={"target": targets_tensor, "batch_len": batch_len})
                cls_loss = out['cls_loss']
                l1_loss = out['l1_loss']
                iou_loss = out['iou_loss']
                match_num = out['match_num']
                loss = cls_loss + l1_loss + iou_loss
                loss.backward()
                self.lr_adjuster(self.optimizer, i, epoch)
                self.optimizer.step()
            self.ema.update(self.model)
            lr = self.optimizer.param_groups[0]['lr']
            self.loss_logger.update(loss.item())
            self.iou_loss_logger.update(iou_loss.item())
            self.l1_loss_logger.update(l1_loss.item())
            self.cls_loss_logger.update(cls_loss.item())
            self.match_num_logger.update(match_num)
            str_template = \
                "epoch:{:2d}|match_num:{:0>4d}|size:{:3d}|loss:{:6.4f}|cls:{:6.4f}|l1:{:6.4f}|iou:{:6.4f}|lr:{:8.6f}"
            if self.local_rank == 0:
                pbar.set_description(
                    str_template.format(epoch + 1,
                                        int(match_num),
                                        h,
                                        self.loss_logger.avg(),
                                        self.cls_loss_logger.avg(),
                                        self.l1_loss_logger.avg(),
                                        self.iou_loss_logger.avg(),
                                        lr))
        self.ema.update_attr(self.model)
        # pool the epoch statistics across GPUs (sum, then average by world size)
        loss_avg = reduce_sum(
            torch.tensor(self.loss_logger.avg(), device=self.device)).item() / self.gpu_num
        iou_loss_avg = reduce_sum(
            torch.tensor(self.iou_loss_logger.avg(), device=self.device)).item() / self.gpu_num
        l1_loss_avg = reduce_sum(
            torch.tensor(self.l1_loss_logger.avg(), device=self.device)).item() / self.gpu_num
        cls_loss_avg = reduce_sum(
            torch.tensor(self.cls_loss_logger.avg(), device=self.device)).item() / self.gpu_num
        match_num_sum = reduce_sum(
            torch.tensor(self.match_num_logger.sum(), device=self.device)).item() / self.gpu_num
        if self.local_rank == 0:
            final_template = "epoch:{:2d}|match_num:{:d}|loss:{:6.4f}|cls:{:6.4f}|l1:{:6.4f}|iou:{:6.4f}"
            print(final_template.format(epoch + 1,
                                        int(match_num_sum),
                                        loss_avg,
                                        cls_loss_avg,
                                        l1_loss_avg,
                                        iou_loss_avg))

    @torch.no_grad()
    def val(self, epoch):
        predict_list = list()
        target_list = list()
        self.model.eval()
        self.ema.ema.eval()
        if self.local_rank == 0:
            pbar = tqdm(self.vloader)
        else:
            pbar = self.vloader
        for img_tensor, targets_tensor, batch_len in pbar:
            img_tensor = img_tensor.to(self.device)
            targets_tensor = targets_tensor.to(self.device)
            predicts = self.ema.ema(img_tensor)['predicts']
            for pred, target in zip(predicts, targets_tensor.split(batch_len)):
                predict_list.append(pred)
                target_list.append(target)
        mp, mr, map50, mean_ap = coco_map(predict_list, target_list)
        mp = reduce_sum(torch.tensor(mp, device=self.device)) / self.gpu_num
        mr = reduce_sum(torch.tensor(mr, device=self.device)) / self.gpu_num
        map50 = reduce_sum(torch.tensor(map50, device=self.device)) / self.gpu_num
        mean_ap = reduce_sum(torch.tensor(mean_ap, device=self.device)) / self.gpu_num
        if self.local_rank == 0:
            print("*" * 20, "eval start", "*" * 20)
            print("epoch: {:2d}|mp:{:6.4f}|mr:{:6.4f}|map50:{:6.4f}|map:{:6.4f}".format(
                epoch + 1, mp * 100, mr * 100, map50 * 100, mean_ap * 100))
            print("*" * 20, "eval end", "*" * 20)
        last_weight_path = os.path.join(self.val_cfg['weight_path'],
                                        "{:s}_{:s}_last.pth".format(self.cfg['model_name'],
                                                                    self.model_cfg['backbone']))
        best_map_weight_path = os.path.join(self.val_cfg['weight_path'],
                                            "{:s}_{:s}_best_map.pth".format(self.cfg['model_name'],
                                                                            self.model_cfg['backbone']))
        model_static = self.model.module.state_dict()
        cpkt = {
            "model": model_static,
            "map": mean_ap * 100,
            "epoch": epoch,
            "ema": self.ema.ema.state_dict()
        }
        if self.local_rank != 0:
            return
        torch.save(cpkt, last_weight_path)
        if mean_ap > self.best_map:
            torch.save(cpkt, best_map_weight_path)
            self.best_map = mean_ap

    def run(self):
        for epoch in range(self.optim_cfg['epochs']):
            self.train(epoch)
            if (epoch + 1) % self.val_cfg['interval'] == 0:
                self.val(epoch)
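# Illustrative sketch: the metric pooling above assumes a reduce_sum helper that
# sums a tensor across all ranks; dividing by gpu_num then yields the cross-GPU
# mean. The repo's own implementation may differ from this minimal version.
import torch
import torch.distributed as dist

def reduce_sum(tensor):
    # sum `tensor` over every rank in the default process group;
    # falls back to a no-op when torch.distributed is not initialized
    if not (dist.is_available() and dist.is_initialized()):
        return tensor
    reduced = tensor.clone()
    dist.all_reduce(reduced, op=dist.ReduceOp.SUM)
    return reduced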