class Trainer(object):
    def __init__(self, weight_path, resume, gpu_id, accumulate):
        init_seeds(0)
        self.device = gpu.select_device(gpu_id)
        self.start_epoch = 0
        self.best_mAP = 0.0
        self.accumulate = accumulate
        self.epochs = cfg.TRAIN["EPOCHS"]
        self.weight_path = weight_path
        self.multi_scale_train = cfg.TRAIN["MULTI_SCALE_TRAIN"]
        self.train_dataset = data.Build_Dataset(
            anno_file_type="train", img_size=cfg.TRAIN["TRAIN_IMG_SIZE"])
        print('train img size is {}'.format(cfg.TRAIN["TRAIN_IMG_SIZE"]))
        self.train_dataloader = DataLoader(
            self.train_dataset,
            batch_size=cfg.TRAIN["BATCH_SIZE"],
            num_workers=cfg.TRAIN["NUMBER_WORKERS"],
            shuffle=True,
            pin_memory=True)
        self.yolov4 = Build_Model().to(self.device)

        self.optimizer = optim.SGD(self.yolov4.parameters(),
                                   lr=cfg.TRAIN["LR_INIT"],
                                   momentum=cfg.TRAIN["MOMENTUM"],
                                   weight_decay=cfg.TRAIN["WEIGHT_DECAY"])

        self.criterion = YoloV4Loss(
            anchors=cfg.MODEL["ANCHORS"],
            strides=cfg.MODEL["STRIDES"],
            iou_threshold_loss=cfg.TRAIN["IOU_THRESHOLD_LOSS"])

        self.__load_model_weights(weight_path, resume)

        self.scheduler = cosine_lr_scheduler.CosineDecayLR(
            self.optimizer,
            T_max=self.epochs * len(self.train_dataloader),
            lr_init=cfg.TRAIN["LR_INIT"],
            lr_min=cfg.TRAIN["LR_END"],
            warmup=cfg.TRAIN["WARMUP_EPOCHS"] * len(self.train_dataloader))

    def __load_model_weights(self, weight_path, resume):
        if resume:
            last_weight = os.path.join(
                os.path.split(weight_path)[0], "last.pt")
            chkpt = torch.load(last_weight, map_location=self.device)
            self.yolov4.load_state_dict(chkpt['model'])

            self.start_epoch = chkpt['epoch'] + 1
            if chkpt['optimizer'] is not None:
                self.optimizer.load_state_dict(chkpt['optimizer'])
                self.best_mAP = chkpt['best_mAP']
            del chkpt
        else:
            self.yolov4.load_darknet_weights(weight_path)

    # def __save_model_weights(self, epoch, mAP):
    #     if mAP > self.best_mAP:
    #         self.best_mAP = mAP
    #     best_weight = os.path.join(os.path.split(self.weight_path)[0], "best.pt")
    #     last_weight = os.path.join(os.path.split(self.weight_path)[0], "last.pt")
    #     chkpt = {'epoch': epoch,
    #              'best_mAP': self.best_mAP,
    #              'model': self.yolov4.state_dict(),
    #              'optimizer': self.optimizer.state_dict()}
    #     torch.save(chkpt, last_weight)
    #
    #     if self.best_mAP == mAP:
    #         torch.save(chkpt['model'], best_weight)
    #
    #     if epoch > 0 and epoch % 10 == 0:
    #         torch.save(chkpt, os.path.join(os.path.split(self.weight_path)[0], 'backup_epoch%g.pt' % epoch))
    #     del chkpt

    def __save_model_weights(self, epoch, loss):
        weight = os.path.join(
            os.path.split(self.weight_path)[0],
            "epoch{}, loss{}.pt".format(epoch, loss))
        chkpt = {
            'epoch': epoch,
            'model': self.yolov4.state_dict(),
            'optimizer': self.optimizer.state_dict()
        }
        torch.save(chkpt['model'], weight)

    def train(self):
        global writer  # SummaryWriter instance (created elsewhere)
        logger.info(
            "Training start,img size is: {:d},batchsize is: {:d},work number is {:d}"
            .format(cfg.TRAIN["TRAIN_IMG_SIZE"], cfg.TRAIN["BATCH_SIZE"],
                    cfg.TRAIN["NUMBER_WORKERS"]))
        logger.info(self.yolov4)
        logger.info("Train datasets number is : {}".format(
            len(self.train_dataset)))

        self.yolov4, self.optimizer = amp.initialize(self.yolov4,
                                                     self.optimizer,
                                                     opt_level='O1',
                                                     verbosity=0)
        logger.info(" =======  start  training  ====== ")
        for epoch in range(self.start_epoch, self.epochs):
            start = time.time()
            self.yolov4.train()

            mloss = torch.zeros(4)
            logger.info("===Epoch:[{}/{}]===".format(epoch, self.epochs))
            for i, (imgs, label_sbbox, label_mbbox, label_lbbox, sbboxes,
                    mbboxes, lbboxes) in enumerate(self.train_dataloader):
                self.scheduler.step(len(self.train_dataloader) * epoch + i)

                imgs = imgs.to(self.device)
                label_sbbox = label_sbbox.to(self.device)
                label_mbbox = label_mbbox.to(self.device)
                label_lbbox = label_lbbox.to(self.device)
                sbboxes = sbboxes.to(self.device)
                mbboxes = mbboxes.to(self.device)
                lbboxes = lbboxes.to(self.device)

                p, p_d = self.yolov4(imgs)

                loss, loss_giou, loss_conf, loss_cls = self.criterion(
                    p, p_d, label_sbbox, label_mbbox, label_lbbox, sbboxes,
                    mbboxes, lbboxes)

                with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                    scaled_loss.backward()

                # Accumulate gradient for x batches before optimizing
                if i % self.accumulate == 0:
                    self.optimizer.step()
                    self.optimizer.zero_grad()

                # Update running mean of tracked metrics
                loss_items = torch.tensor(
                    [loss_giou, loss_conf, loss_cls, loss])
                mloss = (mloss * i + loss_items) / (i + 1)

                # Print batch results
                if i % 10 == 0:
                    logger.info(
                        " === Epoch:[{:3}/{}],step:[{:3}/{}],img_size:[{:3}],total_loss:{:.4f}|loss_giou:{:.4f}|loss_conf:{:.4f}|loss_cls:{:.4f}|lr:{:.4f}"
                        .format(epoch, self.epochs, i,
                                len(self.train_dataloader) - 1,
                                self.train_dataset.img_size, mloss[3],
                                mloss[0], mloss[1], mloss[2],
                                self.optimizer.param_groups[0]['lr']))
                    writer.add_scalar('train_loss', mloss[3], i)

                # multi-scale training (320-608 pixels) every 10 batches
                if self.multi_scale_train and (i + 1) % 10 == 0:
                    # self.train_dataset.img_size = random.choice(range(5, 15)) * 32   # for imgsize 320
                    # self.train_dataset.img_size = random.choice(range(12, 22)) * 32  # for imgsize 544
                    self.train_dataset.img_size = random.choice(range(10, 20)) * 32

            if epoch >= 0 and cfg.TRAIN["DATA_TYPE"] == 'VOC':
                self.__save_model_weights(epoch, mloss[3])
                print('save weights done')
                # mAP = 0
                # if epoch >= 0:
                #     logger.info("===== Validate =====")
                #     with torch.no_grad():
                #         APs = Evaluator(self.yolov4, epoch, showatt=False).APs_voc()
                #         for i in APs:
                #             print("{} --> mAP : {}".format(i, APs[i]))
                #             mAP += APs[i]
                #         mAP = mAP / self.train_dataset.num_classes
                #         writer.add_scalar('mAP', mAP, epoch)
                #         logger.info(" ===test mAP:{:.3f}".format(mAP))
            elif epoch >= 0 and cfg.TRAIN["DATA_TYPE"] == 'COCO':
                evaluator = COCOAPIEvaluator(model_type='YOLOv4',
                                             data_dir=cfg.DATA_PATH,
                                             img_size=cfg.VAL["TEST_IMG_SIZE"],
                                             confthre=0.08,
                                             nmsthre=cfg.VAL["NMS_THRESH"])
                ap50_95, ap50 = evaluator.evaluate(self.yolov4)
                # logger.info('ap50_95:{}|ap50:{}'.format(ap50_95, ap50))
                writer.add_scalar('val/COCOAP50', ap50, epoch)
                writer.add_scalar('val/COCOAP50_95', ap50_95, epoch)
                self.__save_model_weights(epoch, ap50)
            else:
                raise ValueError('dataset must be VOC or COCO')

            end = time.time()
            logger.info("  ===cost time:{:.4f}s".format(end - start))
        logger.info(
            "=====Training Finished.   best_test_mAP:{:.3f}%====".format(
                self.best_mAP))
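# ---------------------------------------------------------------------------
# Hedged usage sketch (not in the original file): one way this first Trainer
# variant might be driven from the command line. The flag names and the
# "weight/yolov4.weights" default are illustrative assumptions, not the
# project's actual CLI; `logger`, `cfg`, the data/model modules, and NVIDIA
# apex (`amp`) are assumed to be provided by the surrounding project.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import argparse
    from torch.utils.tensorboard import SummaryWriter

    parser = argparse.ArgumentParser()
    parser.add_argument("--weight_path", type=str, default="weight/yolov4.weights",
                        help="darknet weights to initialise from; last.pt is looked up next to it when resuming")
    parser.add_argument("--resume", action="store_true", help="resume from last.pt")
    parser.add_argument("--gpu_id", type=int, default=0, help="GPU index (-1 for CPU)")
    parser.add_argument("--accumulate", type=int, default=2,
                        help="number of batches to accumulate before each optimizer step")
    opt = parser.parse_args()

    # train() reads a module-level `writer`, so create it here at module scope
    writer = SummaryWriter(log_dir="log")

    Trainer(weight_path=opt.weight_path,
            resume=opt.resume,
            gpu_id=opt.gpu_id,
            accumulate=opt.accumulate).train()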
class Trainer(object):
    def __init__(self, weight_path=None, resume=False, gpu_id=0, accumulate=1, fp_16=False):
        init_seeds(0)
        self.fp_16 = fp_16
        self.device = gpu.select_device(gpu_id)
        self.start_epoch = 0
        self.best_mAP = 0.0
        self.accumulate = accumulate
        self.weight_path = weight_path
        self.multi_scale_train = cfg.TRAIN["MULTI_SCALE_TRAIN"]
        self.showatt = cfg.TRAIN["showatt"]
        if self.multi_scale_train:
            print("Using multi scales training")
        else:
            print("train img size is {}".format(cfg.TRAIN["TRAIN_IMG_SIZE"]))
        self.train_dataset = data.Build_Dataset(
            anno_file_type="train", img_size=cfg.TRAIN["TRAIN_IMG_SIZE"])
        self.epochs = (cfg.TRAIN["YOLO_EPOCHS"]
                       if cfg.MODEL_TYPE["TYPE"] == "YOLOv4"
                       else cfg.TRAIN["Mobilenet_YOLO_EPOCHS"])
        self.eval_epoch = 30 if cfg.MODEL_TYPE["TYPE"] == "YOLOv4" else 50
        self.train_dataloader = DataLoader(
            self.train_dataset,
            batch_size=cfg.TRAIN["BATCH_SIZE"],
            num_workers=cfg.TRAIN["NUMBER_WORKERS"],
            shuffle=True,
            pin_memory=True,
        )
        self.yolov4 = Build_Model(weight_path=weight_path,
                                  resume=resume,
                                  showatt=self.showatt).to(self.device)

        self.optimizer = optim.SGD(
            self.yolov4.parameters(),
            lr=cfg.TRAIN["LR_INIT"],
            momentum=cfg.TRAIN["MOMENTUM"],
            weight_decay=cfg.TRAIN["WEIGHT_DECAY"],
        )

        self.criterion = YoloV4Loss(
            anchors=cfg.MODEL["ANCHORS"],
            strides=cfg.MODEL["STRIDES"],
            iou_threshold_loss=cfg.TRAIN["IOU_THRESHOLD_LOSS"],
        )

        self.scheduler = cosine_lr_scheduler.CosineDecayLR(
            self.optimizer,
            T_max=self.epochs * len(self.train_dataloader),
            lr_init=cfg.TRAIN["LR_INIT"],
            lr_min=cfg.TRAIN["LR_END"],
            warmup=cfg.TRAIN["WARMUP_EPOCHS"] * len(self.train_dataloader),
        )

        if resume:
            self.__load_resume_weights(weight_path)

    def __load_resume_weights(self, weight_path):
        last_weight = os.path.join(os.path.split(weight_path)[0], "last.pt")
        chkpt = torch.load(last_weight, map_location=self.device)
        self.yolov4.load_state_dict(chkpt["model"])

        self.start_epoch = chkpt["epoch"] + 1
        if chkpt["optimizer"] is not None:
            self.optimizer.load_state_dict(chkpt["optimizer"])
            self.best_mAP = chkpt["best_mAP"]
        del chkpt

    def __save_model_weights(self, epoch, mAP):
        if mAP > self.best_mAP:
            self.best_mAP = mAP
        best_weight = os.path.join(os.path.split(self.weight_path)[0], "best.pt")
        last_weight = os.path.join(os.path.split(self.weight_path)[0], "last.pt")
        chkpt = {
            "epoch": epoch,
            "best_mAP": self.best_mAP,
            "model": self.yolov4.state_dict(),
            "optimizer": self.optimizer.state_dict(),
        }
        torch.save(chkpt, last_weight)

        if self.best_mAP == mAP:
            torch.save(chkpt["model"], best_weight)

        if epoch > 0 and epoch % 10 == 0:
            torch.save(
                chkpt,
                os.path.join(
                    os.path.split(self.weight_path)[0],
                    "backup_epoch%g.pt" % epoch,
                ),
            )
        del chkpt

    def train(self):
        global writer
        logger.info(
            "Training start,img size is: {:d},batchsize is: {:d},work number is {:d}"
            .format(
                cfg.TRAIN["TRAIN_IMG_SIZE"],
                cfg.TRAIN["BATCH_SIZE"],
                cfg.TRAIN["NUMBER_WORKERS"],
            ))
        logger.info(self.yolov4)
        logger.info("Train datasets number is : {}".format(
            len(self.train_dataset)))

        def is_valid_number(x):
            return not (math.isnan(x) or math.isinf(x) or x > 1e4)

        if self.fp_16:
            self.yolov4, self.optimizer = amp.initialize(self.yolov4,
                                                         self.optimizer,
                                                         opt_level="O1",
                                                         verbosity=0)

        logger.info(" =======  start  training  ====== ")
        for epoch in range(self.start_epoch, self.epochs):
            start = time.time()
            self.yolov4.train()

            mloss = torch.zeros(4)
            logger.info("===Epoch:[{}/{}]===".format(epoch, self.epochs))
            for i, (
                    imgs,
                    label_sbbox,
                    label_mbbox,
                    label_lbbox,
                    sbboxes,
                    mbboxes,
                    lbboxes,
            ) in enumerate(self.train_dataloader):
                self.scheduler.step(
                    len(self.train_dataloader) / cfg.TRAIN["BATCH_SIZE"] * epoch + i)

                imgs = imgs.to(self.device)
                label_sbbox = label_sbbox.to(self.device)
                label_mbbox = label_mbbox.to(self.device)
                label_lbbox = label_lbbox.to(self.device)
                sbboxes = sbboxes.to(self.device)
                mbboxes = mbboxes.to(self.device)
                lbboxes = lbboxes.to(self.device)

                p, p_d = self.yolov4(imgs)

                loss, loss_ciou, loss_conf, loss_cls = self.criterion(
                    p,
                    p_d,
                    label_sbbox,
                    label_mbbox,
                    label_lbbox,
                    sbboxes,
                    mbboxes,
                    lbboxes,
                )
                if is_valid_number(loss.item()):
                    if self.fp_16:
                        with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                            scaled_loss.backward()
                    else:
                        loss.backward()

                # Accumulate gradient for x batches before optimizing
                if i % self.accumulate == 0:
                    self.optimizer.step()
                    self.optimizer.zero_grad()

                # Update running mean of tracked metrics
                loss_items = torch.tensor([loss_ciou, loss_conf, loss_cls, loss])
                mloss = (mloss * i + loss_items) / (i + 1)

                # Print batch results
                if i % 10 == 0:
                    logger.info(
                        " === Epoch:[{:3}/{}],step:[{:3}/{}],img_size:[{:3}],total_loss:{:.4f}|loss_ciou:{:.4f}|loss_conf:{:.4f}|loss_cls:{:.4f}|lr:{:.4f}"
                        .format(
                            epoch,
                            self.epochs,
                            i,
                            len(self.train_dataloader) - 1,
                            self.train_dataset.img_size,
                            mloss[3],
                            mloss[0],
                            mloss[1],
                            mloss[2],
                            self.optimizer.param_groups[0]["lr"],
                        ))
                    writer.add_scalar("loss_ciou", mloss[0],
                                      len(self.train_dataloader) * epoch + i)
                    writer.add_scalar("loss_conf", mloss[1],
                                      len(self.train_dataloader) * epoch + i)
                    writer.add_scalar("loss_cls", mloss[2],
                                      len(self.train_dataloader) * epoch + i)
                    writer.add_scalar("train_loss", mloss[3],
                                      len(self.train_dataloader) * epoch + i)

                # multi-scale training (320-608 pixels) every 10 batches
                if self.multi_scale_train and (i + 1) % 10 == 0:
                    self.train_dataset.img_size = random.choice(range(10, 20)) * 32

            if cfg.TRAIN["DATA_TYPE"] == "VOC" or cfg.TRAIN["DATA_TYPE"] == "Customer":
                mAP = 0.0
                if epoch >= self.eval_epoch:
                    logger.info("===== Validate =====")
                    logger.info("val img size is {}".format(cfg.VAL["TEST_IMG_SIZE"]))
                    with torch.no_grad():
                        APs, inference_time = Evaluator(
                            self.yolov4, showatt=self.showatt).APs_voc()
                        for cls_name in APs:
                            logger.info("{} --> mAP : {}".format(cls_name, APs[cls_name]))
                            mAP += APs[cls_name]
                        mAP = mAP / self.train_dataset.num_classes
                        logger.info("mAP : {}".format(mAP))
                        logger.info("inference time: {:.2f} ms".format(inference_time))
                        writer.add_scalar("mAP", mAP, epoch)
                        self.__save_model_weights(epoch, mAP)
                        logger.info("save weights done")
                    logger.info("  ===test mAP:{:.3f}".format(mAP))
            elif epoch >= 0 and cfg.TRAIN["DATA_TYPE"] == "COCO":
                evaluator = COCOAPIEvaluator(
                    model_type="YOLOv4",
                    data_dir=cfg.DATA_PATH,
                    img_size=cfg.VAL["TEST_IMG_SIZE"],
                    confthre=0.08,
                    nmsthre=cfg.VAL["NMS_THRESH"],
                )
                ap50_95, ap50 = evaluator.evaluate(self.yolov4)
                logger.info("ap50_95:{}|ap50:{}".format(ap50_95, ap50))
                writer.add_scalar("val/COCOAP50", ap50, epoch)
                writer.add_scalar("val/COCOAP50_95", ap50_95, epoch)
                self.__save_model_weights(epoch, ap50)
                print("save weights done")

            end = time.time()
            logger.info("  ===cost time:{:.4f}s".format(end - start))
        logger.info(
            "=====Training Finished.   best_test_mAP:{:.3f}%====".format(
                self.best_mAP))
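# ---------------------------------------------------------------------------
# Hedged illustration (not in the original file): cosine_lr_scheduler.CosineDecayLR
# above is a project-local helper. The standalone function below sketches the
# generic "linear warmup then cosine decay" rule such a scheduler typically
# applies, to make the LR_INIT / LR_END / WARMUP_EPOCHS settings easier to
# reason about. It is an approximation, not the project's exact implementation.
# ---------------------------------------------------------------------------
import math


def warmup_cosine_lr(step, t_max, lr_init, lr_min, warmup):
    """Learning rate at a global step under linear warmup + cosine decay."""
    if warmup > 0 and step < warmup:
        # ramp linearly from 0 up to lr_init over the warmup steps
        return lr_init * step / warmup
    # then anneal from lr_init down to lr_min along a half cosine
    progress = (step - warmup) / max(1, t_max - warmup)
    return lr_min + 0.5 * (lr_init - lr_min) * (1 + math.cos(math.pi * progress))


# e.g. lr_init=1e-3, lr_min=1e-6, warmup of 2,000 steps, t_max of 50,000 steps:
# warmup_cosine_lr(500, 50_000, 1e-3, 1e-6, 2_000)    -> 2.5e-4 (still warming up)
# warmup_cosine_lr(25_000, 50_000, 1e-3, 1e-6, 2_000) -> roughly mid-way down the cosine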
class Trainer(object):
    _resume = False
    _fine_tune = False

    def __init__(self, log_dir, resume=False, fine_tune=False):
        init_seeds(0)
        self._fine_tune = fine_tune
        self._resume = resume
        if self._fine_tune:
            self.__prepare_fine_tune()
        self.device = gpu.select_device()
        self.start_epoch = 0
        self.best_mAP = 0.
        self.accumulate = cfg.TRAIN.ACCUMULATE
        self.log_dir = log_dir
        self.weight_path = "yolov4.weights"
        self.multi_scale_train = cfg.TRAIN.MULTI_SCALE_TRAIN
        if self.multi_scale_train:
            print('Using multi scales training')
        else:
            print('train img size is {}'.format(cfg.TRAIN.TRAIN_IMG_SIZE))

        self.train_dataset = data.Build_Train_Dataset(
            anno_file=cfg.TRAIN.ANNO_FILE,
            anno_file_type="train",
            img_size=cfg.TRAIN.TRAIN_IMG_SIZE)

        self.epochs = (cfg.TRAIN.YOLO_EPOCHS
                       if cfg.MODEL.MODEL_TYPE == 'YOLOv4'
                       else cfg.TRAIN.Mobilenet_YOLO_EPOCHS)
        self.train_dataloader = DataLoader(
            self.train_dataset,
            batch_size=cfg.TRAIN.BATCH_SIZE // cfg.TRAIN.ACCUMULATE,
            num_workers=cfg.TRAIN.NUMBER_WORKERS,
            shuffle=True,
            pin_memory=True)

        self.yolov4 = Build_Model(weight_path="yolov4.weights", resume=resume)
        self.yolov4 = self.yolov4.to(self.device)

        self.optimizer = optim.SGD(self.yolov4.parameters(),
                                   lr=cfg.TRAIN.LR_INIT,
                                   momentum=cfg.TRAIN.MOMENTUM,
                                   weight_decay=cfg.TRAIN.WEIGHT_DECAY)

        self.criterion = YoloV4Loss(anchors=cfg.MODEL.ANCHORS,
                                    strides=cfg.MODEL.STRIDES,
                                    iou_threshold_loss=cfg.TRAIN.IOU_THRESHOLD_LOSS)

        self.scheduler = cosine_lr_scheduler.CosineDecayLR(
            self.optimizer,
            T_max=self.epochs * len(self.train_dataloader),
            lr_init=cfg.TRAIN.LR_INIT,
            lr_min=cfg.TRAIN.LR_END,
            warmup=cfg.TRAIN.WARMUP_EPOCHS * len(self.train_dataloader))

        if resume:
            self.__load_resume_weights()
        if self._fine_tune:
            self.__load_best_weights()

    def __prepare_fine_tune(self):
        cfg.defrost()
        cfg.TRAIN.ANNO_FILE = cfg.FINE_TUNE.ANNO_FILE
        cfg.TRAIN.YOLO_EPOCHS = cfg.FINE_TUNE.YOLO_EPOCHS
        cfg.TRAIN.LR_INIT = cfg.FINE_TUNE.LR_INIT
        cfg.TRAIN.LR_END = cfg.FINE_TUNE.LR_END
        cfg.TRAIN.WARMUP_EPOCHS = cfg.FINE_TUNE.WARMUP_EPOCHS
        cfg.freeze()

    def __load_best_weights(self):
        best_weight = os.path.join(self.log_dir, "checkpoints", "best.pt")
        last_weight = os.path.join(self.log_dir, "checkpoints", "last.pt")
        shutil.copy2(best_weight, best_weight.replace("best.pt", "best_before_fine_tune.pt"))
        shutil.copy2(last_weight, last_weight.replace("last.pt", "last_before_fine_tune.pt"))
        last_chkpt = torch.load(last_weight, map_location=self.device)
        best_chkpt = torch.load(best_weight, map_location=self.device)
        self.yolov4.load_state_dict(best_chkpt)
        self.best_mAP = 0
        del last_chkpt, best_chkpt

    def __load_resume_weights(self):
        last_weight = os.path.join(self.log_dir, "checkpoints", "last.pt")
        chkpt = torch.load(last_weight, map_location=self.device)
        self.yolov4.load_state_dict(chkpt['model'])

        self.start_epoch = chkpt['epoch'] + 1
        if chkpt['optimizer'] is not None:
            self.optimizer.load_state_dict(chkpt['optimizer'])
            self.best_mAP = chkpt['best_mAP']
        del chkpt

    def __save_model_weights(self, epoch, mAP):
        if mAP > self.best_mAP:
            self.best_mAP = mAP
        best_weight = os.path.join(self.log_dir, "checkpoints", "best.pt")
        last_weight = os.path.join(self.log_dir, "checkpoints", "last.pt")
        chkpt = {'epoch': epoch,
                 'best_mAP': self.best_mAP,
                 'model': self.yolov4.module.state_dict()
                          if torch.cuda.device_count() > 1
                          else self.yolov4.state_dict(),
                 'optimizer': self.optimizer.state_dict()}
        torch.save(chkpt, last_weight)

        if self.best_mAP == mAP:
            torch.save(chkpt['model'], best_weight)

        if self._fine_tune and epoch % 5 == 0:
            torch.save(chkpt['model'],
                       os.path.join(self.log_dir, "checkpoints",
                                    'backup_fine_tune_epoch_{:02d}.pt'.format(epoch)))
        del chkpt

    def train(self):
        global writer
        logger.info(
            "Training start,img size is: {:d},batchsize is: {:d}, subdivision: {:d}, worker number is {:d}"
            .format(cfg.TRAIN.TRAIN_IMG_SIZE, cfg.TRAIN.BATCH_SIZE,
                    cfg.TRAIN.ACCUMULATE, cfg.TRAIN.NUMBER_WORKERS))
        logger.info(self.yolov4)
        n_train = len(self.train_dataset)
        n_step = n_train // (cfg.TRAIN.BATCH_SIZE // cfg.TRAIN.ACCUMULATE) + 1
        logger.info("Train datasets number is : {}".format(n_train))

        evaluator = COCOAPIEvaluator(cfg=cfg,
                                     img_size=cfg.VAL.TEST_IMG_SIZE,
                                     confthre=cfg.VAL.CONF_THRESH,
                                     nmsthre=cfg.VAL.NMS_THRESH)

        if torch.cuda.device_count() > 1:
            self.yolov4 = torch.nn.DataParallel(self.yolov4)

        logger.info("\n=============== start training ===============")
        for epoch in range(self.start_epoch, self.epochs):
            start = time.time()
            self.yolov4.train()

            with tqdm(total=n_train, unit="imgs",
                      desc=f'Epoch {epoch}/{self.epochs}', ncols=30) as pbar:
                for i, (imgs, label_sbbox, label_mbbox, label_lbbox,
                        sbboxes, mbboxes, lbboxes) in enumerate(self.train_dataloader):
                    imgs = imgs.to(self.device)
                    label_sbbox = label_sbbox.to(self.device)
                    label_mbbox = label_mbbox.to(self.device)
                    label_lbbox = label_lbbox.to(self.device)
                    sbboxes = sbboxes.to(self.device)
                    mbboxes = mbboxes.to(self.device)
                    lbboxes = lbboxes.to(self.device)

                    p, p_d = self.yolov4(imgs)

                    loss, loss_ciou, loss_conf, loss_cls = self.criterion(
                        p, p_d, label_sbbox, label_mbbox, label_lbbox,
                        sbboxes, mbboxes, lbboxes)

                    loss.backward()

                    # Accumulate gradient for x batches before optimizing
                    if i % self.accumulate == 0:
                        self.scheduler.step(n_step * epoch + i)
                        self.optimizer.step()
                        self.optimizer.zero_grad()

                    # Print batch results
                    if i % (5 * self.accumulate) == 0:
                        logger.info(
                            "{:3}: total_loss:{:.4f} | loss_ciou:{:.4f} | loss_conf:{:.4f} | loss_cls:{:.4f} | lr:{:.6f}"
                            .format(self.train_dataset.img_size, loss, loss_ciou,
                                    loss_conf, loss_cls,
                                    self.optimizer.param_groups[0]['lr']))
                        writer.add_scalar('train/loss_ciou', loss_ciou, n_step * epoch + i)
                        writer.add_scalar('train/loss_conf', loss_conf, n_step * epoch + i)
                        writer.add_scalar('train/loss_cls', loss_cls, n_step * epoch + i)
                        writer.add_scalar('train/train_loss', loss, n_step * epoch + i)
                        writer.add_scalar('train/lr',
                                          self.optimizer.param_groups[0]['lr'],
                                          n_step * epoch + i)

                    # multi-scale training (320-608 pixels) every 10 batches
                    if self.multi_scale_train and (i + 1) % (5 * self.accumulate) == 0:
                        self.train_dataset.img_size = random.choice(range(10, 20)) * 32

                    pbar.update(imgs.shape[0])

            mAP = 0.
            # evaluator = COCOAPIEvaluator(cfg=cfg,
            #                              img_size=cfg.VAL.TEST_IMG_SIZE,
            #                              confthre=cfg.VAL.CONF_THRESH,
            #                              nmsthre=cfg.VAL.NMS_THRESH)
            coco_stat = evaluator.evaluate(self.yolov4)
            logger.info("Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = {:.04f}".format(coco_stat[0]))
            logger.info("Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = {:.04f}".format(coco_stat[1]))
            logger.info("Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = {:.04f}".format(coco_stat[2]))
            logger.info("Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = {:.04f}".format(coco_stat[3]))
            logger.info("Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = {:.04f}".format(coco_stat[4]))
            logger.info("Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = {:.04f}".format(coco_stat[5]))
            logger.info("Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = {:.04f}".format(coco_stat[6]))
            logger.info("Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = {:.04f}".format(coco_stat[7]))
            logger.info("Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = {:.04f}".format(coco_stat[8]))
            logger.info("Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = {:.04f}".format(coco_stat[9]))
            logger.info("Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = {:.04f}".format(coco_stat[10]))
            logger.info("Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = {:.04f}".format(coco_stat[11]))

            writer.add_scalar('val/mAP_50_95', coco_stat[0], epoch)
            writer.add_scalar('val/mAP_50', coco_stat[1], epoch)
            writer.add_scalar('val/mAP_75', coco_stat[2], epoch)
            writer.add_scalar('val/mAP_small', coco_stat[3], epoch)
            writer.add_scalar('val/mAP_medium', coco_stat[4], epoch)
            writer.add_scalar('val/mAP_large', coco_stat[5], epoch)
            writer.add_scalar('val/mAR_max_1', coco_stat[6], epoch)
            writer.add_scalar('val/mAR_max_10', coco_stat[7], epoch)
            writer.add_scalar('val/mAR_max_100', coco_stat[8], epoch)
            writer.add_scalar('val/mAR_small', coco_stat[9], epoch)
            writer.add_scalar('val/mAR_medium', coco_stat[10], epoch)
            writer.add_scalar('val/mAR_large', coco_stat[11], epoch)

            self.__save_model_weights(epoch, coco_stat[0])
            logger.info('save weights done')

            end = time.time()
            logger.info("cost time:{:.4f}s".format(end - start))
        logger.info("=====Training Finished.   best_test_mAP:{:.3f}%====".format(self.best_mAP))
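# ---------------------------------------------------------------------------
# Hedged illustration (not in the original file): the variant above loads
# BATCH_SIZE // ACCUMULATE images per DataLoader batch and only steps the
# optimizer every `accumulate` iterations, so gradients from several small
# batches are summed before each update (darknet-style "subdivisions"). The
# toy loop below shows the same pattern on a dummy model; all names here are
# illustrative, not the project's.
# ---------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(8, 1)
optimizer = optim.SGD(model.parameters(), lr=0.01)
accumulate = 4        # plays the role of cfg.TRAIN.ACCUMULATE
micro_batch = 16      # plays the role of cfg.TRAIN.BATCH_SIZE // cfg.TRAIN.ACCUMULATE
# effective batch per optimizer step = accumulate * micro_batch = cfg.TRAIN.BATCH_SIZE

optimizer.zero_grad()
for i in range(12):   # stand-in for enumerate(train_dataloader)
    x = torch.randn(micro_batch, 8)
    y = torch.randn(micro_batch, 1)
    loss = nn.functional.mse_loss(model(x), y)
    loss.backward()                 # gradients keep accumulating in .grad
    if i % accumulate == 0:         # same stepping condition the Trainer uses
        optimizer.step()
        optimizer.zero_grad()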
# Inside the batch loop of a plain, script-style training run:
loss, loss_ciou, loss_conf, loss_cls = criterion(
    p, p_d, label_sbbox, label_mbbox, label_lbbox,
    sbboxes, mbboxes, lbboxes,
)

optimizer.zero_grad()
loss.backward()
optimizer.step()

# Update running mean of tracked losses
loss_items = torch.tensor([loss_ciou, loss_conf, loss_cls, loss])
mloss = (mloss * i + loss_items) / (i + 1)

if i % 10 == 0:
    # lr = round(optimizer.param_groups[0]["lr"], 5)
    print(
        f"Epoch: {epoch}/{epochs}, step: [{i}/{len(train_dataloader) - 1}], "
        f"total_loss: {mloss[3]:.4f}, loss_ciou: {mloss[0]:.4f}, "
        f"loss_conf: {mloss[1]:.4f}, loss_cls: {mloss[2]:.4f}"
    )

# Save model and optimizer state
chkpt = {
    "epoch": epoch,
    # "best_mAP": self.best_mAP,
    "model": yolov4.state_dict(),
    "optimizer": optimizer.state_dict(),
}
torch.save(chkpt, './model.pt')
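# ---------------------------------------------------------------------------
# Hedged usage sketch (not in the original file): the snippet above stores both
# model and optimizer state in './model.pt'. A minimal sketch of restoring it
# later, assuming the same model architecture and an SGD optimizer have already
# been constructed exactly as in the training code:
# ---------------------------------------------------------------------------
chkpt = torch.load('./model.pt', map_location='cpu')
yolov4.load_state_dict(chkpt['model'])
optimizer.load_state_dict(chkpt['optimizer'])
start_epoch = chkpt['epoch'] + 1   # continue counting from the saved epoch
yolov4.eval()                      # or .train() when resuming training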