def Train_Dataset(self, root_dir, coco_dir, img_dir, set_dir, batch_size=8,
                  image_size=512, use_gpu=True, num_workers=3):
    '''
    Store the training-set configuration and build the training data loader.

    The arguments are written into ``self.system_dict`` and then used to
    create a ``CocoDataset`` and a shuffling ``DataLoader`` over it.

    Args:
        root_dir (str): Directory that contains ``coco_dir``
        coco_dir (str): Dataset folder name; joined to ``root_dir`` with "/"
        img_dir (str): Image folder name, forwarded to ``CocoDataset``
        set_dir (str): Training split name, forwarded to ``CocoDataset``
        batch_size (int): Batch size; multiplied by the stored GPU count
        image_size (int): Stored under ``params``; not otherwise read here
        use_gpu (bool): If True, CUDA availability is probed and the RNG seeded
        num_workers (int): Worker count handed to the ``DataLoader``

    Returns:
        None
    '''
    dataset_cfg = self.system_dict["dataset"]["train"]
    params = self.system_dict["params"]
    local = self.system_dict["local"]

    dataset_cfg["root_dir"] = root_dir
    dataset_cfg["coco_dir"] = coco_dir
    dataset_cfg["img_dir"] = img_dir
    dataset_cfg["set_dir"] = set_dir

    params["batch_size"] = batch_size
    params["image_size"] = image_size
    params["use_gpu"] = use_gpu
    params["num_workers"] = num_workers

    if params["use_gpu"]:
        if torch.cuda.is_available():
            local["num_gpus"] = torch.cuda.device_count()
            torch.cuda.manual_seed(123)
        else:
            torch.manual_seed(123)

    # NOTE(review): "num_gpus" is only assigned on the CUDA path above, so it
    # is presumably initialised elsewhere for CPU runs -- confirm.
    effective_batch = params["batch_size"] * local["num_gpus"]
    local["training_params"] = {
        "batch_size": effective_batch,
        "shuffle": True,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": params["num_workers"],
    }

    dataset_root = dataset_cfg["root_dir"] + "/" + dataset_cfg["coco_dir"]
    local["training_set"] = CocoDataset(
        root_dir=dataset_root,
        img_dir=dataset_cfg["img_dir"],
        set_dir=dataset_cfg["set_dir"],
        transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))

    local["training_generator"] = DataLoader(local["training_set"],
                                             **local["training_params"])
def train(opt):
    """Train EfficientDet on the train2017 split and evaluate on val2017.

    Each epoch runs one pass over the training loader. Every
    ``opt.test_interval`` epochs the model is evaluated; when the evaluation
    loss improves by more than ``opt.es_min_delta`` the model is saved as a
    ``.pth`` file and exported to ONNX. Training stops early once no
    improvement has been seen for more than ``opt.es_patience`` epochs
    (early stopping is disabled when ``es_patience`` is not positive).

    Args:
        opt: Options object. Attributes read here: ``batch_size``,
            ``data_path``, ``log_path``, ``saved_path``, ``lr``,
            ``num_epochs``, ``test_interval``, ``es_min_delta`` and
            ``es_patience``.

    Returns:
        None
    """
    use_cuda = torch.cuda.is_available()
    num_gpus = 1
    if use_cuda:
        num_gpus = torch.cuda.device_count()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    training_params = {
        "batch_size": opt.batch_size * num_gpus,
        "shuffle": True,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": 12,
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False,
        "collate_fn": collater,
        "num_workers": 12,
    }

    training_set = CocoDataset(
        root_dir=opt.data_path,
        set="train2017",
        transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
    training_generator = DataLoader(training_set, **training_params)

    test_set = CocoDataset(
        root_dir=opt.data_path,
        set="val2017",
        transform=transforms.Compose([Normalizer(), Resizer()]))
    test_generator = DataLoader(test_set, **test_params)

    model = EfficientDet(num_classes=training_set.num_classes())

    # The log directory is recreated from scratch on every run.
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    writer = SummaryWriter(opt.log_path)
    if use_cuda:
        model = model.cuda()
        model = nn.DataParallel(model)

    optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    best_loss = 1e5
    best_epoch = 0
    model.train()

    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epochs):
        model.train()
        epoch_loss = []
        progress_bar = tqdm(training_generator)
        for step, data in enumerate(progress_bar):
            # Best-effort training: a failing batch is reported and skipped.
            try:
                optimizer.zero_grad()
                if use_cuda:
                    cls_loss, reg_loss = model(
                        [data['img'].cuda().float(), data['annot'].cuda()])
                else:
                    cls_loss, reg_loss = model(
                        [data['img'].float(), data['annot']])

                cls_loss = cls_loss.mean()
                reg_loss = reg_loss.mean()
                loss = cls_loss + reg_loss
                if loss == 0:
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                optimizer.step()
                epoch_loss.append(float(loss))
                total_loss = np.mean(epoch_loss)

                progress_bar.set_description(
                    'Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Batch loss: {:.5f} Total loss: {:.5f}'
                    .format(epoch + 1, opt.num_epochs, step + 1,
                            num_iter_per_epoch, cls_loss, reg_loss, loss,
                            total_loss))
                global_step = epoch * num_iter_per_epoch + step
                writer.add_scalar('Train/Total_loss', total_loss, global_step)
                writer.add_scalar('Train/Regression_loss', reg_loss,
                                  global_step)
                writer.add_scalar('Train/Classfication_loss (focal loss)',
                                  cls_loss, global_step)
            except Exception as e:
                print(e)
                continue

        # If every batch was skipped, np.mean([]) would be NaN and the
        # scheduler would count it as a bad epoch; skip the step instead.
        if epoch_loss:
            scheduler.step(np.mean(epoch_loss))

        if epoch % opt.test_interval == 0:
            model.eval()
            loss_regression_ls = []
            loss_classification_ls = []
            for data in test_generator:
                with torch.no_grad():
                    if use_cuda:
                        cls_loss, reg_loss = model(
                            [data['img'].cuda().float(),
                             data['annot'].cuda()])
                    else:
                        cls_loss, reg_loss = model(
                            [data['img'].float(), data['annot']])

                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss_classification_ls.append(float(cls_loss))
                    loss_regression_ls.append(float(reg_loss))

            cls_loss = np.mean(loss_classification_ls)
            reg_loss = np.mean(loss_regression_ls)
            loss = cls_loss + reg_loss

            print(
                'Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                .format(epoch + 1, opt.num_epochs, cls_loss, reg_loss, loss))
            writer.add_scalar('Test/Total_loss', loss, epoch)
            writer.add_scalar('Test/Regression_loss', reg_loss, epoch)
            writer.add_scalar('Test/Classfication_loss (focal loss)',
                              cls_loss, epoch)

            if loss + opt.es_min_delta < best_loss:
                best_loss = loss
                best_epoch = epoch
                torch.save(
                    model,
                    os.path.join(opt.saved_path,
                                 "signatrix_efficientdet_coco.pth"))

                dummy_input = torch.rand(opt.batch_size, 3, 512, 512)
                if use_cuda:
                    dummy_input = dummy_input.cuda()

                # Export the underlying module, not the DataParallel wrapper.
                # The memory-efficient swish is switched off for the export
                # and switched back on afterwards.
                export_model = (model.module if isinstance(
                    model, nn.DataParallel) else model)
                export_model.backbone_net.model.set_swish(
                    memory_efficient=False)
                torch.onnx.export(export_model,
                                  dummy_input,
                                  os.path.join(
                                      opt.saved_path,
                                      "signatrix_efficientdet_coco.onnx"),
                                  verbose=False,
                                  opset_version=11)
                export_model.backbone_net.model.set_swish(
                    memory_efficient=True)

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                # Report the best loss, not the loss of the current epoch.
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, best_loss))
                break
    writer.close()
def train(opt):
    """Train the three-loss EfficientDet variant on GPU.

    The model returns a classification loss, a second classification loss
    and a regression loss; their sum is optimised. Every
    ``opt.test_interval`` epochs the model is evaluated on val2017 and, when
    the evaluation loss improves by more than ``opt.es_min_delta``, the
    state dict is saved to ``<saved_path>/<network>.pth``.

    Args:
        opt: Options object. Attributes read here: ``batch_size``,
            ``data_path``, ``resume``, ``saved_path``, ``network``, ``lr``,
            ``num_epochs``, ``test_interval``, ``es_min_delta`` and
            ``es_patience``. ``opt.num_classes`` is set by this function.

    Returns:
        None

    Raises:
        Exception: If CUDA is not available.
    """
    if not torch.cuda.is_available():
        raise Exception('no GPU')
    num_gpus = torch.cuda.device_count()

    cudnn.benchmark = True

    training_params = {
        "batch_size": opt.batch_size * num_gpus,
        "shuffle": True,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": 12,
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False,
        "collate_fn": collater,
        "num_workers": 12,
    }

    training_set = CocoDataset(
        root_dir=opt.data_path,
        set="train2017",
        transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
    training_generator = DataLoader(training_set, **training_params)

    test_set = CocoDataset(
        root_dir=opt.data_path,
        set="val2017",
        transform=transforms.Compose([Normalizer(), Resizer()]))
    test_generator = DataLoader(test_set, **test_params)

    opt.num_classes = training_set.num_classes()
    model = EfficientDet(opt)
    if opt.resume:
        print('Loading model...')
        model.load_state_dict(
            torch.load(os.path.join(opt.saved_path, opt.network + '.pth')))

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    model = model.cuda()
    model = nn.DataParallel(model)

    optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    best_loss = 1e5
    best_epoch = 0
    model.train()

    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epochs):
        print('Epoch: {}/{}:'.format(epoch + 1, opt.num_epochs))
        model.train()
        epoch_loss = []
        progress_bar = tqdm(training_generator)
        for step, data in enumerate(progress_bar):
            # Best-effort training: a failing batch is reported and skipped.
            try:
                optimizer.zero_grad()
                # CUDA is guaranteed here: the function raises without it.
                cls_loss, cls_2_loss, reg_loss = model(
                    [data['img'].cuda().float(), data['annot'].cuda()])

                cls_loss = cls_loss.mean()
                reg_loss = reg_loss.mean()
                cls_2_loss = cls_2_loss.mean()
                loss = cls_loss + cls_2_loss + reg_loss
                if loss == 0:
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                optimizer.step()
                epoch_loss.append(float(loss))
                total_loss = np.mean(epoch_loss)

                progress_bar.set_description(
                    'Epoch: {}/{}. Iteration: {}/{}'.format(
                        epoch + 1, opt.num_epochs, step + 1,
                        num_iter_per_epoch))
                progress_bar.write(
                    'Cls loss: {:.5f}\tReg loss: {:.5f}\tCls+Reg loss: {:.5f}\tBatch loss: {:.5f}\tTotal loss: {:.5f}'
                    .format(cls_loss, reg_loss, cls_loss + reg_loss, loss,
                            total_loss))
            except Exception as e:
                print(e)
                continue

        # If every batch was skipped, np.mean([]) would be NaN and the
        # scheduler would count it as a bad epoch; skip the step instead.
        if epoch_loss:
            scheduler.step(np.mean(epoch_loss))

        if epoch % opt.test_interval == 0:
            model.eval()
            loss_regression_ls = []
            loss_classification_ls = []
            loss_classification_2_ls = []
            progress_bar = tqdm(test_generator)
            progress_bar.set_description_str(' Evaluating')
            for data in progress_bar:
                with torch.no_grad():
                    cls_loss, cls_2_loss, reg_loss = model(
                        [data['img'].cuda().float(), data['annot'].cuda()])

                    cls_loss = cls_loss.mean()
                    cls_2_loss = cls_2_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss_classification_ls.append(float(cls_loss))
                    loss_classification_2_ls.append(float(cls_2_loss))
                    loss_regression_ls.append(float(reg_loss))

            cls_loss = np.mean(loss_classification_ls)
            cls_2_loss = np.mean(loss_classification_2_ls)
            reg_loss = np.mean(loss_regression_ls)
            loss = cls_loss + cls_2_loss + reg_loss

            print(
                'Epoch: {}/{}. \nClassification loss: {:1.5f}. \tClassification_2 loss: {:1.5f}. \tRegression loss: {:1.5f}. \tTotal loss: {:1.5f}'
                .format(epoch + 1, opt.num_epochs, cls_loss, cls_2_loss,
                        reg_loss, loss))

            if loss + opt.es_min_delta < best_loss:
                print('Saving model...')
                best_loss = loss
                best_epoch = epoch
                # Save the unwrapped module's weights so the file can be
                # loaded with load_state_dict on resume.
                torch.save(model.module.state_dict(),
                           os.path.join(opt.saved_path, opt.network + '.pth'))

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                # Report the best loss, not the loss of the current epoch.
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, best_loss))
                break
def Train_Dataset(self, root_dir, coco_dir, img_dir, set_dir, batch_size=8,
                  image_size=512, use_gpu=True, num_workers=3):
    '''
    User function: Set training dataset parameters

    Dataset Directory Structure

        root_dir
          |
          |------coco_dir
                   |
                   |----img_dir
                   |       |
                   |       |------<set_dir_train> (set_dir) (Train)
                   |                   |
                   |                   |---------img1.jpg
                   |                   |---------img2.jpg
                   |                   |---------..........(and so on)
                   |
                   |----annotations
                           |
                           |------instances_Train.json  (instances_<set_dir_train>.json)
                           |------classes.txt

        - instances_Train.json -> In proper COCO format
        - classes.txt          -> A list of classes in alphabetical order

    For TrainSet
        - root_dir = "../sample_dataset";
        - coco_dir = "kangaroo";
        - img_dir = "images";
        - set_dir = "Train";

    Note: The annotation file name must match set_dir
          (instances_<set_dir>.json)

    Args:
        root_dir (str): Path to root directory containing coco_dir
        coco_dir (str): Name of coco_dir containing image folder and annotation folder
        img_dir (str): Name of folder containing all training and validation folders
        set_dir (str): Name of folder containing all training images
        batch_size (int): Mini batch sampling size for training epochs
        image_size (int): Either of [512, 300]
        use_gpu (bool): If True use GPU else run on CPU
        num_workers (int): Number of parallel processors for data loader

    Returns:
        None
    '''
    train_cfg = self.system_dict["dataset"]["train"]
    for key, value in (("root_dir", root_dir), ("coco_dir", coco_dir),
                       ("img_dir", img_dir), ("set_dir", set_dir)):
        train_cfg[key] = value

    hyper = self.system_dict["params"]
    for key, value in (("batch_size", batch_size),
                       ("image_size", image_size), ("use_gpu", use_gpu),
                       ("num_workers", num_workers)):
        hyper[key] = value

    state = self.system_dict["local"]
    if hyper["use_gpu"]:
        if torch.cuda.is_available():
            state["num_gpus"] = torch.cuda.device_count()
            torch.cuda.manual_seed(123)
        else:
            torch.manual_seed(123)

    # NOTE(review): "num_gpus" is only written on the CUDA path above; it is
    # presumably pre-populated elsewhere for the other paths -- confirm.
    loader_kwargs = {
        "batch_size": hyper["batch_size"] * state["num_gpus"],
        "shuffle": True,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": hyper["num_workers"],
    }
    state["training_params"] = loader_kwargs

    state["training_set"] = CocoDataset(
        root_dir=train_cfg["root_dir"] + "/" + train_cfg["coco_dir"],
        img_dir=train_cfg["img_dir"],
        set_dir=train_cfg["set_dir"],
        transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))

    state["training_generator"] = DataLoader(state["training_set"],
                                             **state["training_params"])
def train(opt):
    """Train EfficientDet with a configurable backbone, with resume support.

    With ``opt.resume`` set, the whole model object is loaded from
    ``signatrix_efficientdet_coco_latest.pth`` in ``opt.saved_path``,
    training restarts at ``opt.start_epoch`` and the learning rate is scaled
    by ``0.1 ** round(start_epoch / 100)``. Otherwise a fresh model is
    built for ``opt.backbone_network``.

    Every ``opt.test_interval`` epochs the model is evaluated on val2017 and
    a ``..._best_epoch<N>.pth`` checkpoint is written when the loss improves
    by more than ``opt.es_min_delta``. Every ``opt.save_interval`` epochs the
    "latest" checkpoint is overwritten.

    Args:
        opt: Options object. Attributes read here: ``batch_size``,
            ``data_path``, ``log_path``, ``saved_path``, ``resume``,
            ``start_epoch``, ``backbone_network``, ``remote_loading``,
            ``advprop``, ``lr`` (rewritten on resume), ``num_epochs``,
            ``test_interval``, ``save_interval``, ``es_min_delta`` and
            ``es_patience``.

    Returns:
        None
    """
    use_cuda = torch.cuda.is_available()
    num_gpus = 1
    if use_cuda:
        num_gpus = torch.cuda.device_count()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    training_params = {
        "batch_size": opt.batch_size * num_gpus,
        "shuffle": True,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": 12,
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False,
        "collate_fn": collater,
        "num_workers": 12,
    }

    training_set = CocoDataset(
        root_dir=opt.data_path,
        set="train2017",
        transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
    training_generator = DataLoader(training_set, **training_params)

    test_set = CocoDataset(
        root_dir=opt.data_path,
        set="val2017",
        transform=transforms.Compose([Normalizer(), Resizer()]))
    test_generator = DataLoader(test_set, **test_params)

    # Passed to EfficientDet as conv_in_channels, keyed by backbone name.
    channels_map = {
        'efficientnet-b0': [40, 80, 192],
        'efficientnet-b1': [40, 80, 192],
        'efficientnet-b2': [48, 88, 208],
        'efficientnet-b3': [48, 96, 232],
        'efficientnet-b4': [56, 112, 272],
        'efficientnet-b5': [64, 128, 304],
        'efficientnet-b6': [72, 144, 344],
        'efficientnet-b7': [80, 160, 384],
        'efficientnet-b8': [80, 160, 384],
    }

    # The log directory is recreated from scratch on every run.
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    writer = SummaryWriter(opt.log_path)

    if opt.resume:
        resume_path = os.path.join(opt.saved_path,
                                   'signatrix_efficientdet_coco_latest.pth')
        loaded = torch.load(resume_path)
        # Checkpoints are saved as the whole model object, which is only
        # wrapped in DataParallel when training ran on CUDA.
        model = loaded.module if isinstance(loaded,
                                            nn.DataParallel) else loaded
        print("model loaded from {}".format(resume_path))
    else:
        model = EfficientDet(
            num_classes=training_set.num_classes(),
            network=opt.backbone_network,
            remote_loading=opt.remote_loading,
            advprop=opt.advprop,
            conv_in_channels=channels_map[opt.backbone_network])
        print("model created with backbone {}, advprop {}".format(
            opt.backbone_network, opt.advprop))

    if use_cuda:
        model = model.cuda()
        model = nn.DataParallel(model)

    if opt.resume:
        m = round(opt.start_epoch / 100)
        opt.lr = opt.lr * (0.1**m)

    optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    start_epoch = opt.start_epoch if opt.resume else 0

    best_loss = 1e5
    # Measure early-stopping patience from the first epoch actually run.
    best_epoch = start_epoch
    model.train()

    num_iter_per_epoch = len(training_generator)

    for epoch in range(start_epoch, opt.num_epochs):
        model.train()
        epoch_loss = []
        progress_bar = tqdm(training_generator)
        for step, data in enumerate(progress_bar):
            # Best-effort training: a failing batch is reported and skipped.
            try:
                optimizer.zero_grad()
                if use_cuda:
                    cls_loss, reg_loss = model(
                        [data['img'].cuda().float(), data['annot'].cuda()])
                else:
                    cls_loss, reg_loss = model(
                        [data['img'].float(), data['annot']])

                cls_loss = cls_loss.mean()
                reg_loss = reg_loss.mean()
                loss = cls_loss + reg_loss
                if loss == 0:
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                optimizer.step()
                epoch_loss.append(float(loss))
                total_loss = np.mean(epoch_loss)

                progress_bar.set_description(
                    '{} Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Batch loss: {:.5f} Total loss: {:.5f}'
                    .format(datetime.now(), epoch + 1, opt.num_epochs,
                            step + 1, num_iter_per_epoch, cls_loss, reg_loss,
                            loss, total_loss))
                global_step = epoch * num_iter_per_epoch + step
                writer.add_scalar('Train/Total_loss', total_loss, global_step)
                writer.add_scalar('Train/Regression_loss', reg_loss,
                                  global_step)
                writer.add_scalar('Train/Classfication_loss (focal loss)',
                                  cls_loss, global_step)
            except Exception as e:
                print(e)
                continue

        # If every batch was skipped, np.mean([]) would be NaN and the
        # scheduler would count it as a bad epoch; skip the step instead.
        if epoch_loss:
            scheduler.step(np.mean(epoch_loss))

        if epoch % opt.test_interval == 0:
            model.eval()
            loss_regression_ls = []
            loss_classification_ls = []
            for data in test_generator:
                with torch.no_grad():
                    if use_cuda:
                        cls_loss, reg_loss = model(
                            [data['img'].cuda().float(),
                             data['annot'].cuda()])
                    else:
                        cls_loss, reg_loss = model(
                            [data['img'].float(), data['annot']])

                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss_classification_ls.append(float(cls_loss))
                    loss_regression_ls.append(float(reg_loss))

            cls_loss = np.mean(loss_classification_ls)
            reg_loss = np.mean(loss_regression_ls)
            loss = cls_loss + reg_loss

            print(
                '{} Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                .format(datetime.now(), epoch + 1, opt.num_epochs, cls_loss,
                        reg_loss, loss))
            writer.add_scalar('Test/Total_loss', loss, epoch)
            writer.add_scalar('Test/Regression_loss', reg_loss, epoch)
            writer.add_scalar('Test/Classfication_loss (focal loss)',
                              cls_loss, epoch)

            if loss + opt.es_min_delta < best_loss:
                best_loss = loss
                best_epoch = epoch
                # ONNX export is not performed in this variant.
                torch.save(
                    model,
                    os.path.join(
                        opt.saved_path,
                        "signatrix_efficientdet_coco_best_epoch{}.pth".format(
                            epoch)))

            print("epoch:", epoch, "best_epoch:", best_epoch,
                  "epoch - best_epoch=", epoch - best_epoch)

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                # Report the best loss, not the loss of the current epoch.
                print(
                    "Stop training at epoch {}. The lowest loss achieved is {}"
                    .format(epoch, best_loss))
                break

        if epoch % opt.save_interval == 0:
            torch.save(
                model,
                os.path.join(opt.saved_path,
                             "signatrix_efficientdet_coco_latest.pth"))
    writer.close()
def train(opt):
    """Train EfficientDet on an OpenImagesDataset and keep the best model.

    Returns immediately (after printing a message) when ``opt.data_path`` is
    not a directory. Otherwise trains for up to ``opt.num_epochs`` epochs,
    evaluates every ``opt.test_interval`` epochs on the "val" split, saves
    ``<model_name>.pth`` whenever the evaluation loss improves by more than
    ``opt.es_min_delta``, and always saves ``<model_name>-final.pth`` when
    the loop ends.

    Args:
        opt: Options object. Attributes read here: ``data_path``,
            ``batch_size``, ``log_path``, ``saved_path``, ``lr``,
            ``num_epochs``, ``test_interval``, ``es_min_delta``,
            ``es_patience`` and ``model_name``.

    Returns:
        None
    """
    if not os.path.isdir(opt.data_path):
        print(f"Data for dataset not found at {opt.data_path}")
        return

    use_cuda = torch.cuda.is_available()
    num_gpus = 1
    if use_cuda:
        num_gpus = torch.cuda.device_count()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    training_params = {
        "batch_size": opt.batch_size * num_gpus,
        "shuffle": True,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": 12,
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False,
        "collate_fn": collater,
        "num_workers": 12,
    }

    training_set = OpenImagesDataset(
        root_dir=opt.data_path,
        set_name="train",
        transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
    training_loader = DataLoader(training_set, **training_params)

    test_set = OpenImagesDataset(
        root_dir=opt.data_path,
        set_name="val",
        transform=transforms.Compose([Normalizer(), Resizer()]))
    test_loader = DataLoader(test_set, **test_params)

    model = EfficientDet(num_classes=training_set.num_classes())

    # The log directory is recreated from scratch on every run.
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    writer = SummaryWriter(opt.log_path)
    if use_cuda:
        model = model.cuda()
        model = nn.DataParallel(model)

    optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    best_loss = 1e5
    best_epoch = 0
    model.train()

    num_iter_per_epoch = len(training_loader)
    for epoch in range(opt.num_epochs):
        model.train()
        epoch_loss = []
        progress_bar = tqdm(training_loader)
        for step, data in enumerate(progress_bar):
            # Best-effort training: a failing batch is reported and skipped.
            try:
                optimizer.zero_grad()
                if use_cuda:
                    cls_loss, reg_loss = model(
                        [data['img'].cuda().float(), data['annot'].cuda()])
                else:
                    cls_loss, reg_loss = model(
                        [data['img'].float(), data['annot']])

                cls_loss = cls_loss.mean()
                reg_loss = reg_loss.mean()
                loss = cls_loss + reg_loss
                if loss == 0:
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                optimizer.step()
                epoch_loss.append(float(loss))
                total_loss = np.mean(epoch_loss)

                progress_bar.set_description(
                    f'Epoch: {epoch + 1}/{opt.num_epochs} | '
                    f'Iteration: {step + 1}/{num_iter_per_epoch} | '
                    f'Cls loss: {cls_loss:.5f} | Reg loss: {reg_loss:.5f} | '
                    f'Batch loss: {loss:.5f} | Total loss: {total_loss:.5f}')
                global_step = epoch * num_iter_per_epoch + step
                writer.add_scalar('Train/Total_loss', total_loss, global_step)
                writer.add_scalar('Train/Regression_loss', reg_loss,
                                  global_step)
                writer.add_scalar('Train/Classification_loss (focal loss)',
                                  cls_loss, global_step)
            except Exception as e:
                print(e)
                continue

        # If every batch was skipped, np.mean([]) would be NaN and the
        # scheduler would count it as a bad epoch; skip the step instead.
        if epoch_loss:
            scheduler.step(np.mean(epoch_loss))

        if epoch % opt.test_interval == 0:
            model.eval()
            loss_regression_ls = []
            loss_classification_ls = []
            for data in test_loader:
                with torch.no_grad():
                    if use_cuda:
                        cls_loss, reg_loss = model(
                            [data['img'].cuda().float(),
                             data['annot'].cuda()])
                    else:
                        cls_loss, reg_loss = model(
                            [data['img'].float(), data['annot']])

                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()

                    loss_classification_ls.append(float(cls_loss))
                    loss_regression_ls.append(float(reg_loss))

            cls_loss = np.mean(loss_classification_ls)
            reg_loss = np.mean(loss_regression_ls)
            loss = cls_loss + reg_loss

            print(f'Epoch: {epoch + 1}/{opt.num_epochs} | '
                  f'Classification loss: {cls_loss:1.5f} | '
                  f'Regression loss: {reg_loss:1.5f} | Total loss: {loss:1.5f}')
            writer.add_scalar('Test/Total_loss', loss, epoch)
            writer.add_scalar('Test/Regression_loss', reg_loss, epoch)
            writer.add_scalar('Test/Classification_loss (focal loss)',
                              cls_loss, epoch)

            if loss + opt.es_min_delta < best_loss:
                best_loss = loss
                best_epoch = epoch
                torch.save(
                    model,
                    os.path.join(opt.saved_path, f'{opt.model_name}.pth'))

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                # Report the best loss, not the loss of the current epoch.
                print(f"Stop training at epoch {epoch}. "
                      f"The lowest loss achieved is {best_loss}")
                break

    torch.save(model,
               os.path.join(opt.saved_path, f'{opt.model_name}-final.pth'))
    writer.flush()
    writer.close()