def Model(self, gpu_devices=None):
    '''
    User function: Set Model parameters

    Builds an EfficientDet sized for the number of classes reported by the
    training set, optionally moves it to the GPU(s), stores it in
    ``self.system_dict["local"]["model"]`` and puts it in training mode.

    Args:
        gpu_devices (list): List of GPU Device IDs to be used in training.
            Defaults to ``[0]`` when omitted or ``None``. Only used when
            ``self.system_dict["params"]["use_gpu"]`` is truthy.

    Returns:
        None
    '''
    # Work on a private copy: the list is stored in system_dict, so sharing a
    # default-argument object (or the caller's list) would leak later
    # mutations across calls/instances.
    gpu_devices = [0] if gpu_devices is None else list(gpu_devices)

    num_classes = self.system_dict["local"]["training_set"].num_classes()
    efficientdet = EfficientDet(num_classes=num_classes)

    if self.system_dict["params"]["use_gpu"]:
        self.system_dict["params"]["gpu_devices"] = gpu_devices

        # ','.join yields "0" for a single id and "0,1,..." for several, so no
        # special case is needed for one device.
        # NOTE(review): this only takes effect if CUDA has not been
        # initialised earlier in the process -- confirm against callers.
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(
            str(device_id) for device_id in gpu_devices)

        # Fall back to CPU when CUDA is requested but not available.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.system_dict["local"]["device"] = device

        efficientdet = efficientdet.to(device)
        efficientdet = torch.nn.DataParallel(efficientdet).to(device)

    self.system_dict["local"]["model"] = efficientdet
    self.system_dict["local"]["model"].train()
def Model(self, gpu_devices=None):
    '''
    User function: Set Model parameters

    Creates an EfficientDet whose class count comes from the training set,
    optionally places it on the GPU(s) wrapped in ``DataParallel``, saves it
    under ``self.system_dict["local"]["model"]`` and switches it to training
    mode.

    Args:
        gpu_devices (list): List of GPU Device IDs to be used in training.
            Defaults to ``[0]`` when omitted or ``None``. Ignored unless
            ``self.system_dict["params"]["use_gpu"]`` is truthy.

    Returns:
        None
    '''
    # Never keep a reference to a shared default list: the value is persisted
    # in system_dict, so take a fresh copy per call.
    if gpu_devices is None:
        gpu_devices = [0]
    else:
        gpu_devices = list(gpu_devices)

    params = self.system_dict["params"]
    local = self.system_dict["local"]

    efficientdet = EfficientDet(num_classes=local["training_set"].num_classes())

    if params["use_gpu"]:
        params["gpu_devices"] = gpu_devices

        # A single join covers both the one-device and multi-device cases.
        # NOTE(review): has no effect if CUDA was already initialised in this
        # process -- confirm against callers.
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(map(str, gpu_devices))

        # Use CPU when CUDA was requested but is not actually available.
        local["device"] = 'cuda' if torch.cuda.is_available() else 'cpu'

        efficientdet = efficientdet.to(local["device"])
        efficientdet = torch.nn.DataParallel(efficientdet).to(local["device"])

    local["model"] = efficientdet
    local["model"].train()
def Model(self, model_name="efficientnet-b0", gpu_devices=None, load_pretrained_model_from=None):
    '''
    User function: Set Model parameters

    Either builds a fresh EfficientDet or restores one from a saved file,
    optionally moves it to the GPU(s) wrapped in ``DataParallel``, stores it
    in ``self.system_dict["local"]["model"]`` and puts it in training mode.

    Args:
        model_name (str): Backbone name; its last character is parsed as an
            integer and passed as ``compound_coef`` (e.g. "efficientnet-b0"
            gives 0). Only used when no pretrained model is loaded.
        gpu_devices (list): List of GPU Device IDs to be used in training.
            Defaults to ``[0]`` when omitted or ``None``. Only used when
            ``self.system_dict["params"]["use_gpu"]`` is truthy.
        load_pretrained_model_from (str): Path to a model saved with
            ``torch.save``. When falsy, a new model is created instead.

    Returns:
        None
    '''
    # Private copy: the list is persisted in system_dict, so a shared default
    # (or the caller's own list) must not be aliased.
    gpu_devices = [0] if gpu_devices is None else list(gpu_devices)

    if not load_pretrained_model_from:
        num_classes = self.system_dict["local"]["training_set"].num_classes()
        coeff = int(model_name[-1])
        efficientdet = EfficientDet(num_classes=num_classes,
                                    compound_coef=coeff,
                                    model_name=model_name)
    else:
        # SECURITY NOTE: torch.load unpickles the file and can execute
        # arbitrary code -- only load checkpoints from trusted sources.
        loaded = torch.load(load_pretrained_model_from)
        # Checkpoints saved from a DataParallel wrapper expose the real model
        # as `.module`; accept plain (unwrapped) models as well.
        efficientdet = getattr(loaded, "module", loaded)

    # Device placement is identical for new and restored models.
    if self.system_dict["params"]["use_gpu"]:
        self.system_dict["params"]["gpu_devices"] = gpu_devices

        # ','.join covers both the single-device and multi-device cases.
        # NOTE(review): this only takes effect if CUDA has not been
        # initialised earlier in the process -- confirm against callers.
        os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(
            str(device_id) for device_id in gpu_devices)

        # Fall back to CPU when CUDA is requested but not available.
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.system_dict["local"]["device"] = device

        efficientdet = efficientdet.to(device)
        efficientdet = torch.nn.DataParallel(efficientdet).to(device)

    self.system_dict["local"]["model"] = efficientdet
    self.system_dict["local"]["model"].train()
def train(opt):
    """Train EfficientDet on COCO and periodically evaluate/export it.

    Reads the "train2017" and "val2017" splits under ``opt.data_path``, trains
    with Adam and a ReduceLROnPlateau scheduler, writes scalars to a
    TensorBoard-style writer in ``opt.log_path`` and, whenever the evaluation
    loss improves by more than ``opt.es_min_delta``, saves the model
    (``.pth``) and an ONNX export into ``opt.saved_path``.

    Args:
        opt: Options object. Attributes read here: ``batch_size``,
            ``data_path``, ``log_path``, ``saved_path``, ``lr``,
            ``num_epochs``, ``test_interval``, ``es_min_delta`` and
            ``es_patience`` (early stopping is active only when > 0).

    Returns:
        None

    Side effects:
        Deletes and recreates ``opt.log_path``; creates ``opt.saved_path`` if
        missing; writes checkpoint/ONNX files.
    """
    use_cuda = torch.cuda.is_available()
    num_gpus = torch.cuda.device_count() if use_cuda else 1

    # torch.manual_seed seeds the CPU generator *and* all CUDA devices.
    # Seeding only CUDA (as before) left CPU-side randomness such as
    # DataLoader shuffling unseeded on GPU machines.
    torch.manual_seed(123)

    training_params = {
        "batch_size": opt.batch_size * num_gpus,
        "shuffle": True,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": 12,
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False,
        "collate_fn": collater,
        "num_workers": 12,
    }

    training_set = CocoDataset(
        root_dir=opt.data_path, set="train2017",
        transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
    training_generator = DataLoader(training_set, **training_params)

    test_set = CocoDataset(
        root_dir=opt.data_path, set="val2017",
        transform=transforms.Compose([Normalizer(), Resizer()]))
    test_generator = DataLoader(test_set, **test_params)

    model = EfficientDet(num_classes=training_set.num_classes())

    # Start every run with an empty log directory.
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    if use_cuda:
        model = model.cuda()
        model = nn.DataParallel(model)

    optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, verbose=True)

    def compute_losses(batch):
        """Run one forward pass and return (mean cls loss, mean reg loss)."""
        images, annotations = batch['img'], batch['annot']
        if use_cuda:
            images, annotations = images.cuda(), annotations.cuda()
        cls_loss, reg_loss = model([images.float(), annotations])
        return cls_loss.mean(), reg_loss.mean()

    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_generator)

    writer = SummaryWriter(opt.log_path)
    # try/finally so the writer is closed even if training raises.
    try:
        for epoch in range(opt.num_epochs):
            model.train()
            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for step, data in enumerate(progress_bar):
                # Best effort: a failing batch is reported and skipped so one
                # bad batch does not abort the whole run.
                try:
                    optimizer.zero_grad()
                    cls_loss, reg_loss = compute_losses(data)
                    loss = cls_loss + reg_loss
                    if loss == 0:
                        continue
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()
                    epoch_loss.append(float(loss))
                    total_loss = np.mean(epoch_loss)

                    progress_bar.set_description(
                        'Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Batch loss: {:.5f} Total loss: {:.5f}'
                        .format(epoch + 1, opt.num_epochs, step + 1,
                                num_iter_per_epoch, cls_loss, reg_loss, loss,
                                total_loss))
                    global_step = epoch * num_iter_per_epoch + step
                    writer.add_scalar('Train/Total_loss', total_loss,
                                      global_step)
                    writer.add_scalar('Train/Regression_loss', reg_loss,
                                      global_step)
                    writer.add_scalar('Train/Classfication_loss (focal loss)',
                                      cls_loss, global_step)
                except Exception as e:
                    print(e)
                    continue

            # np.mean([]) is NaN; do not feed that to the scheduler when every
            # batch of the epoch was skipped.
            if epoch_loss:
                scheduler.step(np.mean(epoch_loss))
            else:
                print("Epoch {}: no batch produced a usable loss; "
                      "skipping LR scheduler step".format(epoch + 1))

            if epoch % opt.test_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for data in test_generator:
                    with torch.no_grad():
                        cls_loss, reg_loss = compute_losses(data)
                    loss_classification_ls.append(float(cls_loss))
                    loss_regression_ls.append(float(reg_loss))

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    'Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                    .format(epoch + 1, opt.num_epochs, cls_loss, reg_loss,
                            loss))
                writer.add_scalar('Test/Total_loss', loss, epoch)
                writer.add_scalar('Test/Regression_loss', reg_loss, epoch)
                writer.add_scalar('Test/Classfication_loss (focal loss)',
                                  cls_loss, epoch)

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch
                    torch.save(
                        model,
                        os.path.join(opt.saved_path,
                                     "signatrix_efficientdet_coco.pth"))

                    dummy_input = torch.rand(opt.batch_size, 3, 512, 512)
                    if use_cuda:
                        dummy_input = dummy_input.cuda()

                    # Export the bare model, not the DataParallel wrapper.
                    if isinstance(model, nn.DataParallel):
                        export_model = model.module
                    else:
                        export_model = model
                    backbone = export_model.backbone_net.model
                    # Swish is switched to its non-memory-efficient form for
                    # the export and restored afterwards, even if the export
                    # fails.
                    backbone.set_swish(memory_efficient=False)
                    try:
                        torch.onnx.export(
                            export_model, dummy_input,
                            os.path.join(opt.saved_path,
                                         "signatrix_efficientdet_coco.onnx"),
                            verbose=False, opset_version=11)
                    finally:
                        backbone.set_swish(memory_efficient=True)

                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    # Report the best loss, not the current epoch's loss.
                    print(
                        "Stop training at epoch {}. The lowest loss achieved is {}"
                        .format(epoch, best_loss))
                    break
    finally:
        writer.close()
def train(opt):
    """Train (or resume training of) EfficientDet on COCO.

    Reads the "train2017" and "val2017" splits under ``opt.data_path``, trains
    with Adam and a ReduceLROnPlateau scheduler, and logs scalars to
    ``opt.log_path``. A "best" checkpoint is saved whenever the evaluation
    loss improves by more than ``opt.es_min_delta``; a "latest" checkpoint is
    saved every ``opt.save_interval`` epochs and is the file used by
    ``opt.resume``.

    Args:
        opt: Options object. Attributes read here: ``batch_size``,
            ``data_path``, ``log_path``, ``saved_path``, ``resume``,
            ``start_epoch``, ``backbone_network``, ``remote_loading``,
            ``advprop``, ``lr``, ``num_epochs``, ``test_interval``,
            ``save_interval``, ``es_min_delta`` and ``es_patience`` (early
            stopping is active only when > 0). When resuming, ``opt.lr`` is
            overwritten in place with the decayed learning rate.

    Returns:
        None

    Raises:
        KeyError: If ``opt.backbone_network`` is not a known backbone (only
            when not resuming).

    Side effects:
        Deletes and recreates ``opt.log_path``; creates ``opt.saved_path`` if
        missing; writes checkpoint files.
    """
    use_cuda = torch.cuda.is_available()
    num_gpus = torch.cuda.device_count() if use_cuda else 1

    # torch.manual_seed seeds the CPU generator *and* all CUDA devices.
    # Seeding only CUDA (as before) left CPU-side randomness such as
    # DataLoader shuffling unseeded on GPU machines.
    torch.manual_seed(123)

    training_params = {
        "batch_size": opt.batch_size * num_gpus,
        "shuffle": True,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": 12,
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False,
        "collate_fn": collater,
        "num_workers": 12,
    }

    training_set = CocoDataset(
        root_dir=opt.data_path, set="train2017",
        transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
    training_generator = DataLoader(training_set, **training_params)

    test_set = CocoDataset(
        root_dir=opt.data_path, set="val2017",
        transform=transforms.Compose([Normalizer(), Resizer()]))
    test_generator = DataLoader(test_set, **test_params)

    # Values passed as `conv_in_channels` for each supported backbone.
    channels_map = {
        'efficientnet-b0': [40, 80, 192],
        'efficientnet-b1': [40, 80, 192],
        'efficientnet-b2': [48, 88, 208],
        'efficientnet-b3': [48, 96, 232],
        'efficientnet-b4': [56, 112, 272],
        'efficientnet-b5': [64, 128, 304],
        'efficientnet-b6': [72, 144, 344],
        'efficientnet-b7': [80, 160, 384],
        'efficientnet-b8': [80, 160, 384],
    }

    # NOTE(review): the log directory is wiped even when resuming -- confirm
    # this is intended.
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    if opt.resume:
        resume_path = os.path.join(opt.saved_path,
                                   'signatrix_efficientdet_coco_latest.pth')
        # SECURITY NOTE: torch.load unpickles the file and can execute
        # arbitrary code -- only resume from trusted checkpoints.
        loaded = torch.load(resume_path)
        # Checkpoints are DataParallel-wrapped only when they were saved on a
        # CUDA machine; accept the unwrapped form too.
        model = getattr(loaded, "module", loaded)
        print("model loaded from {}".format(resume_path))
    else:
        model = EfficientDet(
            num_classes=training_set.num_classes(),
            network=opt.backbone_network,
            remote_loading=opt.remote_loading,
            advprop=opt.advprop,
            conv_in_channels=channels_map[opt.backbone_network])
        print("model created with backbone {}, advprop {}".format(
            opt.backbone_network, opt.advprop))

    if use_cuda:
        model = model.cuda()
        model = nn.DataParallel(model)

    if opt.resume:
        # Decay the learning rate by 10x for every ~100 epochs already done.
        m = round(opt.start_epoch / 100)
        opt.lr = opt.lr * (0.1**m)

    optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, verbose=True)

    def compute_losses(batch):
        """Run one forward pass and return (mean cls loss, mean reg loss)."""
        images, annotations = batch['img'], batch['annot']
        if use_cuda:
            images, annotations = images.cuda(), annotations.cuda()
        cls_loss, reg_loss = model([images.float(), annotations])
        return cls_loss.mean(), reg_loss.mean()

    start_epoch = opt.start_epoch if opt.resume else 0
    best_loss = 1e5
    # Count early-stopping patience from the first epoch of *this* run, so a
    # resumed run is not stopped because of epochs it never executed.
    best_epoch = start_epoch
    model.train()
    num_iter_per_epoch = len(training_generator)

    writer = SummaryWriter(opt.log_path)
    # try/finally so the writer is closed even if training raises.
    try:
        for epoch in range(start_epoch, opt.num_epochs):
            model.train()
            epoch_loss = []
            progress_bar = tqdm(training_generator)
            for step, data in enumerate(progress_bar):
                # Best effort: a failing batch is reported and skipped so one
                # bad batch does not abort the whole run.
                try:
                    optimizer.zero_grad()
                    cls_loss, reg_loss = compute_losses(data)
                    loss = cls_loss + reg_loss
                    if loss == 0:
                        continue
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()
                    epoch_loss.append(float(loss))
                    total_loss = np.mean(epoch_loss)

                    progress_bar.set_description(
                        '{} Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. Batch loss: {:.5f} Total loss: {:.5f}'
                        .format(datetime.now(), epoch + 1, opt.num_epochs,
                                step + 1, num_iter_per_epoch, cls_loss,
                                reg_loss, loss, total_loss))
                    global_step = epoch * num_iter_per_epoch + step
                    writer.add_scalar('Train/Total_loss', total_loss,
                                      global_step)
                    writer.add_scalar('Train/Regression_loss', reg_loss,
                                      global_step)
                    writer.add_scalar('Train/Classfication_loss (focal loss)',
                                      cls_loss, global_step)
                except Exception as e:
                    print(e)
                    continue

            # np.mean([]) is NaN; do not feed that to the scheduler when every
            # batch of the epoch was skipped.
            if epoch_loss:
                scheduler.step(np.mean(epoch_loss))
            else:
                print("Epoch {}: no batch produced a usable loss; "
                      "skipping LR scheduler step".format(epoch + 1))

            if epoch % opt.test_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for data in test_generator:
                    with torch.no_grad():
                        cls_loss, reg_loss = compute_losses(data)
                    loss_classification_ls.append(float(cls_loss))
                    loss_regression_ls.append(float(reg_loss))

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    '{} Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. Total loss: {:1.5f}'
                    .format(datetime.now(), epoch + 1, opt.num_epochs,
                            cls_loss, reg_loss, loss))
                writer.add_scalar('Test/Total_loss', loss, epoch)
                writer.add_scalar('Test/Regression_loss', reg_loss, epoch)
                writer.add_scalar('Test/Classfication_loss (focal loss)',
                                  cls_loss, epoch)

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch
                    torch.save(
                        model,
                        os.path.join(
                            opt.saved_path,
                            "signatrix_efficientdet_coco_best_epoch{}.pth".
                            format(epoch)))
                    # NOTE: ONNX export of the best checkpoint is not
                    # performed in this variant.

                print("epoch:", epoch, "best_epoch:", best_epoch,
                      "epoch - best_epoch=", epoch - best_epoch)

                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    # Report the best loss, not the current epoch's loss.
                    print(
                        "Stop training at epoch {}. The lowest loss achieved is {}"
                        .format(epoch, best_loss))
                    break

            # Rolling checkpoint; this is the file `opt.resume` loads.
            if epoch % opt.save_interval == 0:
                torch.save(
                    model,
                    os.path.join(opt.saved_path,
                                 "signatrix_efficientdet_coco_latest.pth"))
    finally:
        writer.close()
def train(opt):
    """Train EfficientDet on an Open Images style dataset.

    Reads the "train" and "val" splits under ``opt.data_path``, trains with
    Adam and a ReduceLROnPlateau scheduler, and logs scalars to
    ``opt.log_path``. The model is saved as ``<opt.model_name>.pth`` whenever
    the evaluation loss improves by more than ``opt.es_min_delta``, and as
    ``<opt.model_name>-final.pth`` once the epoch loop ends.

    Args:
        opt: Options object. Attributes read here: ``data_path``,
            ``batch_size``, ``log_path``, ``saved_path``, ``lr``,
            ``num_epochs``, ``test_interval``, ``es_min_delta``,
            ``es_patience`` (early stopping is active only when > 0) and
            ``model_name``.

    Returns:
        None. Returns early, after printing a message, when
        ``opt.data_path`` is not an existing directory.

    Side effects:
        Deletes and recreates ``opt.log_path``; creates ``opt.saved_path`` if
        missing; writes checkpoint files.
    """
    if not os.path.isdir(opt.data_path):
        print(f"Data for dataset not found at {opt.data_path}")
        return

    use_cuda = torch.cuda.is_available()
    num_gpus = torch.cuda.device_count() if use_cuda else 1

    # torch.manual_seed seeds the CPU generator *and* all CUDA devices.
    # Seeding only CUDA (as before) left CPU-side randomness such as
    # DataLoader shuffling unseeded on GPU machines.
    torch.manual_seed(123)

    training_params = {
        "batch_size": opt.batch_size * num_gpus,
        "shuffle": True,
        "drop_last": True,
        "collate_fn": collater,
        "num_workers": 12,
    }
    test_params = {
        "batch_size": opt.batch_size,
        "shuffle": False,
        "drop_last": False,
        "collate_fn": collater,
        "num_workers": 12,
    }

    training_set = OpenImagesDataset(
        root_dir=opt.data_path, set_name="train",
        transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
    training_loader = DataLoader(training_set, **training_params)

    test_set = OpenImagesDataset(
        root_dir=opt.data_path, set_name="val",
        transform=transforms.Compose([Normalizer(), Resizer()]))
    test_loader = DataLoader(test_set, **test_params)

    model = EfficientDet(num_classes=training_set.num_classes())

    # Start every run with an empty log directory.
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    if use_cuda:
        model = model.cuda()
        model = nn.DataParallel(model)

    optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, patience=3, verbose=True)

    def compute_losses(batch):
        """Run one forward pass and return (mean cls loss, mean reg loss)."""
        images, annotations = batch['img'], batch['annot']
        if use_cuda:
            images, annotations = images.cuda(), annotations.cuda()
        cls_loss, reg_loss = model([images.float(), annotations])
        return cls_loss.mean(), reg_loss.mean()

    best_loss = 1e5
    best_epoch = 0
    model.train()
    num_iter_per_epoch = len(training_loader)

    writer = SummaryWriter(opt.log_path)
    # try/finally so the writer is flushed and closed even if training raises.
    try:
        for epoch in range(opt.num_epochs):
            model.train()
            epoch_loss = []
            progress_bar = tqdm(training_loader)
            for step, data in enumerate(progress_bar):
                # Best effort: a failing batch is reported and skipped so one
                # bad batch does not abort the whole run.
                try:
                    optimizer.zero_grad()
                    cls_loss, reg_loss = compute_losses(data)
                    loss = cls_loss + reg_loss
                    if loss == 0:
                        continue
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                    optimizer.step()
                    epoch_loss.append(float(loss))
                    total_loss = np.mean(epoch_loss)

                    progress_bar.set_description(
                        f'Epoch: {epoch + 1}/{opt.num_epochs} | '
                        f'Iteration: {step + 1}/{num_iter_per_epoch} | '
                        f'Cls loss: {cls_loss:.5f} | Reg loss: {reg_loss:.5f} | '
                        f'Batch loss: {loss:.5f} | Total loss: {total_loss:.5f}')
                    global_step = epoch * num_iter_per_epoch + step
                    writer.add_scalar('Train/Total_loss', total_loss,
                                      global_step)
                    writer.add_scalar('Train/Regression_loss', reg_loss,
                                      global_step)
                    writer.add_scalar('Train/Classification_loss (focal loss)',
                                      cls_loss, global_step)
                except Exception as e:
                    print(e)
                    continue

            # np.mean([]) is NaN; do not feed that to the scheduler when every
            # batch of the epoch was skipped.
            if epoch_loss:
                scheduler.step(np.mean(epoch_loss))
            else:
                print(f"Epoch {epoch + 1}: no batch produced a usable loss; "
                      "skipping LR scheduler step")

            if epoch % opt.test_interval == 0:
                model.eval()
                loss_regression_ls = []
                loss_classification_ls = []
                for data in test_loader:
                    with torch.no_grad():
                        cls_loss, reg_loss = compute_losses(data)
                    loss_classification_ls.append(float(cls_loss))
                    loss_regression_ls.append(float(reg_loss))

                cls_loss = np.mean(loss_classification_ls)
                reg_loss = np.mean(loss_regression_ls)
                loss = cls_loss + reg_loss

                print(
                    f'Epoch: {epoch + 1}/{opt.num_epochs} | '
                    f'Classification loss: {cls_loss:1.5f} | '
                    f'Regression loss: {reg_loss:1.5f} | Total loss: {loss:1.5f}'
                )
                writer.add_scalar('Test/Total_loss', loss, epoch)
                writer.add_scalar('Test/Regression_loss', reg_loss, epoch)
                writer.add_scalar('Test/Classification_loss (focal loss)',
                                  cls_loss, epoch)

                if loss + opt.es_min_delta < best_loss:
                    best_loss = loss
                    best_epoch = epoch
                    torch.save(
                        model,
                        os.path.join(opt.saved_path, f'{opt.model_name}.pth'))

                # Early stopping
                if epoch - best_epoch > opt.es_patience > 0:
                    # Report the best loss, not the current epoch's loss.
                    print(
                        f"Stop training at epoch {epoch}. The lowest loss achieved is {best_loss}"
                    )
                    break

        # Always keep the last state too, whether or not it is the best one.
        torch.save(model,
                   os.path.join(opt.saved_path, f'{opt.model_name}-final.pth'))
    finally:
        writer.flush()
        writer.close()