import os
import shutil
from datetime import datetime

import cv2
import numpy as np
import torch
import torch.nn as nn
from torch.backends import cudnn
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter  # or: from tensorboardX import SummaryWriter
from torchvision import transforms
from tqdm import tqdm

# Project-local modules (module paths are assumptions based on the usual layout of this
# repository; adjust to the actual package structure):
# from src.dataset import CocoDataset, Resizer, Normalizer, Augmenter, collater
# from src.model import EfficientDet
# from src.config import COCO_CLASSES, colors


def train(opt):
    num_gpus = 1
    if torch.cuda.is_available():
        num_gpus = torch.cuda.device_count()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    training_params = {"batch_size": opt.batch_size * num_gpus,
                       "shuffle": True,
                       "drop_last": True,
                       "collate_fn": collater,
                       "num_workers": 12}

    test_params = {"batch_size": opt.batch_size,
                   "shuffle": False,
                   "drop_last": False,
                   "collate_fn": collater,
                   "num_workers": 12}

    training_set = CocoDataset(root_dir=opt.data_path, set="train2017",
                               transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
    training_generator = DataLoader(training_set, **training_params)

    test_set = CocoDataset(root_dir=opt.data_path, set="val2017",
                           transform=transforms.Compose([Normalizer(), Resizer()]))
    test_generator = DataLoader(test_set, **test_params)

    model = EfficientDet(num_classes=training_set.num_classes())

    # Start each run with a fresh TensorBoard log directory.
    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    writer = SummaryWriter(opt.log_path)
    if torch.cuda.is_available():
        model = model.cuda()
        model = nn.DataParallel(model)

    optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

    best_loss = 1e5
    best_epoch = 0
    model.train()

    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epochs):
        model.train()
        # if torch.cuda.is_available():
        #     model.module.freeze_bn()
        # else:
        #     model.freeze_bn()
        epoch_loss = []
        progress_bar = tqdm(training_generator)
        for iter, data in enumerate(progress_bar):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    cls_loss, reg_loss = model([data['img'].cuda().float(), data['annot'].cuda()])
                else:
                    cls_loss, reg_loss = model([data['img'].float(), data['annot']])

                cls_loss = cls_loss.mean()
                reg_loss = reg_loss.mean()
                loss = cls_loss + reg_loss
                if loss == 0:
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                optimizer.step()
                epoch_loss.append(float(loss))
                total_loss = np.mean(epoch_loss)

                progress_bar.set_description(
                    'Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. '
                    'Batch loss: {:.5f} Total loss: {:.5f}'.format(
                        epoch + 1, opt.num_epochs, iter + 1, num_iter_per_epoch,
                        cls_loss, reg_loss, loss, total_loss))
                writer.add_scalar('Train/Total_loss', total_loss, epoch * num_iter_per_epoch + iter)
                writer.add_scalar('Train/Regression_loss', reg_loss, epoch * num_iter_per_epoch + iter)
                writer.add_scalar('Train/Classification_loss (focal loss)', cls_loss,
                                  epoch * num_iter_per_epoch + iter)

            except Exception as e:
                print(e)
                continue
        scheduler.step(np.mean(epoch_loss))

        if epoch % opt.test_interval == 0:
            model.eval()
            loss_regression_ls = []
            loss_classification_ls = []
            for iter, data in enumerate(test_generator):
                with torch.no_grad():
                    if torch.cuda.is_available():
                        cls_loss, reg_loss = model([data['img'].cuda().float(), data['annot'].cuda()])
                    else:
                        cls_loss, reg_loss = model([data['img'].float(), data['annot']])

                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()
                    loss_classification_ls.append(float(cls_loss))
                    loss_regression_ls.append(float(reg_loss))

            cls_loss = np.mean(loss_classification_ls)
            reg_loss = np.mean(loss_regression_ls)
            loss = cls_loss + reg_loss

            print('Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. '
                  'Total loss: {:1.5f}'.format(epoch + 1, opt.num_epochs, cls_loss, reg_loss, loss))
            writer.add_scalar('Test/Total_loss', loss, epoch)
            writer.add_scalar('Test/Regression_loss', reg_loss, epoch)
            writer.add_scalar('Test/Classification_loss (focal loss)', cls_loss, epoch)

            if loss + opt.es_min_delta < best_loss:
                best_loss = loss
                best_epoch = epoch
                torch.save(model, os.path.join(opt.saved_path, "signatrix_efficientdet_coco.pth"))

                # Export an ONNX copy of the best model; swish is switched to its
                # non-memory-efficient form because the custom autograd op cannot be traced.
                dummy_input = torch.rand(opt.batch_size, 3, 512, 512)
                if torch.cuda.is_available():
                    dummy_input = dummy_input.cuda()
                if isinstance(model, nn.DataParallel):
                    model.module.backbone_net.model.set_swish(memory_efficient=False)
                    torch.onnx.export(model.module, dummy_input,
                                      os.path.join(opt.saved_path, "signatrix_efficientdet_coco.onnx"),
                                      verbose=False, opset_version=11)
                    model.module.backbone_net.model.set_swish(memory_efficient=True)
                else:
                    model.backbone_net.model.set_swish(memory_efficient=False)
                    torch.onnx.export(model, dummy_input,
                                      os.path.join(opt.saved_path, "signatrix_efficientdet_coco.onnx"),
                                      verbose=False, opset_version=11)
                    model.backbone_net.model.set_swish(memory_efficient=True)

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print("Stop training at epoch {}. The lowest loss achieved is {}".format(epoch, loss))
                break
    writer.close()
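# All of the train()/test() variants in this file read their configuration from a single
# `opt` namespace. The sketch below is a minimal argparse parser covering the fields the
# first train() accesses (batch_size, lr, num_epochs, test_interval, es_min_delta,
# es_patience, data_path, log_path, saved_path). The default values are illustrative
# assumptions, not the repository's actual defaults.
import argparse


def get_args():
    parser = argparse.ArgumentParser("EfficientDet training options (sketch)")
    parser.add_argument("--batch_size", type=int, default=8, help="per-GPU batch size")
    parser.add_argument("--lr", type=float, default=1e-4)
    parser.add_argument("--num_epochs", type=int, default=500)
    parser.add_argument("--test_interval", type=int, default=1,
                        help="run validation every N epochs")
    parser.add_argument("--es_min_delta", type=float, default=0.0,
                        help="minimum loss improvement that counts as progress")
    parser.add_argument("--es_patience", type=int, default=0,
                        help="epochs without improvement before stopping; 0 disables")
    parser.add_argument("--data_path", type=str, default="data/COCO",
                        help="root folder of the COCO dataset")
    parser.add_argument("--log_path", type=str, default="tensorboard/signatrix_efficientdet_coco")
    parser.add_argument("--saved_path", type=str, default="trained_models")
    return parser.parse_args()


# Typical entry point:
# if __name__ == "__main__":
#     train(get_args())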
def train(opt):
    num_gpus = 1
    if torch.cuda.is_available():
        num_gpus = torch.cuda.device_count()
    else:
        raise Exception('No GPU found; this variant requires CUDA.')
    cudnn.benchmark = True

    training_params = {"batch_size": opt.batch_size * num_gpus,
                       "shuffle": True,
                       "drop_last": True,
                       "collate_fn": collater,
                       "num_workers": 12}

    test_params = {"batch_size": opt.batch_size,
                   "shuffle": False,
                   "drop_last": False,
                   "collate_fn": collater,
                   "num_workers": 12}

    training_set = CocoDataset(root_dir=opt.data_path, set="train2017",
                               transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
    training_generator = DataLoader(training_set, **training_params)

    test_set = CocoDataset(root_dir=opt.data_path, set="val2017",
                           transform=transforms.Compose([Normalizer(), Resizer()]))
    test_generator = DataLoader(test_set, **test_params)

    opt.num_classes = training_set.num_classes()
    model = EfficientDet(opt)
    if opt.resume:
        print('Loading model...')
        model.load_state_dict(torch.load(os.path.join(opt.saved_path, opt.network + '.pth')))

    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    model = model.cuda()
    model = nn.DataParallel(model)

    optimizer = torch.optim.AdamW(model.parameters(), opt.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

    best_loss = 1e5
    best_epoch = 0
    model.train()

    num_iter_per_epoch = len(training_generator)
    for epoch in range(opt.num_epochs):
        print('Epoch: {}/{}:'.format(epoch + 1, opt.num_epochs))
        model.train()
        epoch_loss = []
        progress_bar = tqdm(training_generator)
        for iter, data in enumerate(progress_bar):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    cls_loss, cls_2_loss, reg_loss = model([data['img'].cuda().float(), data['annot'].cuda()])
                else:
                    cls_loss, cls_2_loss, reg_loss = model([data['img'].float(), data['annot']])

                cls_loss = cls_loss.mean()
                reg_loss = reg_loss.mean()
                cls_2_loss = cls_2_loss.mean()
                # This variant trains an additional, second classification head.
                loss = cls_loss + cls_2_loss + reg_loss
                if loss == 0:
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                optimizer.step()
                epoch_loss.append(float(loss))
                total_loss = np.mean(epoch_loss)

                progress_bar.set_description('Epoch: {}/{}. Iteration: {}/{}'.format(
                    epoch + 1, opt.num_epochs, iter + 1, num_iter_per_epoch))
                progress_bar.write(
                    'Cls loss: {:.5f}\tReg loss: {:.5f}\tCls+Reg loss: {:.5f}\t'
                    'Batch loss: {:.5f}\tTotal loss: {:.5f}'.format(
                        cls_loss, reg_loss, cls_loss + reg_loss, loss, total_loss))
            except Exception as e:
                print(e)
                continue
        scheduler.step(np.mean(epoch_loss))

        if epoch % opt.test_interval == 0:
            model.eval()
            loss_regression_ls = []
            loss_classification_ls = []
            loss_classification_2_ls = []
            progress_bar = tqdm(test_generator)
            progress_bar.set_description_str(' Evaluating')
            for iter, data in enumerate(progress_bar):
                with torch.no_grad():
                    if torch.cuda.is_available():
                        cls_loss, cls_2_loss, reg_loss = model([data['img'].cuda().float(), data['annot'].cuda()])
                    else:
                        cls_loss, cls_2_loss, reg_loss = model([data['img'].float(), data['annot']])

                    cls_loss = cls_loss.mean()
                    cls_2_loss = cls_2_loss.mean()
                    reg_loss = reg_loss.mean()
                    loss_classification_ls.append(float(cls_loss))
                    loss_classification_2_ls.append(float(cls_2_loss))
                    loss_regression_ls.append(float(reg_loss))

            cls_loss = np.mean(loss_classification_ls)
            cls_2_loss = np.mean(loss_classification_2_ls)
            reg_loss = np.mean(loss_regression_ls)
            loss = cls_loss + cls_2_loss + reg_loss

            print('Epoch: {}/{}.\n'
                  'Classification loss: {:1.5f}.\tClassification_2 loss: {:1.5f}.\t'
                  'Regression loss: {:1.5f}.\tTotal loss: {:1.5f}'.format(
                      epoch + 1, opt.num_epochs, cls_loss, cls_2_loss, reg_loss, loss))

            if loss + opt.es_min_delta < best_loss:
                print('Saving model...')
                best_loss = loss
                best_epoch = epoch
                # Save only the weights (unwrapped from DataParallel), not the whole module.
                torch.save(model.module.state_dict(),
                           os.path.join(opt.saved_path, opt.network + '.pth'))
                # torch.save(model, os.path.join(opt.saved_path, opt.network + '.pth'))

            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print("Stop training at epoch {}. The lowest loss achieved is {}".format(epoch, loss))
                break
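# The two train() variants above checkpoint differently: the first pickles the whole
# DataParallel-wrapped module via torch.save(model, ...), while the second saves only
# model.module.state_dict(). The helper below is a minimal sketch of the matching load
# paths; the EfficientDet(opt) call and path layout mirror their usage above, while the
# function name and `weights_only` flag are illustrative, not part of the repository.
def load_checkpoint(opt, weights_only=True):
    path = os.path.join(opt.saved_path, opt.network + '.pth')
    if weights_only:
        # Weights-only checkpoint (second variant; also what test() below expects).
        model = EfficientDet(opt)
        model.load_state_dict(torch.load(path, map_location='cpu'))
    else:
        # Whole-module checkpoint (first variant; its file is named
        # "signatrix_efficientdet_coco.pth") unpickles the DataParallel wrapper,
        # so unwrap it before single-GPU or CPU use.
        model = torch.load(path, map_location='cpu')
        if isinstance(model, nn.DataParallel):
            model = model.module
    return model.eval()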
# `writePIC`, `colors`, and `COCO_CLASSES` are module-level names assumed to come from
# the file header (see the hedged imports at the top); writePIC toggles whether
# annotated prediction images are written to disk.
def test(opt):
    test_set = CocoDataset(opt.data_path, set='val2017',
                           transform=transforms.Compose([Normalizer(), Resizer()]))
    opt.num_classes = test_set.num_classes()
    # Evaluation has no backward pass, so a 4x larger batch fits in memory.
    opt.batch_size = opt.batch_size * 4
    test_params = {"batch_size": opt.batch_size,
                   "shuffle": False,
                   "drop_last": False,
                   "collate_fn": collater,
                   "num_workers": 12}
    test_generator = DataLoader(test_set, **test_params)

    model = EfficientDet(opt)
    model.load_state_dict(torch.load(os.path.join(opt.pretrained_model, opt.network + '.pth')))
    model.cuda()
    model.set_is_training(False)
    model.eval()

    if os.path.isdir(opt.prediction_dir):
        shutil.rmtree(opt.prediction_dir)
    os.makedirs(opt.prediction_dir)

    progress_bar = tqdm(test_generator)
    progress_bar.set_description_str(' Evaluating')
    IoU_scores = []
    for i, data in enumerate(progress_bar):
        scale = data['scale']
        with torch.no_grad():
            output_list = model(data['img'].cuda().float())
        for j, output in enumerate(output_list):
            scores, labels, boxes = output
            annot = data['annot'][j]
            annot = annot[annot[:, 4] != -1]  # drop padded annotations

            if boxes.shape[0] == 0:
                # No predictions: perfect if the image truly has no objects, zero otherwise.
                IoU_scores.append(1.0 if annot.size(0) == 0 else 0.0)
                continue

            if annot.size(0) == 0:
                IoU_scores.append(0.0)
            else:
                classes = set(annot[:, 4].tolist())
                # One row per detection: [score, label, xmin, ymin, xmax, ymax].
                cat = torch.cat([scores.view(-1, 1), labels.view(-1, 1).float(), boxes], dim=1)
                cat = cat[cat[:, 0] >= opt.cls_threshold]
                iou_score = []
                for c in classes:
                    box = cat[cat[:, 1] == c][:, 2:]
                    if box.size(0) == 0:
                        iou_score.append(0.0)
                        continue
                    tgt = annot[annot[:, 4] == c][:, :4]
                    iou_s = iou(box, tgt.cuda())
                    iou_score.append(float(iou_s))
                # Penalize predicted classes that do not occur in the ground truth.
                classes_pre = set(cat[:, 1].tolist())
                for c in classes_pre:
                    if c not in classes:
                        iou_score.append(0.0)
                IoU_scores.append(sum(iou_score) / len(iou_score))

            if writePIC:
                # Undo the Resizer scaling so boxes line up with the original image.
                annot /= scale[j]
                boxes /= scale[j]
                image_info = test_set.coco.loadImgs(test_set.image_ids[i * opt.batch_size + j])[0]
                path = os.path.join(test_set.root_dir, 'images', test_set.set_name,
                                    image_info['file_name'])
                output_image = cv2.imread(path)
                for box_id in range(boxes.shape[0]):
                    pred_prob = float(scores[box_id])
                    if pred_prob < opt.cls_threshold:
                        break  # detections are assumed sorted by descending score
                    pred_label = int(labels[box_id])
                    # OpenCV drawing functions require integer pixel coordinates.
                    xmin, ymin, xmax, ymax = (int(v) for v in boxes[box_id, :])
                    color = colors[pred_label]
                    cv2.rectangle(output_image, (xmin, ymin), (xmax, ymax), color, 1)
                    text_size = cv2.getTextSize(COCO_CLASSES[pred_label] + ' : %.2f' % pred_prob,
                                                cv2.FONT_HERSHEY_PLAIN, 1, 1)[0]
                    cv2.rectangle(output_image, (xmin, ymin),
                                  (xmin + text_size[0] + 3, ymin + text_size[1] + 4), color, -1)
                    cv2.putText(output_image,
                                COCO_CLASSES[pred_label] + ' : %.2f' % pred_prob,
                                (xmin, ymin + text_size[1] + 4),
                                cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)
                for box_id in range(annot.size(0)):
                    xmin, ymin, xmax, ymax = (int(v) for v in annot[box_id, :4])
                    cv2.rectangle(output_image, (xmin, ymin), (xmax, ymax), (255, 0, 0), 1)
                cv2.imwrite("{}/{}_prediction.jpg".format(opt.prediction_dir,
                                                          image_info["file_name"][:-4]),
                            output_image)

    print(sum(IoU_scores) / len(IoU_scores))
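# test() above calls an iou(boxes, targets) helper that is not shown in this file. The
# version below is a minimal sketch, assuming [xmin, ymin, xmax, ymax] boxes and a
# reduction that averages, over ground-truth boxes, the best IoU any prediction achieves.
# The repository's actual helper may reduce differently, but it must return a scalar
# given how the result is used above.
def iou(boxes, targets):
    """boxes: (N, 4) predictions; targets: (M, 4) ground truth. Returns a scalar tensor."""
    area_p = (boxes[:, 2] - boxes[:, 0]).clamp(min=0) * (boxes[:, 3] - boxes[:, 1]).clamp(min=0)
    area_t = (targets[:, 2] - targets[:, 0]).clamp(min=0) * (targets[:, 3] - targets[:, 1]).clamp(min=0)
    # Pairwise intersection corners via broadcasting: (N, M, 2).
    lt = torch.max(boxes[:, None, :2], targets[None, :, :2])
    rb = torch.min(boxes[:, None, 2:], targets[None, :, 2:])
    wh = (rb - lt).clamp(min=0)
    inter = wh[..., 0] * wh[..., 1]                      # (N, M) intersection areas
    union = area_p[:, None] + area_t[None, :] - inter
    iou_matrix = inter / union.clamp(min=1e-8)
    return iou_matrix.max(dim=0)[0].mean()               # best prediction per target, averaged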
def train(opt):
    num_gpus = 1
    if torch.cuda.is_available():
        num_gpus = torch.cuda.device_count()
        torch.cuda.manual_seed(123)
    else:
        torch.manual_seed(123)

    training_params = {"batch_size": opt.batch_size * num_gpus,
                       "shuffle": True,
                       "drop_last": True,
                       "collate_fn": collater,
                       "num_workers": 12}

    test_params = {"batch_size": opt.batch_size,
                   "shuffle": False,
                   "drop_last": False,
                   "collate_fn": collater,
                   "num_workers": 12}

    training_set = CocoDataset(root_dir=opt.data_path, set="train2017",
                               transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
    training_generator = DataLoader(training_set, **training_params)

    test_set = CocoDataset(root_dir=opt.data_path, set="val2017",
                           transform=transforms.Compose([Normalizer(), Resizer()]))
    test_generator = DataLoader(test_set, **test_params)

    # Channel counts of the three backbone feature levels fed to the feature network,
    # per EfficientNet variant.
    channels_map = {'efficientnet-b0': [40, 80, 192],
                    'efficientnet-b1': [40, 80, 192],
                    'efficientnet-b2': [48, 88, 208],
                    'efficientnet-b3': [48, 96, 232],
                    'efficientnet-b4': [56, 112, 272],
                    'efficientnet-b5': [64, 128, 304],
                    'efficientnet-b6': [72, 144, 344],
                    'efficientnet-b7': [80, 160, 384],
                    'efficientnet-b8': [80, 160, 384]}

    if os.path.isdir(opt.log_path):
        shutil.rmtree(opt.log_path)
    os.makedirs(opt.log_path)
    if not os.path.isdir(opt.saved_path):
        os.makedirs(opt.saved_path)

    writer = SummaryWriter(opt.log_path)

    if opt.resume:
        resume_path = os.path.join(opt.saved_path, 'signatrix_efficientdet_coco_latest.pth')
        model = torch.load(resume_path).module
        print("model loaded from {}".format(resume_path))
    else:
        model = EfficientDet(num_classes=training_set.num_classes(),
                             network=opt.backbone_network,
                             remote_loading=opt.remote_loading,
                             advprop=opt.advprop,
                             conv_in_channels=channels_map[opt.backbone_network])
        print("model created with backbone {}, advprop {}".format(
            opt.backbone_network, opt.advprop))

    if torch.cuda.is_available():
        model = model.cuda()
        model = nn.DataParallel(model)

    if opt.resume:
        # On resume, decay the base LR by 10x per 100 epochs already trained.
        m = round(opt.start_epoch / 100)
        opt.lr = opt.lr * (0.1 ** m)
    optimizer = torch.optim.Adam(model.parameters(), opt.lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

    best_loss = 1e5
    best_epoch = 0
    model.train()

    num_iter_per_epoch = len(training_generator)
    start_epoch = opt.start_epoch if opt.resume else 0
    for epoch in range(start_epoch, opt.num_epochs):
        model.train()
        # if torch.cuda.is_available():
        #     model.module.freeze_bn()
        # else:
        #     model.freeze_bn()
        epoch_loss = []
        progress_bar = tqdm(training_generator)
        for iter, data in enumerate(progress_bar):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    cls_loss, reg_loss = model([data['img'].cuda().float(), data['annot'].cuda()])
                else:
                    cls_loss, reg_loss = model([data['img'].float(), data['annot']])

                cls_loss = cls_loss.mean()
                reg_loss = reg_loss.mean()
                loss = cls_loss + reg_loss
                if loss == 0:
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
                optimizer.step()
                epoch_loss.append(float(loss))
                total_loss = np.mean(epoch_loss)

                progress_bar.set_description(
                    '{} Epoch: {}/{}. Iteration: {}/{}. Cls loss: {:.5f}. Reg loss: {:.5f}. '
                    'Batch loss: {:.5f} Total loss: {:.5f}'.format(
                        datetime.now(), epoch + 1, opt.num_epochs, iter + 1,
                        num_iter_per_epoch, cls_loss, reg_loss, loss, total_loss))
                writer.add_scalar('Train/Total_loss', total_loss, epoch * num_iter_per_epoch + iter)
                writer.add_scalar('Train/Regression_loss', reg_loss, epoch * num_iter_per_epoch + iter)
                writer.add_scalar('Train/Classification_loss (focal loss)', cls_loss,
                                  epoch * num_iter_per_epoch + iter)

            except Exception as e:
                print(e)
                continue
        scheduler.step(np.mean(epoch_loss))

        if epoch % opt.test_interval == 0:
            model.eval()
            loss_regression_ls = []
            loss_classification_ls = []
            for iter, data in enumerate(test_generator):
                with torch.no_grad():
                    if torch.cuda.is_available():
                        cls_loss, reg_loss = model([data['img'].cuda().float(), data['annot'].cuda()])
                    else:
                        cls_loss, reg_loss = model([data['img'].float(), data['annot']])

                    cls_loss = cls_loss.mean()
                    reg_loss = reg_loss.mean()
                    loss_classification_ls.append(float(cls_loss))
                    loss_regression_ls.append(float(reg_loss))

            cls_loss = np.mean(loss_classification_ls)
            reg_loss = np.mean(loss_regression_ls)
            loss = cls_loss + reg_loss

            print('{} Epoch: {}/{}. Classification loss: {:1.5f}. Regression loss: {:1.5f}. '
                  'Total loss: {:1.5f}'.format(datetime.now(), epoch + 1, opt.num_epochs,
                                               cls_loss, reg_loss, loss))
            writer.add_scalar('Test/Total_loss', loss, epoch)
            writer.add_scalar('Test/Regression_loss', reg_loss, epoch)
            writer.add_scalar('Test/Classification_loss (focal loss)', cls_loss, epoch)

            if loss + opt.es_min_delta < best_loss:
                best_loss = loss
                best_epoch = epoch
                torch.save(model, os.path.join(
                    opt.saved_path,
                    "signatrix_efficientdet_coco_best_epoch{}.pth".format(epoch)))

            # ONNX export disabled in this variant:
            # dummy_input = torch.rand(opt.batch_size, 3, 512, 512)
            # if torch.cuda.is_available():
            #     dummy_input = dummy_input.cuda()
            # if isinstance(model, nn.DataParallel):
            #     model.module.backbone_net.model.set_swish(memory_efficient=False)
            #     torch.onnx.export(model.module, dummy_input,
            #                       os.path.join(opt.saved_path, "signatrix_efficientdet_coco.onnx"),
            #                       verbose=False)
            #     model.module.backbone_net.model.set_swish(memory_efficient=True)
            # else:
            #     model.backbone_net.model.set_swish(memory_efficient=False)
            #     torch.onnx.export(model, dummy_input,
            #                       os.path.join(opt.saved_path, "signatrix_efficientdet_coco.onnx"),
            #                       verbose=False)
            #     model.backbone_net.model.set_swish(memory_efficient=True)

            print("epoch:", epoch, "best_epoch:", best_epoch,
                  "epoch - best_epoch =", epoch - best_epoch)
            # Early stopping
            if epoch - best_epoch > opt.es_patience > 0:
                print("Stop training at epoch {}. The lowest loss achieved is {}".format(epoch, loss))
                break

        if epoch % opt.save_interval == 0:
            torch.save(model, os.path.join(opt.saved_path,
                                           "signatrix_efficientdet_coco_latest.pth"))
    writer.close()
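# The resume branch in the last train() rescales the base LR with
# opt.lr * 0.1 ** round(opt.start_epoch / 100) before ReduceLROnPlateau takes over.
# A small sketch making that schedule explicit; the function name and the
# decay/period parameters are illustrative, with defaults matching the code above.
def resume_lr(base_lr, start_epoch, decay=0.1, every=100):
    """LR to resume with after `start_epoch` completed epochs."""
    # Note round(), not floor, so the decay can kick in half a period early,
    # e.g. start_epoch=150 gives round(1.5) == 2 decay steps.
    return base_lr * decay ** round(start_epoch / every)


# e.g. resume_lr(1e-4, 150) == 1e-4 * 0.01 == 1e-6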