def main():
    # Initialize the model.
    retinanet, device = model_init(model_name)
    retinanet.to(device)
    retinanet = torch.nn.DataParallel(retinanet).to(device)
    retinanet.training = False
    retinanet.eval()
    retinanet.module.freeze_bn()

    coco_eval.evaluate_coco(dataset_val, retinanet, threshold=0.05)
def main(args=None): parser = argparse.ArgumentParser(description="Evaluate network parameters pf RetinaNet") parser.add_argument("--dataset") parser.add_argument("--coco_path") parser.add_argument("--saved_weights") parser = parser.parse_args(args) if parser.dataset=="coco": dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose([Normalizer(), Resizer()])) #Load the network retinanet = torch.load(parser.saved_weights) retinanet.eval() #Evaluate the netwoek on coco coco_eval.evaluate_coco(dataset_val, retinanet)
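# Illustrative invocation of the evaluation entry point above; the dataset
# and weight paths are placeholders, not files shipped with this repo.
if __name__ == '__main__':
    main(['--dataset', 'coco',
          '--coco_path', '/path/to/coco',
          '--saved_weights', '/path/to/retinanet.pt'])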
loss_hist.append(float(loss))
epoch_loss.append(float(loss))

print(json.dumps({
    "epoch_num": epoch_num,
    "iter_num": iter_num,
    "img_num": iter_num * args.batch_size,
    "cls_loss": float(cls_loss),
    "reg_loss": float(reg_loss),
    "loss_hist": float(np.mean(loss_hist)),
    "elapsed": time() - start_time,
}))
sys.stdout.flush()

del cls_loss
del reg_loss

print('Evaluating dataset')

if args.dataset == 'coco':
    coco_eval.evaluate_coco(dataset_val, retinanet)
elif args.dataset == 'csv' and args.csv_val is not None:
    _ = csv_eval.evaluate(dataset_val, retinanet)

lr_scheduler.step(np.mean(epoch_loss))

torch.save(retinanet.module,
           '{}_retinanet_{}.pt'.format(args.dataset, epoch_num))

retinanet.eval()
torch.save(retinanet, 'model_final.pt')
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--optimizer', help='[SGD | Adam]', type=str, default='SGD')
    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser = parser.parse_args(args)

    # Create the data loaders
    print("\n[Phase 1]: Creating DataLoader for {} dataset".format(parser.dataset))
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')
        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2014',
                                    transform=transforms.Compose(
                                        [Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2014',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV.')
        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=8, drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=8,
                                  collate_fn=collater,
                                  batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=16, drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=8,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    print('| Num training images: {}'.format(len(dataset_train)))
    print('| Num test images : {}'.format(len(dataset_val)))

    print("\n[Phase 2]: Preparing RetinaNet Detection Model...")
    use_gpu = torch.cuda.is_available()
    device = torch.device('cuda' if use_gpu else 'cpu')
    if use_gpu:
        retinanet = retinanet.to(device)
        retinanet = torch.nn.DataParallel(
            retinanet, device_ids=range(torch.cuda.device_count()))
        print("| Using %d GPUs for Train/Validation!" % torch.cuda.device_count())
    retinanet.training = True

    if parser.optimizer == 'Adam':
        optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)  # not mentioned
        print("| Adam Optimizer with Learning Rate = {}".format(1e-5))
    elif parser.optimizer == 'SGD':
        optimizer = optim.SGD(retinanet.parameters(),
                              lr=1e-2,
                              momentum=0.9,
                              weight_decay=1e-4)
        print("| SGD Optimizer with Learning Rate = {}".format(1e-2))
    else:
        raise ValueError('Unsupported Optimizer, must be one of [SGD | Adam]')

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()  # Freeze the BN parameters to ImageNet configuration

    # Check if there is a 'checkpoints' path
    if not osp.exists('./checkpoints/'):
        os.makedirs('./checkpoints/')

    print("\n[Phase 3]: Training Model on {} dataset...".format(parser.dataset))
    for epoch_num in range(parser.epochs):
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet(
                    [data['img'].to(device), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.001)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                sys.stdout.write('\r')
                sys.stdout.write(
                    '| Epoch: {} | Iteration: {}/{} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num + 1, iter_num + 1, len(dataloader_train),
                            float(classification_loss), float(regression_loss),
                            np.mean(loss_hist)))
                sys.stdout.flush()

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        print("\n| Saving current best model at epoch {}...".format(epoch_num + 1))
        torch.save(
            retinanet.state_dict(),
            './checkpoints/{}_retinanet_{}.pt'.format(parser.dataset, epoch_num + 1))

        if parser.dataset == 'coco':
            # print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet, device)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            # print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet, device)

        scheduler.step(np.mean(epoch_loss))

    retinanet.eval()
    torch.save(retinanet.state_dict(), './checkpoints/model_final.pt')
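# freeze_bn() appears throughout this section, both as a model method
# (retinanet.module.freeze_bn()) and as a free function further below. A
# minimal sketch of the usual behaviour, assuming the common pytorch-retinanet
# convention: put every BatchNorm2d layer in eval mode so its ImageNet running
# statistics are not updated by small detection batches.
import torch.nn as nn

def freeze_bn(model):
    for layer in model.modules():
        if isinstance(layer, nn.BatchNorm2d):
            layer.eval()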
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset',
                        default="csv",
                        help='Dataset type, must be one of csv, coco or voc.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        default="./data/train_only.csv",
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        default="./data/classes.csv",
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        default="./data/train_only.csv",
        help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--voc_train',
                        default="./data/voc_train/",
                        help='Path to directory containing images and annotations')
    parser.add_argument('--voc_val',
                        default="./data/voc_train/",
                        help='Path to directory containing images and annotations')
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=101)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=40)
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')
        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV.')
        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(), Resizer()]))
    elif parser.dataset == 'voc':
        if parser.voc_train is None:
            raise ValueError('Must provide --voc_train when training on PASCAL VOC.')
        dataset_train = XML_VOCDataset(
            img_path=parser.voc_train + 'JPEGImages/',
            xml_path=parser.voc_train + 'Annotations/',
            class_list=class_list,
            transform=transforms.Compose(
                [Normalizer(), Augmenter(), ResizerMultiScale()]))
        if parser.voc_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = XML_VOCDataset(
                img_path=parser.voc_val + 'JPEGImages/',
                xml_path=parser.voc_val + 'Annotations/',
                class_list=class_list,
                transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv, coco or voc), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=1, drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=2,
                                  collate_fn=collater,
                                  batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=2,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
        retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True
    optimizer = optim.Adam(retinanet.parameters(), lr=1e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=15,
                                                     verbose=True,
                                                     mode="max")
    # scheduler = optim.lr_scheduler.StepLR(optimizer, 8)
    loss_hist = collections.deque(maxlen=1024)

    retinanet.train()
    retinanet.module.freeze_bn()

    if not os.path.exists("./logs"):
        os.mkdir("./logs")
    log_file = open("./logs/log.txt", "w")

    print('Num training images: {}'.format(len(dataset_train)))
    best_map = 0
    is_best_map = False
    print("Training models...")

    for epoch_num in range(parser.epochs):
        # scheduler.step(epoch_num)
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            # print('iter num is: ', iter_num)
            try:
                # print(csv_eval.evaluate(dataset_val[:20], retinanet)[0])
                # print(type(csv_eval.evaluate(dataset_val, retinanet)))
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                if iter_num % 50 == 0:
                    print(
                        'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                        .format(epoch_num, iter_num, float(classification_loss),
                                float(regression_loss), np.mean(loss_hist)))
                    log_file.write(
                        'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f} \n'
                        .format(epoch_num, iter_num, float(classification_loss),
                                float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)
        elif parser.dataset == 'voc' and parser.voc_val is not None:
            print('Evaluating dataset')
            mAP = voc_eval.evaluate(dataset_val, retinanet)

        try:
            is_best_map = mAP[0][0] > best_map
            best_map = max(mAP[0][0], best_map)
        except:
            pass

        if is_best_map:
            print("Get better map: ", best_map)
            torch.save(retinanet.module,
                       './logs/{}_scale15_{}.pt'.format(epoch_num, best_map))
            shutil.copyfile(
                './logs/{}_scale15_{}.pt'.format(epoch_num, best_map),
                "./best_models/model.pt")
        else:
            print("Current map: ", best_map)
        scheduler.step(best_map)

    retinanet.eval()
    torch.save(retinanet, './logs/model_final.pt')
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Training script for training a EfficientDet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--phi', help='EfficientNet scaling coefficient.', type=int, default=0)
    parser.add_argument('--batch-size', help='Batch size', type=int, default=8)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')
        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(), Augmenter(), Resizer(img_size=512)]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer(img_size=512)]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV')
        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=parser.batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    efficientdet = model.efficientdet(num_classes=dataset_train.num_classes(),
                                      pretrained=True,
                                      phi=parser.phi)

    use_gpu = True
    if use_gpu:
        efficientdet = efficientdet.cuda()
        efficientdet = torch.nn.DataParallel(efficientdet).cuda()

    efficientdet.training = True
    optimizer = optim.Adam(efficientdet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    efficientdet.train()
    efficientdet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        efficientdet.train()
        efficientdet.module.freeze_bn()
        epoch_loss = []

        # Header matches the order of the values printed below.
        print(('\n' + '%10s' * 5) % ('Epoch', 'gpu_mem', 'Loss', 'reg', 'cls'))
        pbar = tqdm(enumerate(dataloader_train), total=len(dataloader_train))
        for iter_num, data in pbar:
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = efficientdet(
                    [data['img'].cuda().float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(efficientdet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                loss = (loss * iter_num) / (iter_num + 1)  # update mean losses
                mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0  # (GB)
                s = ('%10s' * 2 + '%10.3g' * 3) % (
                    '%g/%g' % (epoch_num, parser.epochs - 1), '%.3gG' % mem,
                    np.mean(loss_hist), float(regression_loss),
                    float(classification_loss))
                pbar.set_description(s)

                del classification_loss
                del regression_loss
            except Exception as e:
                raise e

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, efficientdet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, efficientdet)

        scheduler.step(np.mean(epoch_loss))
        torch.save(efficientdet.module,
                   '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num))

    efficientdet.eval()
    torch.save(efficientdet, 'model_final.pt')
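# Illustrative invocation of the EfficientDet training entry point above;
# the CSV paths are placeholders, not files from this repo.
if __name__ == '__main__':
    main(['--dataset', 'csv',
          '--csv_train', '/path/to/train_annotations.csv',
          '--csv_classes', '/path/to/classes.csv',
          '--phi', '0',
          '--batch-size', '8'])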
def main(args=None): """ todo s: ################## ToDo ######################## 1. download more images using image_utils and isic-arhive. Also, use more online resources for data. 2. Use Augmentations fromPytorchSSD using pascal voc data format. 3. use pair augmentation, random erase 4. download more images for each classes. 5. preprocessing and feature extraction 6. bigger 500 px image size. big image tends to make 7. use ResNet-152 for better peromance. 8. adversarial training, use crosssentropy, focal loss 9. use similar optimizatio adam and learning rate schedule like wider face pedestrian dataset. 10.BGR to RGB 11. multi scale testing. 12. soft nms 13. save model and load from previous epoch 14. https://github.com/uoguelph-mlrg/Cutout """ parser = argparse.ArgumentParser( description='Simple training script for training a RetinaNet network.') parser.add_argument('--dataset', default="wider_pedestrain", help='Dataset type, must be one of csv or coco.') parser.add_argument( '--coco_path', default= "/media/milton/ssd1/research/competitions/data_wider_pedestrian/", help='Path to COCO directory') parser.add_argument( '--csv_train', help='Path to file containing training annotations (see readme)') parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)') parser.add_argument( '--csv_val', help= 'Path to file containing validation annotations (optional, see readme)' ) parser.add_argument( '--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=152) parser.add_argument('--epochs', help='Number of epochs', type=int, default=200) parser = parser.parse_args(args) # Create the data loaders if parser.dataset == 'coco': if parser.coco_path is None: raise ValueError('Must provide --coco_path when training on COCO,') dataset_train = CocoDataset(parser.coco_path, set_name='train_wider_pedestrian', transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) dataset_val = CocoDataset(parser.coco_path, set_name='val_wider_pedestrian', transform=transforms.Compose( [Normalizer(), Resizer()])) elif parser.dataset == 'wider_pedestrain': dataset_train = CocoDataset(parser.coco_path, set_name='train_wider_pedestrian', transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) dataset_val = CocoDataset(parser.coco_path, set_name='val_wider_pedestrian', transform=transforms.Compose( [Normalizer(), Resizer()])) # dataset_test = CocoDataset(parser.coco_path, set_name='test_wider_pedestrian', # transform=transforms.Compose([Normalizer()])) elif parser.dataset == 'csv': if parser.csv_train is None: raise ValueError('Must provide --csv_train when training on COCO,') if parser.csv_classes is None: raise ValueError( 'Must provide --csv_classes when training on COCO,') dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) if parser.csv_val is None: dataset_val = None print('No validation annotations provided.') else: dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Resizer()])) else: raise ValueError( 'Dataset type not understood (must be csv or coco), exiting.') batch_size = 4 num_classes = 1 print("Total Train:{}".format(len(dataset_train))) sampler = AspectRatioBasedSampler(dataset_train, batch_size=batch_size, drop_last=False) dataloader_train = DataLoader(dataset_train, num_workers=4, collate_fn=collater, batch_sampler=sampler) print("Total 
Validation:{}".format(len(dataset_val))) if dataset_val is not None: sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=batch_size, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val) best_saved_model_name = "checkpoint/resnet{}_{}_best_model.pth".format( parser.depth, parser.dataset) best_mAP = 0 start_epoch = 0 # Create the model if parser.depth == 18: retinanet = model.resnet18(num_classes=num_classes, pretrained=True) elif parser.depth == 34: retinanet = model.resnet34(num_classes=num_classes, pretrained=True) elif parser.depth == 50: retinanet = model.resnet50(num_classes=num_classes, pretrained=True) elif parser.depth == 101: retinanet = model.resnet101(num_classes=num_classes, pretrained=True) elif parser.depth == 152: retinanet = model.resnet152(num_classes=num_classes, pretrained=True) else: raise ValueError( 'Unsupported model depth, must be one of 18, 34, 50, 101, 152') use_gpu = True optimizer = optim.Adam(retinanet.parameters(), lr=0.001) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) if use_gpu: retinanet_sk = copy.deepcopy( retinanet.cpu() ) # will hold the raw model, later it will be loaded with new model weight to test in seperate gpus. retinanet = retinanet.cuda() retinanet = torch.nn.DataParallel(retinanet) try: print("Loading model and optimizer from checkpoint '{}'".format( best_saved_model_name)) checkpoint = torch.load(best_saved_model_name) retinanet.load_state_dict(checkpoint['model'].state_dict()) best_mAP = checkpoint['map'] start_epoch = checkpoint['epoch'] optimizer.load_state_dict(checkpoint['optimizer'].state_dict()) # optimizer.load_state_dict(checkpoint['optimizer_state']) print("Loaded checkpoint '{}' (epoch {})".format( best_saved_model_name, checkpoint['epoch'])) start_epoch = checkpoint['epoch'] print( '==> Resuming Sucessfully from checkpoint from epoch {} with mAP {:.7f}..' 
.format(start_epoch, best_mAP)) except Exception as e: print("\nExcpetion: {}".format(repr(e))) print('\n==> Resume Failed...') retinanet.training = True total_loss = losses.loss loss_hist = collections.deque(maxlen=500) retinanet.train() freeze_bn(retinanet) print('Num training images: {}'.format(len(dataset_train))) for epoch_num in range(start_epoch, parser.epochs): retinanet.train() freeze_bn(retinanet) epoch_loss = [] # threshold=0.05 for iter_num, data in enumerate(dataloader_train): iter_per_epoch = len(dataset_train) / batch_size step = epoch_num * iter_per_epoch + iter_num if iter_num == 0: print('Iteration PEr eEpoch: {}'.format(iter_per_epoch)) try: optimizer.zero_grad() classification, regression, anchors = retinanet( data['img'].cuda().float()) classification_loss, regression_loss = total_loss( classification, regression, anchors, data['annot']) loss = classification_loss + regression_loss if bool(loss == 0): continue loss.backward() torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1) optimizer.step() loss_hist.append(float(loss)) epoch_loss.append(float(loss)) writer.add_scalar('Classification loss', classification_loss, step) writer.add_scalar('Regression loss', regression_loss, step) writer.add_scalar("Running Loss", np.mean(loss_hist), step) msg = 'Epoch:{}, Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format( epoch_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)) progress_bar(iter_num, iter_per_epoch, msg) # print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist))) # break # if iter_num==0: # break if iter_num % 500 == 0: save_model(retinanet, optimizer, best_saved_model_name, epoch_num + 1, epoch_num) if False: retinanet.eval() test_data = get_test_loader_for_upload(1) # coco_eval.evaluate_wider_pedestrian_for_upload(epoch_num, test_data, retinanet, retinanet_sk) new_map = coco_eval.evaluate_wider_pedestrian( epoch_num, dataset_val, retinanet, retinanet_sk, threshold) # to validate epoch_saved_model_name = "checkpoint/resnet{}_{}_epoch_{}.pth".format( parser.depth, parser.dataset, epoch_num) save_model(retinanet, optimizer, epoch_saved_model_name, new_map, epoch_num) # print("\nepoch:{}, validation average precision score:{}".format(epoch_num, new_map)) if new_map == None: continue writer.add_scalar('validation mAP', new_map, epoch_num) scheduler.step(np.mean(epoch_loss)) if new_map > best_mAP: print( "Found new best model with mAP:{:.7f}, over {:.7f}" .format(new_map, best_mAP)) save_model(retinanet, optimizer, best_saved_model_name, new_map, epoch_num) best_mAP = new_map coco_eval.evaluate_wider_pedestrian_for_upload( parser.depth, epoch_num, test_data, retinanet, retinanet_sk) retinanet.train() except Exception as e: print(e) if parser.dataset == 'coco': print('\n==>Evaluating dataset') coco_eval.evaluate_coco(dataset_val, retinanet, threshold=0.2) save_model(retinanet, optimizer, best_saved_model_name, 0.5, epoch_num) continue elif parser.dataset == 'wider_pedestrain': for threshold in range(16, 90, 10): threshold = threshold / 100 test_data = get_test_loader_for_upload(1) # coco_eval.evaluate_wider_pedestrian_for_upload(epoch_num, test_data, retinanet, retinanet_sk) new_map = coco_eval.evaluate_wider_pedestrian( epoch_num, dataset_val, retinanet, retinanet_sk, threshold) # to validate epoch_saved_model_name = 
"checkpoint/resnet{}_{}_epoch_{}.pth".format( parser.depth, parser.dataset, epoch_num) save_model(retinanet, optimizer, epoch_saved_model_name, new_map, epoch_num) # print("\nepoch:{}, validation average precision score:{}".format(epoch_num, new_map)) if new_map == None: continue writer.add_scalar('validation mAP', new_map, epoch_num) scheduler.step(np.mean(epoch_loss)) # if new_map>best_mAP: print( "Found new best model with mAP:{:.7f}, over {:.7f}".format( new_map, best_mAP)) save_model(retinanet, optimizer, best_saved_model_name, new_map, epoch_num) best_mAP = new_map coco_eval.evaluate_wider_pedestrian_for_upload( epoch_num, test_data, retinanet, retinanet_sk, threshold, new_map) retinanet.train()
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset',
                        default="csv",
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path',
                        default="/home/mayank-s/PycharmProjects/Datasets/coco",
                        help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        default="berkely_ready_to_train_for_retinanet_pytorch.csv",
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        default="berkely_class.csv",
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        default="/home/teai/Desktop/mayank/pytorch-retinanet-master (4)/berkely_ready_to_train_validation.csv",
        help='Path to file containing validation annotations (optional, see readme)')
    # parser.add_argument('--csv_val', default="/home/teai/Desktop/mayank/pytorch-retinanet-master (4)/berkely_ready_to_train_validation (copy).csv",
    #                     help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=18)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=200)
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')
        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2014',
                                    transform=transforms.Compose(
                                        [Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2014',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV.')
        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=4, drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=0,
                                  collate_fn=collater,
                                  batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    # if use_gpu:
    if torch.cuda.is_available():
        retinanet = retinanet.cuda()
        retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True
    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    # checkpoint = '/home/teai/Desktop/mayank/pytorch-retinanet-master (4)/checkpoint/old_datasets_with_missing_images_annotations/retina_fpn_11'
    checkpoint = '/home/teai/Desktop/mayank/pytorch-retinanet-master (4)/checkpoint/retina_fpn_univ_132'
    # retinanet = torch.load("./checkpoint/retina_fpn_132")
    retinanet = torch.load(checkpoint)

    for epoch_num in range(parser.epochs):
        # retinanet.train()
        # retinanet.module.freeze_bn()
        epoch_loss = []

        # The training loop below is commented out; this script only runs evaluation.
        '''for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                else:
                    classification_loss, regression_loss = retinanet([data['img'].float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                    epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                print("fail")
                continue
        print("Saving model...")
        name = "./checkpoint/retina_fpn_" + str(epoch_num)
        torch.save(retinanet, name)'''

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)
            # print(mAP)

        scheduler.step(np.mean(epoch_loss))
def main(arg=None):
    parser = argparse.ArgumentParser()
    parser.add_argument('--coco_path', type=str, default='/home/hoo/Dataset/COCO')
    parser.add_argument('--depth', type=int, default=3)
    parser.add_argument('--epoches', type=int, default=50)
    parser.add_argument('--phi', type=int, default=0)
    parser.add_argument('--backbone', type=str, default='efficientnet-b0')
    parser.add_argument('--backbone_pretrained', type=bool, default=True)
    parser.add_argument('--EfficientDet_pretrained', type=bool, default=False)
    parser.add_argument('--pretrained', type=str, default='./weights/retinanet_1.pth')
    parser.add_argument('--batch_size', type=int, default=24)
    parser = parser.parse_args(arg)

    dataset_train = CocoDataset(parser.coco_path,
                                set_name='train2017',
                                transform=transforms.Compose(
                                    [Normalizer(), Augmenter(), Resizer()]))
    # print(dataset_train.num_classes())
    dataset_val = CocoDataset(parser.coco_path,
                              set_name='val2017',
                              transform=transforms.Compose(
                                  [Normalizer(), Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=parser.batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=16,
                                  collate_fn=collater,
                                  batch_sampler=sampler)
    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val,
                                num_workers=3,
                                collate_fn=collater,
                                batch_sampler=sampler_val)

    # Create the Model
    efficientdet = RetinaHead(parser)
    efficientdet = torch.nn.DataParallel(efficientdet).cuda()
    if parser.EfficientDet_pretrained:
        state_dict = torch.load(parser.pretrained)
        # print(state_dict)
        efficientdet.module.load_state_dict(state_dict)
    efficientdet.training = True

    optimizer = optim.Adam(efficientdet.parameters(), lr=1e-3)
    # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[3, 5, 7, 9, 11, 13, 15, 17, 19], gamma=0.5)

    for epoch_num in range(parser.epoches):
        efficientdet.train()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            break  # NOTE: leftover debug short-circuit; skips training, so each epoch only evaluates.
            # try:
            # print(data)
            optimizer.zero_grad()
            # print(np.shape(data['annot']))
            classification_loss, regression_loss = efficientdet(
                [data['img'].cuda().float(), data['annot']])
            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()
            loss = classification_loss + regression_loss
            if bool(loss == 0):
                continue
            loss.backward()
            torch.nn.utils.clip_grad_norm_(efficientdet.parameters(), 0.1)
            optimizer.step()
            epoch_loss.append(float(loss))
            print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f}'
                  .format(epoch_num, iter_num, float(classification_loss),
                          float(regression_loss)))
            if iter_num % 200 == 199:
                niter = epoch_num * len(dataloader_train) + iter_num
                # print(loss)
                writer.add_scalar('Train/Loss', loss, niter)
                writer.add_scalar('Train/Reg_Loss', regression_loss, niter)
                writer.add_scalar('Train/Cls_Loss', classification_loss, niter)
            del classification_loss
            del regression_loss
            # except Exception as e:
            #     print(e)
            #     continue
            # if iter_num == 20:
            #     break

        # print('Evaluating dataset')
        mAP = coco_eval.evaluate_coco(dataset_val, efficientdet)
        # writer.add_scalar('Test/mAP', mAP, epoch_num)
        print('Save Model')
        # torch.save(efficientdet.module.state_dict(), './weights/retinanet_{}.pth'.format(epoch_num))
        # scheduler.step(np.mean(epoch_loss))
        scheduler.step(epoch=epoch_num)
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--efficientdet', help='Use EfficientDet.', action="store_true")
    parser.add_argument('--scaling-compound',
                        help='EfficientDet scaling compound phi.',
                        type=int,
                        default=0)
    parser.add_argument('--batch-size', help='Batch size.', type=int, default=6)
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--print-model-complexity',
                        help='Print model complexity.',
                        action="store_true")
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=None)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser = parser.parse_args(args)

    img_size = parser.scaling_compound * 128 + 512

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')
        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose([
                                        Normalizer(),
                                        Augmenter(),
                                        Resizer(img_size=img_size)
                                    ]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer(img_size=img_size)]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV.')
        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose([
                                       Normalizer(),
                                       Augmenter(),
                                       Resizer(img_size=img_size)
                                   ]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose([
                                         Normalizer(),
                                         Resizer(img_size=img_size)
                                     ]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=parser.batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        model = retinanet.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        model = retinanet.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        model = retinanet.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        model = retinanet.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        model = retinanet.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.efficientdet:
        model = efficientdet.efficientdet(num_classes=dataset_train.num_classes(),
                                          pretrained=True,
                                          phi=parser.scaling_compound)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152, or specify --efficientdet')

    use_gpu = True
    if use_gpu:
        model = model.cuda()
        model = torch.nn.DataParallel(model).cuda()

    if parser.print_model_complexity:
        flops, params = get_model_complexity_info(model, (3, img_size, img_size),
                                                  as_strings=True,
                                                  print_per_layer_stat=True)
        print('{:<30} {:<8}'.format('Computational complexity: ', flops))
        print('{:<30} {:<8}'.format('Number of parameters: ', params))

    model.training = True
    optimizer = optim.SGD(model.parameters(), lr=4e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    model.train()
    model.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        model.train()
        model.module.freeze_bn()
        freeze_layer(model.module.efficientnet)
        epoch_loss = []
        pbar = tqdm(enumerate(dataloader_train), total=len(dataloader_train))
        for iter_num, data in pbar:
            optimizer.zero_grad()
            classification_loss, regression_loss = model(
                [data['img'].cuda().float(), data['annot']])
            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()
            loss = classification_loss + regression_loss
            if bool(loss == 0):
                continue
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
            optimizer.step()
            loss_hist.append(float(loss))
            epoch_loss.append(float(loss))

            mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0
            pbar.set_description(
                f'{mem:.3g}G | {float(classification_loss):1.5f} | {float(regression_loss):1.5f} | {np.mean(loss_hist):1.5f}')
            # print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
            #     epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))

            del classification_loss
            del regression_loss

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, model)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, model)

        scheduler.step(np.mean(epoch_loss))
        torch.save(model.module, '{}_model_{}.pt'.format(parser.dataset, epoch_num))

    model.eval()
    torch.save(model, 'model_final.pt')
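# freeze_layer() is called on the EfficientNet backbone above but is not
# defined in this excerpt. A minimal sketch under the usual interpretation
# (stop gradient updates for every parameter of the given module):
def freeze_layer(layer):
    for param in layer.parameters():
        param.requires_grad = False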
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=20)
    parser.add_argument('--resume', '-r', action='store_true', help='resume from checkpoint')
    parser.add_argument('--batch_size', type=int, default=4, help='set the batch size')
    parser.add_argument('--learning_rate', '-lr', type=float, default=1e-3,
                        help='set the learning rate')
    parser = parser.parse_args(args)

    batch_size = parser.batch_size
    learning_rate = parser.learning_rate
    start_epoch = 0
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    best_acc = 0  # best test accuracy

    # mean & std for ROI extraction
    # mean = (0.1146, 0.1147, 0.1148)
    # std = (0.1089, 0.1090, 0.1090)
    # mean & std for QRCode extraction
    mean = (0.2405, 0.2416, 0.2427)
    std = (0.2194, 0.2208, 0.2223)

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')
        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV.')
        dataset_train = CSVDataset(
            train_file=parser.csv_train,
            class_list=parser.csv_classes,
            transform=transforms.Compose([
                Normalizer(mean=mean, std=std),
                Augmenter(),
                # YFlipAugmenter(),
                # CropAugmenter(),
                # Rot180Augmenter(),
                Resizer()
            ]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose([
                                         Normalizer(mean=mean, std=std),
                                         Resizer()
                                     ]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=batch_size, drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=4,
                                  collate_fn=collater,
                                  batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=4,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.to(device)
        retinanet = torch.nn.DataParallel(retinanet)

    if parser.resume:
        # Load checkpoint
        print("==> Resuming from checkpoint")
        checkpoint = torch.load('./checkpoint/ckpt.pth')
        retinanet.load_state_dict(checkpoint['net'])
        best_acc = checkpoint['acc']
        start_epoch = checkpoint['epoch']
        print('resume training from epoch:', start_epoch, " with accuracy:", best_acc)

    retinanet.training = True
    # optimizer = optim.Adam(retinanet.parameters(), lr=1e-3)
    optimizer = optim.SGD(retinanet.parameters(), lr=learning_rate, momentum=0.9)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(start_epoch, start_epoch + parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        writer.add_scalar('train_loss', np.mean(epoch_loss), epoch_num)

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val,
                                    retinanet,
                                    iou_threshold=0.7,
                                    max_detections=5,
                                    score_threshold=0.2,
                                    epoch=epoch_num)
            print('mapROI:', mAP[0])
            AP, num_annotations = mAP[0]
            acc = 100. * AP
            if acc > best_acc:
                print('Saving... acc:', acc)
                state = {
                    'net': retinanet.state_dict(),
                    'acc': acc,
                    'epoch': epoch_num,
                }
                if not os.path.isdir('checkpoint'):
                    os.mkdir('checkpoint')
                torch.save(state, './checkpoint/ckpt.pth')
                torch.save(retinanet, './checkpoint/best.pth')
                best_acc = acc
            writer.add_scalar('test_acc', acc, epoch_num)

        scheduler.step(np.mean(epoch_loss))
        # torch.save(retinanet.module, osp.join('checkpoints', '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num)))

    retinanet.eval()
    torch.save(retinanet, 'model_final.pt')
    writer.close()
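# The per-channel mean/std constants in the script above are dataset-specific.
# A rough sketch of how such statistics could be computed; load_image() is an
# assumed helper returning HxWx3 float arrays scaled to [0, 1].
import numpy as np

def channel_stats(image_paths, load_image):
    pixels = np.concatenate(
        [load_image(p).reshape(-1, 3) for p in image_paths], axis=0)
    return tuple(pixels.mean(axis=0)), tuple(pixels.std(axis=0))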
              (GLOBAL_STEPS, epoch + 1, epoch_step + 1, steps_per_epoch,
               losses[0].mean(), losses[1].mean(), losses[2].mean(),
               cost_time, lr, loss.mean()))

        GLOBAL_STEPS += 1

    for idx in range(4):
        summary.add_scalar(loss_list[idx], losses[idx][0], epoch)

    torch.save(model.state_dict(), "./checkpoint2/model_{}.pth".format(epoch + 1))

    # if epoch + 1 > 23:
    model2 = FCOSDetector(mode="inference")
    model2 = torch.nn.DataParallel(model2)
    model2 = model2.cuda().eval()
    model2.load_state_dict(torch.load("./checkpoint2/model_{}.pth".format(epoch + 1),
                                      map_location=torch.device('cuda:1')),
                           strict=False)
    tt = coco_eval.evaluate_coco(val_dataset, model2)
    m_acc = tt[4].astype(float)  # AP for medium objects in the COCO summary
    if m_acc > best_acc:
        best_acc = m_acc
        best_ep = epoch + 1
    data_ = dict()
    for idx, key in enumerate(eval_list):
        summary.add_scalar(key, tt[idx].astype(float), epoch)
    print("Best Acc of Medium : {0}, Best Ep of Medium : {1}".format(best_acc, best_ep))

summary.close()
("loss_regr", float(mean_loss_regr)), ("loss_sem", float(mean_loss_sem)), ]) for ii, vv in enumerate(mean_ious): train_loss_summary_dict[("iou_%d" % (ii + 1))] = vv retinanet.eval() print("EVAL ON TRAIN SET ({:d} samples)".format(parser.n_train_eval)) print("=" * 30) if not retinanet.no_rpn: train_apar_summary = coco_eval.evaluate_coco( dataset_train, retinanet, use_gpu=use_gpu, use_n_samples=parser.n_train_eval, save=False, returntype='dict', coco_header=coco_header, **{ kk: vv for kk, vv in parser.__dict__.items() if str(kk).startswith('w_') }) if coco_header is None: coco_header = list( set((train_apar_summary.keys())) - set(train_loss_summary_dict.keys())) train_apar_summary.update(train_loss_summary_dict) else: train_apar_summary = train_loss_summary_dict print("\t".join([ '{}: {:.4f}'.format(kk, vv)
def main():
    assert torch.__version__.split('.')[0] == '1'
    print('CUDA available: {}'.format(torch.cuda.is_available()))

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=train_batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=num_workers,
                                  collate_fn=collater,
                                  batch_sampler=sampler)
    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=val_batch_size,
                                          drop_last=False)
    dataloader_val = DataLoader(dataset_val,
                                num_workers=num_workers,
                                collate_fn=collater,
                                batch_sampler=sampler_val)

    # Initialize the model.
    retinanet, device = model_init(model_name)
    retinanet.to(device)
    retinanet = torch.nn.DataParallel(retinanet).to(device)
    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=patience,
                                                     verbose=verbose)
    loss_hist = collections.deque(maxlen=maxlen)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Number of training images: {} Number of validation images: {}'.format(
        len(dataset_train), len(dataset_val)))

    for epoch_num in range(epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        print('Training dataset')
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    classification_loss, regression_loss = retinanet(
                        [data['img'].cuda().float(), data['annot']])
                else:
                    classification_loss, regression_loss = retinanet(
                        [data['img'].float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                        epoch_num, iter_num, float(classification_loss),
                        float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        print('Evaluating dataset')
        coco_eval.evaluate_coco(dataset_val, retinanet, threshold)
        scheduler.step(np.mean(epoch_loss))
        torch.save(retinanet.state_dict(), f'CP_epoch{epoch_num + 1}.pth')
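# model_init() is referenced here and in the evaluation snippet at the top of
# this section but is not defined in the excerpt. A hypothetical version
# consistent with its call sites (returns the model and the target device);
# the model_name-to-constructor mapping and the num_classes source are
# assumptions for illustration only.
def model_init(model_name):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    constructors = {
        'resnet18': model.resnet18,
        'resnet50': model.resnet50,
    }
    retinanet = constructors[model_name](
        num_classes=dataset_train.num_classes(), pretrained=True)
    return retinanet, device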
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--coco_path', help='Path to COCO directory',
                        type=str, default='./data/coco')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152',
                        type=int, default=50)
    parser.add_argument('--checkpoint', help='The path to the checkpoint.',
                        type=str, default=None)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--batch_size', help='Number of batch', type=int, default=16)
    parser.add_argument('--gpu_ids', help='Gpu parallel', type=str, default='1, 2')
    parser = parser.parse_args(args)

    # Create the data loaders
    dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                transform=transforms.Compose(
                                    [Normalizer(), Augmenter(), Resizer()]))
    dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                              transform=transforms.Compose([Normalizer(), Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=4, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=16,
                                  collate_fn=collater, batch_sampler=sampler)
    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=3,
                                collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()

    gpu_ids = [s.strip() for s in parser.gpu_ids.split(',')]
    device = torch.device('cuda:' + gpu_ids[0])
    torch.cuda.set_device(device)
    gpu_ids = list(map(int, gpu_ids))
    retinanet = torch.nn.DataParallel(retinanet, device_ids=gpu_ids).to(device)

    if parser.checkpoint:
        pretrained = torch.load(parser.checkpoint).state_dict()
        retinanet.module.load_state_dict(pretrained)

    # Add TensorBoard to record the training log.
    writer = SummaryWriter('./log')
    # writer.add_graph(retinanet, input_to_model=[images, labels])

    retinanet.training = True
    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                # The collater in this family of scripts emits the key 'annot'
                # (the original read data['ann'], which would raise KeyError).
                classification_loss, regression_loss = retinanet(
                    [data['img'].to(device), data['annot'].to(device)])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()

                loss_hist.append(float(loss))
                # Use a global step so the curves do not overlap across epochs.
                global_step = epoch_num * len(dataloader_train) + iter_num
                writer.add_scalar('Loss/train', loss, global_step)
                writer.add_scalar('Loss/reg_loss', regression_loss, global_step)
                writer.add_scalar('Loss/cls_loss', classification_loss, global_step)
                epoch_loss.append(float(loss))

                if (iter_num + 1) % 1000 == 0:
                    print('Save model')
                    torch.save(retinanet.module,
                               'COCO_retinanet_epoch{}_iter{}.pt'.format(epoch_num, iter_num))

                print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | '
                      'Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                          epoch_num, iter_num, float(classification_loss),
                          float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        print('Evaluating dataset')
        coco_eval.evaluate_coco(dataset_val, retinanet, writer)

        scheduler.step(np.mean(epoch_loss))
        torch.save(retinanet.module, 'COCO_retinanet_{}.pt'.format(epoch_num))

    retinanet.eval()
    torch.save(retinanet, 'model_final.pt')
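# Usage sketch (not part of the original script): the per-epoch checkpoints
# above are written with torch.save(retinanet.module, ...), i.e. the whole
# unwrapped module is pickled rather than a state_dict, so reloading requires
# the model code to be importable. The path below is a placeholder.
def load_epoch_checkpoint(path='COCO_retinanet_0.pt'):
    retinanet = torch.load(path)                         # full module object
    retinanet = torch.nn.DataParallel(retinanet).cuda()  # re-wrap for multi-GPU
    retinanet.eval()
    return retinanet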
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train',
                        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val',
                        help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152',
                        type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser = parser.parse_args(args)

    # Create the data loaders (this variant only evaluates, so the training
    # dataset is left commented out in the coco branch).
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when evaluating on COCO.')
        # dataset_train = CocoDataset(parser.coco_path, set_name='trainval35k',
        #                             transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val5k',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on csv.')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on csv.')
        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    # sampler = AspectRatioBasedSampler(dataset_train, batch_size=16, drop_last=False)
    # dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3,
                                    collate_fn=collater, batch_sampler=sampler_val)

    # Create the model (num_classes comes from the validation set, since this
    # script does not build a training set in the coco branch).
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_val.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_val.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_val.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_val.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_val.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    # NOTE: this pretrained state dict matches --depth 50.
    retinanet.load_state_dict(
        torch.load('coco_resnet_50_map_0_335_state_dict.pt', encoding='latin1'))

    if use_gpu:
        retinanet = retinanet.cuda()
        # retinanet = torch.nn.DataParallel(retinanet).cuda()

    # Evaluation only: no optimizer, scheduler, or training loop is needed.
    retinanet.eval()

    if parser.dataset == 'coco':
        print('Evaluating dataset')
        coco_eval.evaluate_coco(dataset_val, retinanet)
    elif parser.dataset == 'csv' and parser.csv_val is not None:
        print('Evaluating dataset')
        mAP = csv_eval.evaluate(dataset_val, retinanet)
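# Annotation format note: the CSVDataset used by the csv branch above follows
# the upstream pytorch-retinanet README convention (an assumption for this
# particular variant):
#
#     annotations.csv:  path/to/image.jpg,x1,y1,x2,y2,class_name
#                       path/to/image2.jpg,,,,,           <- image with no boxes
#     classes.csv:      class_name,id
#
# A typical evaluation invocation would then be (script name is a placeholder):
#     python eval_csv.py --dataset csv --csv_val val_annots.csv --csv_classes classes.csv --depth 50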
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv, coco or openimages')
    parser.add_argument('--data_path', help='Path to the dataset directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--resume', help='Checkpoint to load')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--bs', help='Batch size', type=int, default=64)
    parser.add_argument('--net', help='Network to use', default='fasterrcnn')
    parser.add_argument('--log_interval', help='Iterations before outputting stats', type=int, default=1)
    parser.add_argument('--checkpoint_interval', help='Iterations before saving an intermediate checkpoint',
                        type=int, default=80)
    parser.add_argument('--iterations', type=int, help='Gradient-accumulation steps per effective batch',
                        default=32)
    parser = parser.parse_args(args)

    # Gradient accumulation: the requested batch size is split into
    # parser.iterations minibatches; parser.bs becomes the minibatch size.
    parser.bs = parser.bs // parser.iterations
    print('With {} iterations the effective batch size is {}'.format(
        parser.iterations, parser.bs * parser.iterations))

    # Create the data loaders
    if parser.dataset == 'coco':
        # Kept from the upstream script but not implemented in this variant;
        # the code below the raise is unreachable.
        raise NotImplementedError()
        if parser.data_path is None:
            raise ValueError('Must provide --data_path when training on COCO.')
        dataset_train = CocoDataset(parser.data_path, set_name='train2017',
                                    transform=Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.data_path, set_name='val2017',
                                  transform=Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        raise NotImplementedError()
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on csv.')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on csv.')
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=Compose([Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'openimages':
        if parser.data_path is None:
            raise ValueError('Must provide --data_path when training on OpenImages')
        dataset_train = OidDataset(parser.data_path, subset='train',
                                   transform=Compose([ToTensor(), Augment(),
                                                      Resizer(min_side=600, max_side=1000)]))
        dataset_val = OidDataset(parser.data_path, subset='validation',
                                 transform=Compose([ToTensor(),
                                                    Resizer(min_side=600, max_side=1000)]))
    elif parser.dataset == 'dummy':
        # Dummy dataset used only for debugging purposes
        dataset_train = DummyDataset(transform=Compose([ToTensor(), Resizer()]))
        # dataset_val = DummyDataset(transform=Compose([ToTensor(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv, coco or openimages), exiting.')

    sampler = BalancedSampler(dataset_train, batch_size=parser.bs, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=8,
                                  collate_fn=collate_fn, batch_sampler=sampler)
    # if dataset_val is not None:
    #     sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    #     dataloader_val = DataLoader(dataset_val, num_workers=12, collate_fn=collate_fn, batch_sampler=sampler_val)

    # Create the model
    model = create_detection_model(dataset_train.num_classes(), parser)

    # Create the experiment folder
    experiment_fld = 'experiment_{}_{}_resnet{}_{}'.format(
        parser.net, parser.dataset, parser.depth,
        time.strftime('%Y%m%d%H%M%S', time.localtime()))
    experiment_fld = os.path.join('outputs', experiment_fld)
    if not os.path.exists(experiment_fld):
        os.makedirs(experiment_fld)
    logger = SummaryWriter(experiment_fld)

    use_gpu = True
    if use_gpu:
        model = model.cuda()
        model = torch.nn.DataParallel(model).cuda()

    model.training = True
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

    # Load checkpoint if needed
    start_epoch = 0
    if parser.resume:
        print('Loading checkpoint {}'.format(parser.resume))
        checkpoint = torch.load(parser.resume)
        start_epoch = checkpoint['epoch']
        model.module.load_state_dict(checkpoint['model'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])
        print('Checkpoint loaded!')

    loss_hist = collections.deque(maxlen=500)

    model.train()
    # model.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in tqdm.trange(start_epoch, parser.epochs):
        model.train()
        # model.module.freeze_bn()

        epoch_loss = []
        log_losses_mean = {}
        running_loss_sum = 0
        data_progress = tqdm.tqdm(dataloader_train)
        optimizer.zero_grad()

        for minibatch_idx, data in enumerate(data_progress):
            images, targets = data
            images = list(image.cuda().float() for image in images)
            targets = [{k: v.cuda() for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            loss = sum(loss for loss in loss_dict.values())
            monitor_loss = loss.clone().detach()

            # Scale the loss so the accumulated gradient matches the
            # effective batch size.
            loss /= parser.iterations
            running_loss_sum += float(loss.item())
            loss.backward()

            # Track the unscaled loss for the LR scheduler; the upstream
            # script left epoch_loss empty, which made np.mean(epoch_loss)
            # below evaluate to nan.
            epoch_loss.append(float(monitor_loss.item()))

            if len(log_losses_mean) == 0:
                log_losses_mean = clone_tensor_dict(loss_dict)
                log_losses_mean['total_loss'] = float(monitor_loss.item())
            else:
                loss_dict['total_loss'] = monitor_loss
                log_losses_mean = Counter(clone_tensor_dict(loss_dict)) + Counter(log_losses_mean)

            if (minibatch_idx + 1) % parser.iterations == 0:
                data_progress.set_postfix(dict(it=minibatch_idx // parser.iterations,
                                               loss=running_loss_sum))
                # All minibatches have been accumulated: step, then zero the grad.
                optimizer.step()
                optimizer.zero_grad()
                running_loss_sum = 0
                # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)

            if (minibatch_idx + 1) % (parser.log_interval * parser.iterations) == 0:
                # Compute the mean of the accumulated logs.
                log_losses_mean = {k: (v / (parser.log_interval * parser.iterations))
                                   for k, v in log_losses_mean.items()}
                logger.add_scalars('logs/losses', log_losses_mean,
                                   epoch_num * len(dataloader_train) + minibatch_idx)
                log_losses_mean = {}

            if (minibatch_idx + 1) % (parser.checkpoint_interval * parser.iterations) == 0:
                # Save an intermediate checkpoint
                save_checkpoint({
                    'model': model.module.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                    'epoch': epoch_num
                }, experiment_fld, overwrite=True)

            if (minibatch_idx + 1) % (1 * parser.iterations) == 0:
                # Flush cuda memory periodically.
                torch.cuda.empty_cache()

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, model)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, model)
        # TODO: write evaluation code for openimages

        scheduler.step(np.mean(epoch_loss))

        save_checkpoint({
            'model': model.module.state_dict(),
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict(),
            'epoch': epoch_num,
        }, experiment_fld, overwrite=False)

    model.eval()
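# save_checkpoint() is called above but not defined in this file. A minimal
# sketch consistent with how it is invoked; the filename scheme is an
# assumption, not the author's code.
def save_checkpoint(state, folder, overwrite=False):
    """Persist model/optimizer/scheduler state into the experiment folder."""
    if overwrite:
        # Intermediate checkpoint, rewritten in place during the epoch.
        path = os.path.join(folder, 'checkpoint_latest.pth')
    else:
        # End-of-epoch checkpoint, one file per epoch.
        path = os.path.join(folder, 'checkpoint_epoch_{}.pth'.format(state['epoch']))
    torch.save(state, path)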
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--start-epoch', default=0, type=int,
                        help='manual epoch number (useful on restarts)')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')
        dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                    transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on csv.')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on csv.')
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=4, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3,
                                  collate_fn=collater, batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3,
                                    collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    # retinanet.load_state_dict(torch.load('/users/wenchi/ghwwc/Pytorch-retinanet-master/resnet50-19c8e357.pth'))
    # if True:
    #     print('==> Resuming from checkpoint..')
    #     checkpoint = torch.load('/users/wenchi/ghwwc/Pytorch-retinanet-master/coco_retinanet_2.pt')
    #     retinanet.load_state_dict(checkpoint)
    #     best_loss = checkpoint['loss']
    #     start_epoch = checkpoint['epoch']

    retinanet = torch.nn.DataParallel(retinanet).cuda()
    retinanet.training = True

    # optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    optimizer = optim.SGD(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()  # for training from the very beginning

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.start_epoch, parser.epochs):
        # This variant re-loads the checkpoint at the top of every epoch;
        # parser.resume is updated at the bottom of the loop so each epoch
        # resumes from the model saved by the previous one.
        if parser.resume:
            if os.path.isfile(parser.resume):
                print("=> loading checkpoint '{}'".format(parser.resume))
                checkpoint = torch.load(parser.resume)
                # The checkpoint is a fully pickled model, not a state_dict.
                retinanet = checkpoint
                print("=> loaded checkpoint '{}' (epoch {})".format(parser.resume, epoch_num))
            else:
                print("=> no checkpoint found at '{}'".format(parser.resume))

        retinanet.train()
        # The saved object is DataParallel-wrapped in both the fresh and the
        # resumed path, so freeze_bn must go through .module.
        retinanet.module.freeze_bn()

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot'].cuda()])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | '
                      'Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                          epoch_num, iter_num, float(classification_loss),
                          float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))

        # torch.save(retinanet.module, '{}_retinanet_101_{}.pt'.format(parser.dataset, epoch_num))
        name = '{}_retinanet_dilation_experiment1_{}.pt'.format(parser.dataset, epoch_num)
        torch.save(retinanet, name)
        # Resume next epoch from the checkpoint just written (the original
        # concatenated the literal string 'name' instead of the variable).
        parser.resume = os.path.join('/users/wenchi/ghwwc/pytorch-retinanet-master_new', name)

    retinanet.eval()
    torch.save(retinanet, 'model_final_dilation_experiment1.pt')
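# The resume logic above re-loads a fully pickled model at each epoch. A more
# conventional pattern (a sketch, not the author's code) saves state dicts,
# which keeps checkpoints loadable across refactors of the model class:
def save_resume_point(retinanet, optimizer, epoch_num, path='checkpoint.pth'):
    torch.save({
        'epoch': epoch_num + 1,
        'state_dict': retinanet.module.state_dict(),
        'optimizer': optimizer.state_dict(),
    }, path)

def load_resume_point(retinanet, optimizer, path='checkpoint.pth'):
    checkpoint = torch.load(path)
    retinanet.module.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    return checkpoint['epoch']  # epoch to resume from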
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv, coco or openimages')
    parser.add_argument('--data_path', help='Path to the dataset directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--resume', help='Checkpoint to load the model from')
    parser.add_argument('--resume_attr', help='Checkpoint to load the attributes model from')
    parser.add_argument('--resume_rel', help='Checkpoint to load the relationships from')
    parser.add_argument('--detector_snapshot', help='Detector snapshot')
    parser.add_argument('--finetune_detector', action='store_true', default=False,
                        help='Enable finetuning the detector')
    parser.add_argument('--lr_step_size', type=int, default=20,
                        help='After how many epochs the lr is decreased')
    # type must be float, not int: an int parser cannot represent values like 1e-4.
    parser.add_argument('--lr', type=float, default=1e-4, help='Initial learning rate')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--bs', help='Batch size', type=int, default=64)
    parser.add_argument('--net', help='Network to use', default='fasterrcnn')
    parser.add_argument('--train_rel', action='store_true', default=False,
                        help='Enable training relationships')
    parser.add_argument('--train_attr', action='store_true', default=False,
                        help='Enable training attributes')
    parser.add_argument('--log_interval', help='Iterations before outputting stats', type=int, default=1)
    parser.add_argument('--checkpoint_interval', help='Iterations before saving an intermediate checkpoint',
                        type=int, default=80)
    parser.add_argument('--iterations', type=int, help='Gradient-accumulation steps per effective batch',
                        default=32)
    parser = parser.parse_args(args)

    # Sanity checks
    assert parser.train_rel or parser.train_attr, \
        'You have to train one of the attribute or relationship networks!'
    assert not (not parser.train_rel and parser.resume_rel), \
        'It is useless to load relationships when you do not train them!'
    assert not (not parser.train_attr and parser.resume_attr), \
        'It is useless to load attributes when you do not train them!'

    # Gradient accumulation: parser.bs becomes the minibatch size.
    parser.bs = parser.bs // parser.iterations
    print('With {} iterations the effective batch size is {}'.format(
        parser.iterations, parser.bs * parser.iterations))

    # Create the data loaders
    if parser.dataset == 'openimages':
        if parser.data_path is None:
            raise ValueError('Must provide --data_path when training on OpenImages')
        dataset_train = OidDatasetVRD(parser.data_path, subset='train',
                                      transform=Compose([ToTensor(), Augment(),
                                                         Resizer(min_side=600, max_side=1000)]))
        # dataset_val = OidDatasetVRD(parser.data_path, subset='validation',
        #                             transform=Compose([ToTensor(), Resizer(min_side=600, max_side=1000)]))
    elif parser.dataset == 'dummy':
        # Dummy dataset used only for debugging purposes
        raise NotImplementedError()
    else:
        raise ValueError('Dataset type not understood (must be openimages), exiting.')

    # If training one of relationships or attributes, balance the sampler.
    # if not (parser.train_attr and parser.train_rel):
    print('Dataloader is using the BalancedSampler!')
    sampler_train = BalancedSampler(dataset_train, batch_size=parser.bs,
                                    train_rel=parser.train_rel, train_attr=parser.train_attr)
    dataloader_train = DataLoader(dataset_train, num_workers=8,
                                  collate_fn=collate_fn, batch_sampler=sampler_train)
    # dataloader_train = DataLoader(dataset_train, num_workers=8, batch_size=parser.bs, collate_fn=collate_fn, shuffle=True)
    # if dataset_val is not None:
    #     sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    #     dataloader_val = DataLoader(dataset_val, num_workers=12, collate_fn=collate_fn, batch_sampler=sampler_val)

    # Create the detection model
    detector = create_detection_model(dataset_train.num_classes(), parser)

    # Create the experiment folder
    if parser.train_attr and parser.train_rel:
        mode = 'attr-and-rel'
    elif parser.train_attr:
        mode = 'only-attr'
    elif parser.train_rel:
        mode = 'only-rel'
    experiment_fld = 'vrd_{}_experiment_{}_{}_resnet{}_{}'.format(
        mode, parser.net, parser.dataset, parser.depth,
        time.strftime('%Y%m%d%H%M%S', time.localtime()))
    experiment_fld = os.path.join('outputs', experiment_fld)
    if not os.path.exists(experiment_fld):
        os.makedirs(experiment_fld)
    logger = SummaryWriter(experiment_fld)

    use_gpu = True
    # The detector is moved to the GPU together with the VRD wrapper below.
    # if use_gpu:
    #     detector = detector.cuda()
    #     detector = torch.nn.DataParallel(detector).cuda()

    if parser.detector_snapshot:
        checkpoint = torch.load(parser.detector_snapshot)
        weights = checkpoint['model']
        weights = {k.replace('module.', ''): v for k, v in weights.items()}
        detector.load_state_dict(weights)
        print('Correctly loaded the detector checkpoint {}'.format(parser.detector_snapshot))

    # Create the VRD model given the detector
    model = VRD(detector, dataset=dataset_train,
                train_relationships=parser.train_rel,
                train_attributes=parser.train_attr,
                finetune_detector=parser.finetune_detector)

    if use_gpu:
        model = model.cuda()
        model = torch.nn.DataParallel(model).cuda()

    optimizer = optim.Adam(model.parameters(), lr=parser.lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=parser.lr_step_size)

    # Load checkpoints if needed
    start_epoch = 0
    if parser.resume_rel:
        print('Loading relationship checkpoint {}'.format(parser.resume_rel))
        rel_checkpoint = torch.load(parser.resume_rel)
        model.module.relationships_net.load_state_dict(rel_checkpoint['model_rel'])
        if not parser.resume_attr:
            print('Resuming also scheduler and optimizer...')
            start_epoch = rel_checkpoint['epoch']
            optimizer.load_state_dict(rel_checkpoint['optimizer'])
            scheduler.load_state_dict(rel_checkpoint['scheduler'])
    if parser.resume_attr:
        print('Loading attributes checkpoint {}'.format(parser.resume_attr))
        attr_checkpoint = torch.load(parser.resume_attr)
        model.module.attributes_net.load_state_dict(attr_checkpoint['model_attr'])
        if not parser.resume_rel:
            print('Resuming also scheduler and optimizer...')
            start_epoch = attr_checkpoint['epoch']
            optimizer.load_state_dict(attr_checkpoint['optimizer'])
            scheduler.load_state_dict(attr_checkpoint['scheduler'])
    if parser.resume:
        print('Loading both attributes and relationships models {}'.format(parser.resume))
        checkpoint = torch.load(parser.resume)
        model.module.relationships_net.load_state_dict(checkpoint['model_rel'])
        model.module.attributes_net.load_state_dict(checkpoint['model_attr'])
        start_epoch = checkpoint['epoch']
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])
        print('Checkpoint loaded!')

    loss_hist = collections.deque(maxlen=500)

    model.train()
    # model.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in tqdm.trange(start_epoch, parser.epochs):
        logger.add_scalar('learning_rate', optimizer.param_groups[0]['lr'],
                          epoch_num * len(dataloader_train))
        model.train()
        # model.module.freeze_bn()

        epoch_loss = []
        log_losses_mean = {}
        running_loss_sum = 0
        data_progress = tqdm.tqdm(dataloader_train)
        optimizer.zero_grad()

        for minibatch_idx, data in enumerate(data_progress):
            images, targets = data
            images = list(image.cuda().float() for image in images)
            targets = [{k: v.cuda() for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            loss = sum(loss for loss in loss_dict.values())
            monitor_loss = loss.clone().detach()
            loss /= parser.iterations
            running_loss_sum += float(loss.item())
            loss.backward()

            if len(log_losses_mean) == 0:
                log_losses_mean = clone_tensor_dict(loss_dict)
                log_losses_mean['total_loss'] = float(monitor_loss.item())
            else:
                loss_dict['total_loss'] = monitor_loss
                log_losses_mean = Counter(clone_tensor_dict(loss_dict)) + Counter(log_losses_mean)

            if (minibatch_idx + 1) % parser.iterations == 0:
                data_progress.set_postfix(dict(it=minibatch_idx // parser.iterations,
                                               loss=running_loss_sum))
                # All minibatches have been accumulated: step, then zero the grad.
                optimizer.step()
                optimizer.zero_grad()
                running_loss_sum = 0
                # torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)

            if (minibatch_idx + 1) % (parser.log_interval * parser.iterations) == 0:
                # Compute the mean of the accumulated logs.
                log_losses_mean = {k: (v / (parser.log_interval * parser.iterations))
                                   for k, v in log_losses_mean.items()}
                logger.add_scalars('logs/losses', log_losses_mean,
                                   epoch_num * len(dataloader_train) + minibatch_idx)
                log_losses_mean = {}

            if (minibatch_idx + 1) % (parser.checkpoint_interval * parser.iterations) == 0:
                # Save an intermediate checkpoint
                save_checkpoint({
                    'model_rel': model.module.relationships_net.state_dict() if parser.train_rel else None,
                    'model_attr': model.module.attributes_net.state_dict() if parser.train_attr else None,
                    'model_det': model.module.detector.state_dict() if parser.finetune_detector else None,
                    'optimizer': optimizer.state_dict(),
                    'scheduler': scheduler.state_dict(),
                    'epoch': epoch_num
                }, experiment_fld, overwrite=True)

            if (minibatch_idx + 1) % 5 == 0:
                # Flush cuda memory periodically.
                torch.cuda.empty_cache()

        # Only the openimages dataset can reach this point; the coco and csv
        # evaluation branches below are kept from the upstream script.
        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, model)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, model)
        # TODO: write evaluation code for openimages

        scheduler.step()

        save_checkpoint({
            'model_rel': model.module.relationships_net.state_dict() if parser.train_rel else None,
            'model_attr': model.module.attributes_net.state_dict() if parser.train_attr else None,
            'model_det': model.module.detector.state_dict() if parser.finetune_detector else None,
            'optimizer': optimizer.state_dict(),
            'scheduler': scheduler.state_dict(),
            'epoch': epoch_num
        }, experiment_fld, overwrite=False)

    model.eval()
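# clone_tensor_dict() is used in the logging path above but not defined in this
# file. Given that its result is summed with collections.Counter arithmetic, it
# plausibly detaches each loss into a plain float (an assumption, not the
# author's code). Counter addition drops non-positive entries, which is fine
# for strictly positive loss values.
def clone_tensor_dict(loss_dict):
    """Detach a dict of scalar loss tensors into floats for logging."""
    return {k: float(v.detach().item()) if torch.is_tensor(v) else float(v)
            for k, v in loss_dict.items()}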