def main_worker(gpu, ngpus_per_node, args):
    args.gpu = gpu
    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes.
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size,
                                rank=args.rank)

    checkpoint = []
    if args.resume is not None:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map the checkpoint to the specified single GPU.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            params = checkpoint['parser']
            args.num_class = params.num_class
            args.network = params.network
            args.start_epoch = params.start_epoch + 1
            del params

    model = EfficientDet(num_classes=args.num_class,
                         network=args.network,
                         W_bifpn=EFFICIENTDET[args.network]['W_bifpn'],
                         D_bifpn=EFFICIENTDET[args.network]['D_bifpn'],
                         D_class=EFFICIENTDET[args.network]['D_class'],
                         gpu=args.gpu)
    if args.resume is not None:
        model.load_state_dict(checkpoint['state_dict'])
    del checkpoint

    if args.distributed:
        # For multiprocessing distributed, the DistributedDataParallel
        # constructor should always set the single device scope; otherwise
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have.
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int(
                (args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[args.gpu], find_unused_parameters=True)
            print('Run with DistributedDataParallel with device_ids...')
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to
            # all available GPUs if device_ids are not set.
            model = torch.nn.parallel.DistributedDataParallel(model)
            print('Run with DistributedDataParallel without device_ids...')
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        print('Run with DataParallel...')
        model = torch.nn.DataParallel(model).cuda()

    # Training dataset
    train_dataset = []
    if args.dataset == 'VOC':
        train_dataset = VOCDetection(root=args.dataset_root,
                                     transform=transforms.Compose([
                                         Normalizer(),
                                         Augmenter(),
                                         Resizer()
                                     ]))
    elif args.dataset == 'COCO':
        train_dataset = CocoDataset(
            root_dir=args.dataset_root,
            set_name='train2017',
            transform=get_augumentation(
                phase='train',
                width=EFFICIENTDET[args.network]['input_size'],
                height=EFFICIENTDET[args.network]['input_size']))

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              num_workers=args.workers,
                              shuffle=True,
                              collate_fn=collater,
                              pin_memory=True)

    # Define the loss function (criterion), optimizer, and scheduler.
    optimizer = optim.AdamW(model.parameters(), lr=args.lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)
    cudnn.benchmark = True

    for epoch in range(args.start_epoch, args.num_epoch):
        train(train_loader, model, scheduler, optimizer, epoch, args)
        state = {
            'epoch': epoch,
            'parser': args,
            'state_dict': get_state_dict(model)
        }
        torch.save(
            state,
            './weights/checkpoint_{}_{}_{}.pth'.format(args.dataset,
                                                       args.network, epoch))
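
main_worker is written to be launched once per GPU. A minimal sketch of the usual entry point for this pattern, assuming the standard torch.multiprocessing launcher (the actual main() for this training script is not shown, so get_args() and the exact flag handling are illustrative):

import torch
import torch.multiprocessing as mp

def main():
    args = get_args()  # hypothetical: parses the flags used in main_worker
    ngpus_per_node = torch.cuda.device_count()
    if args.multiprocessing_distributed:
        # One process per GPU; world_size becomes the total process count
        # across all nodes.
        args.world_size = ngpus_per_node * args.world_size
        # mp.spawn passes the process index (here, the GPU id) as the first
        # argument to main_worker.
        mp.spawn(main_worker, nprocs=ngpus_per_node,
                 args=(ngpus_per_node, args))
    else:
        # Single-process path: run directly on args.gpu (possibly None).
        main_worker(args.gpu, ngpus_per_node, args)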
def main(args=None):
    parser = argparse.ArgumentParser(
        description="Simple training script for training a RetinaNet network.")
    parser.add_argument(
        "--dataset", help="Dataset type, must be one of csv, coco, or ycb.")
    parser.add_argument("--path", help="Path to dataset directory")
    parser.add_argument(
        "--csv_train",
        help="Path to file containing training annotations (see readme)")
    parser.add_argument("--csv_classes",
                        help="Path to file containing class list (see readme)")
    parser.add_argument("--csv_val",
                        help="Path to file containing validation annotations "
                             "(optional, see readme)")
    parser.add_argument(
        "--depth",
        help="Resnet depth, must be one of 18, 34, 50, 101, 152",
        type=int,
        default=50)
    parser.add_argument("--epochs", help="Number of epochs", type=int,
                        default=100)
    parser.add_argument("--evaluate_every", default=20, type=int)
    parser.add_argument("--print_every", default=20, type=int)
    parser.add_argument("--distributed", action="store_true",
                        help="Run model in distributed mode with DataParallel")
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == "coco":
        if parser.path is None:
            raise ValueError(
                "Must provide --path when training on non-CSV datasets")
        dataset_train = CocoDataset(parser.path,
                                    ann_file="instances_train2014.json",
                                    set_name="train2014",
                                    transform=transforms.Compose([
                                        Normalizer(),
                                        Augmenter(),
                                        Resizer(min_side=512, max_side=512)
                                    ]))
        dataset_val = CocoDataset(parser.path,
                                  ann_file="instances_val2014.cars.json",
                                  set_name="val2014",
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif parser.dataset == "ycb":
        dataset_train = YCBDataset(parser.path, "image_sets/train.txt",
                                   transform=transforms.Compose([
                                       Normalizer(),
                                       Augmenter(),
                                       Resizer(min_side=512, max_side=512)
                                   ]),
                                   train=True)
        dataset_val = YCBDataset(parser.path, "image_sets/val.txt",
                                 transform=transforms.Compose(
                                     [Normalizer(), Resizer()]),
                                 train=False)
    elif parser.dataset == "csv":
        if parser.csv_train is None:
            raise ValueError("Must provide --csv_train when training on csv")
        if parser.csv_classes is None:
            raise ValueError("Must provide --csv_classes when training on csv")
        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print("No validation annotations provided.")
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(), Resizer()]))
    else:
        raise ValueError(
            "Dataset type not understood (must be csv, coco, or ycb), exiting.")

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=12,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=8,
                                  collate_fn=collater, batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=4,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            "Unsupported model depth, must be one of 18, 34, 50, 101, 152")

    print("CUDA available: {}".format(torch.cuda.is_available()))
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    retinanet = retinanet.to(device)

    if parser.distributed:
        retinanet = torch.nn.DataParallel(retinanet)

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3,
                                                     verbose=True)
    loss_hist = collections.deque(maxlen=500)

    print("Num training images: {}".format(len(dataset_train)))

    best_mean_avg_prec = 0.0
    for epoch_num in range(parser.epochs):
        retinanet.train()
        # When wrapped in DataParallel, freeze_bn lives on the underlying
        # module, not on the wrapper.
        if isinstance(retinanet, torch.nn.DataParallel):
            retinanet.module.freeze_bn()
        else:
            retinanet.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet(
                    [data["img"].to(device).float(), data["annot"]])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss.item()))
                epoch_loss.append(float(loss.item()))
                # Report every print_every iterations (the operands were
                # reversed in the original, which divides by zero on the
                # first iteration).
                if iter_num % parser.print_every == 0:
                    print("Epoch: {} | Iteration: {}/{} | "
                          "Classification loss: {:1.5f} | "
                          "Regression loss: {:1.5f} | "
                          "Running loss: {:1.5f}".format(
                              epoch_num, iter_num, len(dataloader_train),
                              float(classification_loss),
                              float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if ((epoch_num + 1) % parser.evaluate_every == 0
                or epoch_num + 1 == parser.epochs):
            mAP = 0.0
            print("Evaluating dataset")
            if parser.dataset == "coco":
                mAP = coco_eval.evaluate_coco(dataset_val, retinanet)
            else:
                AP = eval.evaluate(dataset_val, retinanet)
                mAP = np.asarray([x[0] for x in AP.values()]).mean()
            print("Val set mAP: ", mAP)
            if mAP > best_mean_avg_prec:
                best_mean_avg_prec = mAP
                torch.save(
                    retinanet.state_dict(),
                    "{}_retinanet_best_mean_ap_{}.pt".format(
                        parser.dataset, epoch_num))

        scheduler.step(np.mean(epoch_loss))

    retinanet.eval()
    torch.save(retinanet.state_dict(), "retinanet_model_final.pt")
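
retinanet.freeze_bn() above is a method on the model class, whose definition is not part of this file. A minimal sketch of what it conventionally does in RetinaNet implementations (an assumption about this particular model class, not code from it):

import torch.nn as nn

def freeze_bn(self):
    """Keep BatchNorm layers in eval mode so their running statistics are
    not updated by the small batch sizes typical of detection training."""
    for module in self.modules():
        if isinstance(module, nn.BatchNorm2d):
            module.eval()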
parser.add_argument(
    '--weight', default=None, type=str,
    help='Checkpoint state_dict file to resume training from')
args = parser.parse_args()

if args.weight is not None:
    resume_path = str(args.weight)
    print("Loading checkpoint: {} ...".format(resume_path))
    # Load to CPU first; the model is moved to the GPU after the weights
    # are restored.
    checkpoint = torch.load(args.weight,
                            map_location=lambda storage, loc: storage)
    params = checkpoint['parser']
    args.num_class = params.num_class
    args.network = params.network

# A checkpoint is expected here; without --weight the state_dict load
# below will fail.
model = EfficientDet(num_classes=args.num_class,
                     network=args.network,
                     W_bifpn=EFFICIENTDET[args.network]['W_bifpn'],
                     D_bifpn=EFFICIENTDET[args.network]['D_bifpn'],
                     D_class=EFFICIENTDET[args.network]['D_class'],
                     is_training=False,
                     threshold=args.threshold,
                     iou_threshold=args.iou_threshold)
model.load_state_dict(checkpoint['state_dict'])
model = model.cuda()

if args.dataset == 'VOC':
    valid_dataset = VOCDetection(root=args.dataset_root,
                                 image_sets=[('2007', 'test')],
                                 transform=transforms.Compose(
                                     [Normalizer(), Resizer()]))
    evaluate(valid_dataset, model)
else:
    valid_dataset = CocoDataset(root_dir=args.dataset_root,
                                set_name='val2017',
                                transform=transforms.Compose(
                                    [Normalizer(), Resizer()]))
    evaluate_coco(valid_dataset, model)
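
A possible invocation of this evaluation script (the script name is hypothetical; the flags match the args.* fields referenced above, and the checkpoint name follows the training script's save pattern):

# python eval.py --dataset VOC --dataset_root ./data/VOCdevkit \
#     --weight ./weights/checkpoint_VOC_<network>_<epoch>.pth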
model = EfficientDet(num_classes=args.num_class,
                     network=args.network,
                     W_bifpn=EFFICIENTDET[args.network]['W_bifpn'],
                     D_bifpn=EFFICIENTDET[args.network]['D_bifpn'],
                     D_class=EFFICIENTDET[args.network]['D_class'],
                     is_training=False,
                     threshold=args.threshold,
                     iou_threshold=args.iou_threshold)
model.load_state_dict(checkpoint['state_dict'])
model = model.cuda()

if args.dataset == 'VOC':
    valid_dataset = VOCDetection(root=args.dataset_root,
                                 image_sets=[('2007', 'test')],
                                 transform=transforms.Compose(
                                     [Normalizer(), Resizer()]))
    evaluate(valid_dataset, model)
elif args.dataset == 'COCO':
    valid_dataset = CocoDataset(root_dir=args.dataset_root,
                                set_name='val2017',
                                transform=transforms.Compose(
                                    [Normalizer(), Resizer()]))
    evaluate_coco(valid_dataset, model)
elif args.dataset == 'XVIEW':
    normalizer = Normalizer(mu=np.array([0.23582, 0.19489, 0.15979]),
                            sig=np.array([0.11761, 0.096071, 0.086455]))
    valid_dataset = XView(root=args.dataset_root + '/val',
                          transform=transforms.Compose(
                              [normalizer, Resizer()]))
    evaluate(valid_dataset, model)
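
The XVIEW branch above passes dataset-specific channel statistics to Normalizer rather than the defaults. A minimal sketch of how such per-channel statistics can be computed for a new dataset (an assumption, not code from this repo; any iterable of HxWx3 arrays scaled to [0, 1] works):

import numpy as np

def channel_stats(images):
    """Per-channel mean and std over an iterable of HxWx3 arrays in [0, 1]."""
    n = 0
    s = np.zeros(3)   # running sum of pixel values per channel
    sq = np.zeros(3)  # running sum of squared pixel values per channel
    for img in images:
        pixels = img.reshape(-1, 3).astype(np.float64)
        n += pixels.shape[0]
        s += pixels.sum(axis=0)
        sq += (pixels ** 2).sum(axis=0)
    mean = s / n
    std = np.sqrt(sq / n - mean ** 2)
    return mean, std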
                      momentum=0.9,
                      nesterov=True)

train_params = {
    'batch_size': opt.batch_size,
    'shuffle': False,
    'drop_last': False,
    'collate_fn': collater,
    'num_workers': opt.num_workers,
    'pin_memory': True
}

# Alternative (disabled): train on a COCO-style dataset instead of the
# per-folder VID data below.
# trainset = CocoDataset(root_dir=os.path.join(opt.data_path, params.project_name),
#                        set=params.val_set,
#                        transform=transforms.Compose([
#                            Normalizer(mean=params.mean, std=params.std),
#                            Augmenter(),
#                            Resizer(input_sizes[opt.compound_coef])]))

data_dir = os.path.join(opt.data_path, params.train_set)
ann_dir = os.path.join(opt.data_path, 'annotations', params.train_set)
tfs = transforms.Compose([
    Normalizer(mean=params.mean, std=params.std),
    Augmenter(),
    Resizer(input_sizes[opt.compound_coef])
])
# Train on each video folder in turn, pairing every image directory with
# the annotation file of the same name.
for f in os.listdir(data_dir):
    img_dir = os.path.join(data_dir, f)
    ann_file = os.path.join(ann_dir, f + '.json')
    trainset = VIDDataset(img_dir, ann_file, tfs)
    trainloader = DataLoader(trainset, **train_params)
    train(trainloader, model, criterion, optimizer, opt)
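
The per-folder loop above runs train() to completion on one video folder before moving to the next, so batches never mix frames from different folders. If cross-folder batches are preferred, the per-folder datasets can be merged with torch.utils.data.ConcatDataset; a sketch reusing the names defined above (enabling shuffling would also require flipping 'shuffle' in train_params):

from torch.utils.data import ConcatDataset, DataLoader

# Build one dataset per video folder, then merge them so a single
# DataLoader can draw samples from all folders.
datasets = []
for f in os.listdir(data_dir):
    img_dir = os.path.join(data_dir, f)
    ann_file = os.path.join(ann_dir, f + '.json')
    datasets.append(VIDDataset(img_dir, ann_file, tfs))

trainloader = DataLoader(ConcatDataset(datasets), **train_params)
train(trainloader, model, criterion, optimizer, opt)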