def main(args=None):
    parser = argparse.ArgumentParser(description='Simple evaluation script for evaluating a RetinaNet network.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--model_path', help='Path to model', type=str)
    parser = parser.parse_args(args)

    dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                              transform=transforms.Compose([Normalizer(), Resizer()]))
    dataset_val.image_ids = dataset_val.image_ids[:50]  # TEST: evaluate on the first 50 images only

    # Create the model
    retinanet = model.resnet50(num_classes=dataset_val.num_classes(), pretrained=True)

    use_gpu = True
    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        retinanet.load_state_dict(torch.load(parser.model_path))
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet.load_state_dict(torch.load(parser.model_path))
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = False
    retinanet.eval()
    retinanet.module.freeze_bn()

    coco_eval.evaluate_coco(dataset_val, retinanet)
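
# Hedged usage sketch: a guarded smoke-test invocation of the evaluation entry
# point above. The checkpoint name and COCO path are hypothetical placeholders,
# not taken from the original repository.
if __name__ == '__main__':
    main(['--coco_path', '/data/coco', '--model_path', 'coco_retinanet_final.pt'])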
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=200)
    parser.add_argument('--pt_to_loader', help='Path to a saved model to resume training from')
    parser.add_argument('--pt_to_save', help='Directory in which to save checkpoints')
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')
        dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                    transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV.')
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    # sampler = AspectRatioBasedSampler(dataset_train, batch_size=32, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=2, collate_fn=collater, batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        # sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=12, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=2, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model: resume from --pt_to_loader if given, otherwise train from scratch
    if parser.pt_to_loader:
        print('Resuming training from model {}'.format(parser.pt_to_loader))
        retinanet = torch.load(parser.pt_to_loader)
    else:
        if parser.depth == 18:
            retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
        elif parser.depth == 34:
            retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
        elif parser.depth == 50:
            retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
        elif parser.depth == 101:
            retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
        elif parser.depth == 152:
            retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
        else:
            raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True  # train on GPU when available
    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        print('learning rate:', optimizer.param_groups[0]['lr'])

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                else:
                    classification_loss, regression_loss = retinanet([data['img'].float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                if (iter_num + 1) % 50 == 0:
                    print('Epoch: {} | Iteration: {} | LR: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                          .format(epoch_num + 1, iter_num + 1, optimizer.param_groups[0]['lr'],
                                  float(classification_loss), float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))

        if (epoch_num + 1) % 5 == 0:
            torch.save(retinanet.module,
                       os.path.join(parser.pt_to_save, '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num + 1)))

    retinanet.eval()
    torch.save(retinanet, 'model_final.pt')
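
# Hedged usage sketch: resuming training with the --pt_to_loader/--pt_to_save
# options defined above. All paths are hypothetical placeholders.
if __name__ == '__main__':
    main(['--dataset', 'csv', '--csv_train', 'train_annots.csv',
          '--csv_classes', 'classes.csv', '--pt_to_save', 'checkpoints',
          '--pt_to_loader', 'checkpoints/csv_retinanet_5.pt'])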
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--local_rank', help='Local rank', type=int, default=0)
    parser.add_argument('--distributed', action='store_true')
    parser.add_argument('--pretrained', action='store_true')
    parser = parser.parse_args(args)

    torch.cuda.set_device(parser.local_rank)
    # `config`, `batch_size`, `use_cuda`, `RESTORE` and `checkpoints_dir` are
    # module-level settings defined elsewhere in this script.
    DISTRIBUTED = parser.distributed and config.DISTRIBUTED
    if DISTRIBUTED:
        distributed.init_process_group(backend='nccl')
    device = torch.device(f'cuda:{parser.local_rank}')

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')
        dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                    transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV.')
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    if DISTRIBUTED:
        sampler = DistributedSampler(dataset_train)
        dataloader_train = DataLoader(dataset_train, num_workers=4, batch_size=batch_size, collate_fn=collater,
                                      sampler=sampler, pin_memory=True, drop_last=True)
        if dataset_val is not None:
            sampler_val = DistributedSampler(dataset_val)
            dataloader_val = DataLoader(dataset_val, batch_size=1, num_workers=4, collate_fn=collater,
                                        sampler=sampler_val, pin_memory=True, drop_last=True)
    else:
        sampler = AspectRatioBasedSampler(dataset_train, batch_size=batch_size, drop_last=False)
        dataloader_train = DataLoader(dataset_train, num_workers=4, collate_fn=collater,
                                      batch_sampler=sampler, pin_memory=True)
        if dataset_val is not None:
            sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
            dataloader_val = DataLoader(dataset_val, num_workers=4, collate_fn=collater,
                                        batch_sampler=sampler_val, pin_memory=True)

    # Create the model
    if parser.depth == 18:
        retinanet = model.retinanet18(num_classes=dataset_train.num_classes(), pretrained=parser.pretrained)
    elif parser.depth == 34:
        retinanet = model.retinanet34(num_classes=dataset_train.num_classes(), pretrained=parser.pretrained)
    elif parser.depth == 50:
        retinanet = model.retinanet50(num_classes=dataset_train.num_classes(), pretrained=parser.pretrained)
    elif parser.depth == 101:
        retinanet = model.retinanet101(num_classes=dataset_train.num_classes(), pretrained=parser.pretrained)
    elif parser.depth == 152:
        retinanet = model.retinanet152(num_classes=dataset_train.num_classes(), pretrained=parser.pretrained)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    if use_cuda:
        retinanet = retinanet.cuda()
    if RESTORE:
        retinanet.load_state_dict(torch.load(RESTORE))
    if DISTRIBUTED:
        retinanet = torch.nn.parallel.DistributedDataParallel(retinanet, device_ids=[parser.local_rank])
        print("Let's use", parser.local_rank, 'GPU!')

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    if DISTRIBUTED:
        retinanet.module.freeze_bn()
    else:
        retinanet.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        save_to_disk = parser.local_rank == 0
        retinanet.train()
        if DISTRIBUTED:
            retinanet.module.freeze_bn()
        else:
            retinanet.freeze_bn()
        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                if use_cuda:
                    classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot'].cuda()])
                else:
                    classification_loss, regression_loss = retinanet([data['img'].float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                if save_to_disk:
                    print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                          .format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if save_to_disk:
            if parser.dataset == 'coco':
                print('Evaluating dataset')
                coco_eval.evaluate_coco(dataset_val, retinanet)
            elif parser.dataset == 'csv' and parser.csv_val is not None:
                print('Evaluating dataset')
                mAP = csv_eval.evaluate(dataset_val, retinanet)

            scheduler.step(np.mean(epoch_loss))
            if DISTRIBUTED:
                torch.save(retinanet.module.state_dict(),
                           '{}/{}_retinanet_{}.pt'.format(checkpoints_dir, parser.dataset, epoch_num))
            else:
                torch.save(retinanet.state_dict(),
                           '{}/{}_retinanet_{}.pt'.format(checkpoints_dir, parser.dataset, epoch_num))
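
# Hedged usage sketch: this variant reads --local_rank, which the (legacy)
# torch.distributed.launch helper supplies to each worker; the script name and
# dataset path below are hypothetical placeholders.
#
#   python -m torch.distributed.launch --nproc_per_node=4 train_dist.py \
#       --distributed --dataset coco --coco_path /data/coco --pretrained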
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--model', help='Path to model (.pt) file.')
    # note: the original used type=bool, which does not parse flags as intended
    # (bool('False') is True), so these are plain store_true switches
    parser.add_argument('--finetune', help='load a trained retina model', action='store_true')
    parser.add_argument('--gpu', help='train on GPU', action='store_true')
    parser.add_argument('--batch_size', help='batch size', type=int, default=2)
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')
        dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                    transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV.')
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    # sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    sampler = AspectRatioBasedSampler(dataset_train, parser.batch_size, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    # (the usual depth ladder is disabled here: a COCO-pretrained ResNet-50 is
    # always loaded and only the heads are re-trained)
    '''
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    '''

    use_gpu = parser.gpu

    # Load the COCO-pretrained model (80 classes)
    retinanet = model.resnet50(num_classes=80, pretrained=True)
    retinanet.load_state_dict(torch.load(parser.model))

    # Freeze the backbone and replace the heads with freshly initialised ones
    for param in retinanet.parameters():
        param.requires_grad = False
    retinanet.regressionModel = model.RegressionModel(256)
    retinanet.classificationModel = model.ClassificationModel(256, num_classes=dataset_train.num_classes())

    # focal-loss style prior initialisation of the new heads
    prior = 0.01
    retinanet.classificationModel.output.weight.data.fill_(0)
    retinanet.classificationModel.output.bias.data.fill_(-math.log((1.0 - prior) / prior))
    retinanet.regressionModel.output.weight.data.fill_(0)
    retinanet.regressionModel.output.bias.data.fill_(0)

    # for m in retinanet.classificationModel.modules():
    #     if isinstance(m, nn.Conv2d):
    #         n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
    #         m.weight.data.normal_(0, math.sqrt(2. / n))
    #     elif isinstance(m, nn.BatchNorm2d):
    #         m.weight.data.fill_(1)
    #         m.bias.data.zero_()
    # for m in retinanet.regressionModel.modules():
    #     if isinstance(m, nn.Conv2d):
    #         n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
    #         m.weight.data.normal_(0, math.sqrt(2. / n))
    #     elif isinstance(m, nn.BatchNorm2d):
    #         m.weight.data.fill_(1)
    #         m.bias.data.zero_()

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if use_gpu and torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True

    # Optimise only the two new heads; the backbone is frozen
    optimizer = optim.Adam([{'params': retinanet.module.regressionModel.parameters()},
                            {'params': retinanet.module.classificationModel.parameters()}], 1e-6)
    # optimizer = optim.Adam(retinanet.parameters(), lr=1e-6)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                if use_gpu and torch.cuda.is_available():
                    classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot'].cuda()])
                else:
                    classification_loss, regression_loss = retinanet([data['img'].float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                      .format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))
        if epoch_num % 5 == 0:
            torch.save(retinanet.module, '{}_freezinetune_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()
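
# Hedged usage sketch: head-only fine-tuning from the COCO-pretrained state
# dict named in the next script's defaults. The CSV paths are hypothetical
# placeholders.
if __name__ == '__main__':
    main(['--dataset', 'csv', '--csv_train', 'train_annots.csv',
          '--csv_classes', 'classes.csv',
          '--model', 'coco_resnet_50_map_0_335_state_dict.pt', '--gpu'])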
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.', default='csv')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)',
                        default='data/train_retinanet.csv')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)',
                        default='data/class_retinanet.csv')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)',
                        default='data/val_retinanet.csv')
    parser.add_argument('--model_path', default='coco_resnet_50_map_0_335_state_dict.pt',
                        help='Path to file containing pretrained retinanet')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs_detection', help='Number of epochs for detection', type=int, default=50)
    parser.add_argument('--epochs_classification', help='Number of epochs for classification', type=int, default=50)
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')
        dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                    transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV.')
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=1, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if parser.model_path is not None:
        print('loading ', parser.model_path)
        if 'coco' in parser.model_path:
            retinanet.load_state_dict(torch.load(parser.model_path), strict=False)
        else:
            retinanet = torch.load(parser.model_path)
        print('Pretrained model loaded!')

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    # First stage: train the detector (plus style head)
    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=4, verbose=True)
    loss_hist = collections.deque(maxlen=500)
    loss_style_classif = nn.CrossEntropyLoss()

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))
    mAP_list = []
    mAPbest = 0

    for epoch_num in range(parser.epochs_detection):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    [classification_loss, regression_loss], style = retinanet([data['img'].cuda().float(), data['annot']])
                else:
                    [classification_loss, regression_loss], style = retinanet([data['img'].float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                if torch.cuda.is_available():
                    style_loss = loss_style_classif(style, torch.tensor(data['style']).cuda())
                else:
                    style_loss = loss_style_classif(style, torch.tensor(data['style']))
                loss = classification_loss + regression_loss + style_loss
                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                print('Epoch: {} | Iteration: {} | Classification loss: {:1.4f} | Regression loss: {:1.4f} | Style loss: {:1.4f} | Running loss: {:1.4f}'
                      .format(epoch_num, iter_num, float(classification_loss), float(regression_loss),
                              float(style_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
                del style_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAPclasses, mAP, accu = csv_eval.evaluate(dataset_val, retinanet)
            mAP_list.append(mAP)
            print('mAP_list', mAP_list)
            if mAP > mAPbest:
                print('Saving best checkpoint')
                torch.save(retinanet, 'model_best.pt')
                mAPbest = mAP

        scheduler.step(np.mean(epoch_loss))
        torch.save(retinanet.module, '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()
    torch.save(retinanet, 'model_final.pt')

    # Pre-compute one feature vector per training image so the RetinaNet
    # backbone does not have to be re-run during style-classification training.
    retinanet.load_state_dict(torch.load('model_best.pt').state_dict())
    List_feature = []
    List_target = []

    retinanet.training = False
    retinanet.eval()
    retinanet.module.style_inference = True
    retinanet.module.freeze_bn()
    epoch_loss = []

    with torch.no_grad():
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    _, _, feature_vec = retinanet(data['img'].cuda().float())
                else:
                    _, _, feature_vec = retinanet(data['img'].float())
                List_feature.append(torch.squeeze(feature_vec).cpu())
                List_target.append(data['style'][0])
            except Exception as e:
                print(e)
                continue

    print('END of preparation of the data for classification of style')

    # Second stage: style training begins here; the model must be switched to
    # style_train mode. The detection loader was reused above because the two
    # tasks were originally expected to be trained at the same time.
    batch_size_classification = 64
    dataloader_train_style = torch.utils.data.DataLoader(StyleDataset(List_feature, List_target),
                                                         batch_size=batch_size_classification)

    retinanet.load_state_dict(torch.load('model_best.pt').state_dict())

    # Train the style classifier on the cached feature vectors
    retinanet.module.style_inference = False
    retinanet.module.style_train(True)
    retinanet.training = True
    retinanet.train()
    optimizer = optim.Adam(retinanet.module.styleClassificationModel.parameters(), lr=5e-3, weight_decay=1e-3)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='max', patience=4, verbose=True)
    loss_hist = collections.deque(maxlen=500)
    loss_style_classif = nn.CrossEntropyLoss()
    retinanet.train()
    retinanet.module.freeze_bn()
    criterion = nn.CrossEntropyLoss()
    accu_list = []
    accubest = 0

    for epoch_num in range(parser.epochs_classification):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        total = 0
        correct = 0
        for iter_num, data in enumerate(dataloader_train_style):
            try:
                optimizer.zero_grad()
                inputs, targets = data
                if torch.cuda.is_available():
                    inputs, targets = inputs.cuda(), targets.cuda()
                outputs = retinanet.module.styleClassificationModel(inputs, 0, 0, 0, True)
                loss = criterion(outputs, targets)
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                total += targets.size(0)
                _, predicted = torch.max(outputs.data, 1)
                correct += predicted.eq(targets.data).cpu().sum()

                # len(dataloader_train_style) is already the number of batches
                print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%'
                      % (epoch_num, parser.epochs_classification, iter_num + 1,
                         len(dataloader_train_style), loss.item(), 100. * correct / total))
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAPclasses, mAP, accu = csv_eval.evaluate(dataset_val, retinanet)
            accu_list.append(accu)
            print('mAP_list', mAP_list, 'accu_list', accu_list)
            if accu > accubest:
                print('Saving best checkpoint')
                torch.save(retinanet.module, 'model_best_classif.pt')
                accubest = accu

        scheduler.step(accu)
        torch.save(retinanet.module, '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()
    torch.save(retinanet.module, 'model_final.pt')
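
# `StyleDataset` is used above but defined elsewhere in this repository; a
# minimal sketch consistent with its usage (indexable (feature, label) pairs
# fed to a DataLoader) could look like the commented class below. This is an
# assumption, not the original implementation.
#
# class StyleDataset(torch.utils.data.Dataset):
#     def __init__(self, features, targets):
#         self.features = features  # list of 1-D feature tensors
#         self.targets = targets    # list of integer style labels
#     def __len__(self):
#         return len(self.features)
#     def __getitem__(self, idx):
#         return self.features[idx], self.targets[idx]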
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', default='csv', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', default='dataset/pascal_train.csv',
                        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', default='dataset/classes.csv',
                        help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', default='dataset/pascal_val.csv',
                        help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--weights_folder', help='Path to save weights', type=str, required=True)
    parser = parser.parse_args(args)

    if not os.path.exists(parser.weights_folder):
        os.makedirs(parser.weights_folder)

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')
        dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                    transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV.')
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=5, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=4, collate_fn=collater, batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=8, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=4, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        total_loss = 0
        total_regression_loss = 0
        total_classification_loss = 0

        with tqdm(dataloader_train, unit='batch') as tepoch:
            for data in tepoch:
                tepoch.set_description(f'Epoch {epoch_num}')
                try:
                    optimizer.zero_grad()
                    if torch.cuda.is_available():
                        classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                    else:
                        classification_loss, regression_loss = retinanet([data['img'].float(), data['annot']])

                    classification_loss = classification_loss.mean()
                    regression_loss = regression_loss.mean()
                    loss = classification_loss + regression_loss
                    # accumulate as floats so the autograd graphs are not retained
                    total_loss = total_loss + float(loss)
                    total_regression_loss = total_regression_loss + float(regression_loss)
                    total_classification_loss = total_classification_loss + float(classification_loss)
                    if bool(loss == 0):
                        continue

                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                    optimizer.step()

                    loss_hist.append(float(loss))
                    epoch_loss.append(float(loss))

                    tepoch.set_postfix(cls_loss='{:1.5f}'.format(classification_loss),
                                       reg_loss='{:1.5f}'.format(regression_loss))
                    time.sleep(0.1)

                    del classification_loss
                    del regression_loss
                except Exception as e:
                    print(e)
                    continue

        # `tb` is a module-level SummaryWriter assumed to be created elsewhere
        # in this script
        tb.add_scalar('Training loss', total_loss, epoch_num)
        tb.add_scalar('Training regression loss', total_regression_loss, epoch_num)
        tb.add_scalar('Training classification loss', total_classification_loss, epoch_num)

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))
        torch.save(retinanet.module,
                   '{}/{}_retinanet_{}.pt'.format(parser.weights_folder, parser.dataset, epoch_num))

    retinanet.eval()
    torch.save(retinanet, '{}/model_final.pt'.format(parser.weights_folder))
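
# Hedged usage sketch: relies on the dataset defaults above and on the
# module-level `tb` SummaryWriter existing; the weights folder name is a
# hypothetical placeholder.
if __name__ == '__main__':
    main(['--weights_folder', 'weights'])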
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.', default='coco')
    parser.add_argument('--coco_path', help='Path to COCO directory',
                        default='/media/zhuzhu/ec114170-f406-444f-bee7-a3dc0a86cfa2/dataset/coco')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    # note: store_false means passing --use-gpu actually disables the GPU
    parser.add_argument('--use-gpu', help='training on cpu or gpu', action='store_false', default=True)
    parser.add_argument('--device-ids', help='GPU device ids', default=[0])
    args = parser.parse_args(args)

    # ------------------------------ Create the data loaders -----------------------------
    if args.dataset == 'coco':
        if args.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')
        dataset_train = CocoDataset(args.coco_path, set_name='train2017',
                                    transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(args.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))

    sampler_train = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler_train)
    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if args.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif args.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif args.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=False)
    elif args.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif args.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    if args.use_gpu:
        retinanet = nn.DataParallel(retinanet, device_ids=args.device_ids).cuda()

    # retinanet.training = True
    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(args.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue

                loss.backward()
                nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)  # clip gradients to a max norm of 0.1
                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                      .format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if args.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))
        torch.save(retinanet.module, '{}_retinanet_{}.pt'.format(args.dataset, epoch_num))

    retinanet.eval()
    torch.save(retinanet, 'model_final.pt')
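
# Hedged usage sketch: --coco_path defaults to the hard-coded directory above,
# so overriding it is the typical case (the path below is a placeholder).
if __name__ == '__main__':
    main(['--coco_path', '/data/coco', '--depth', '50'])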
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--exp_name', help='Path to folder for saving the model and log', type=str)
    parser.add_argument('--output_folder', help='Path to folder for saving all the experiments', type=str)
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--batch_size', help='Batch size', type=int, default=2)
    parser.add_argument('--lr', help='Learning rate', type=float, default=1e-5)
    parser.add_argument('--caption', help='Anything in particular about the experiment', type=str)
    parser.add_argument('--server', help='server name', type=str, default='ultron')
    parser.add_argument('--detector', help='detection algo', type=str, default='RetinaNet')
    parser.add_argument('--arch', help='model architecture', type=str)
    parser.add_argument('--pretrain', default=False, action='store_true')
    parser.add_argument('--freeze_batchnorm', default=False, action='store_true')
    parser = parser.parse_args(args)

    output_folder_path = os.path.join(parser.output_folder, parser.exp_name)
    if not os.path.exists(output_folder_path):
        os.makedirs(output_folder_path)

    PARAMS = {
        'dataset': parser.dataset,
        'exp_name': parser.exp_name,
        'depth': parser.depth,
        'epochs': parser.epochs,
        'batch_size': parser.batch_size,
        'lr': parser.lr,
        'caption': parser.caption,
        'server': parser.server,
        'arch': parser.arch,
        'pretrain': parser.pretrain,
        'freeze_batchnorm': parser.freeze_batchnorm
    }
    exp = neptune.create_experiment(name=parser.exp_name, params=PARAMS,
                                    tags=[parser.arch, parser.detector, parser.dataset, parser.server])

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')
        dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                    transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV.')
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18 and parser.arch == 'Resnet':
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=parser.pretrain)
    elif parser.depth == 10 and parser.arch == 'Resnet':
        retinanet = model.resnet10(num_classes=dataset_train.num_classes(), pretrained=parser.pretrain)
    elif parser.depth == 18 and parser.arch == 'BiRealNet18':
        checkpoint_path = None
        if parser.pretrain:
            checkpoint_path = '/media/Rozhok/Bi-Real-net/pytorch_implementation/BiReal18_34/models/imagenet_baseline/checkpoint.pth.tar'
        retinanet = birealnet18(checkpoint_path, num_classes=dataset_train.num_classes())
    elif parser.depth == 34 and parser.arch == 'Resnet':
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=parser.pretrain)
    elif parser.depth == 50 and parser.arch == 'Resnet':
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=parser.pretrain)
    elif parser.depth == 101 and parser.arch == 'Resnet':
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=parser.pretrain)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=parser.pretrain)
    elif parser.arch == 'ofa':
        print('Model is ResNet50D.')
        bn_momentum = 0.1
        bn_eps = 1e-5
        retinanet = ResNet50D(
            n_classes=dataset_train.num_classes(),
            bn_param=(bn_momentum, bn_eps),
            dropout_rate=0,
            width_mult=1.0,
            depth_param=3,
            expand_ratio=0.35,
        )
    else:
        raise ValueError('Unsupported depth/arch combination; depth must be one of 10, 18, 34, 50, 101, 152')

    print(retinanet)

    use_gpu = True
    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=parser.lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    if parser.freeze_batchnorm:
        retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        exp.log_metric('Current lr', float(optimizer.param_groups[0]['lr']))
        exp.log_metric('Current epoch', int(epoch_num))
        retinanet.train()
        if parser.freeze_batchnorm:
            retinanet.module.freeze_bn()
        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                else:
                    classification_loss, regression_loss = retinanet([data['img'].float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                      .format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))
                exp.log_metric('Training: Classification loss', float(classification_loss))
                exp.log_metric('Training: Regression loss', float(regression_loss))
                exp.log_metric('Training: Total loss', float(loss))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet, output_folder_path, exp=exp)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))
        torch.save(retinanet.module,
                   os.path.join(output_folder_path, '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num)))

    retinanet.eval()
    torch.save(retinanet, os.path.join(output_folder_path, 'model_final.pt'))
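
# Hedged usage sketch: assumes neptune has been initialised at module level
# (the neptune setup is not part of this excerpt); names and paths below are
# hypothetical placeholders.
if __name__ == '__main__':
    main(['--dataset', 'csv', '--csv_train', 'train_annots.csv',
          '--csv_classes', 'classes.csv', '--output_folder', 'runs',
          '--exp_name', 'exp01', '--arch', 'Resnet', '--depth', '50', '--pretrain'])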
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--dcn_layers', type=str, default=None,
                        help='Comma-separated indices of the ResNet layers (0..3) that use deformable convolution')
    parser.add_argument('--use_depth', action='store_true', help='if specified, use depth maps for deformconv')
    parser = parser.parse_args(args)

    use_dcn = [False, False, False, False]
    if parser.dcn_layers is not None:
        for idx in parser.dcn_layers.split(','):
            use_dcn[int(idx)] = True

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')
        dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                    transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV.')
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=128, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True,
                                   use_dcn=use_dcn, use_depth=parser.use_depth)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    writer = SummaryWriter()

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()

    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))
    # mAP = csv_eval.evaluate(dataset_val, retinanet)
    global_step = 0

    for epoch_num in range(parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                global_step += 1
                if torch.cuda.is_available():
                    if parser.use_depth and 'depth' in data:
                        classification_loss, regression_loss = retinanet(
                            [data['img'].cuda().float(), data['annot']], depth=data['depth'].cuda())
                    else:
                        classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                else:
                    if parser.use_depth and 'depth' in data:
                        classification_loss, regression_loss = retinanet(
                            [data['img'].float(), data['annot']], depth=data['depth'])
                    else:
                        classification_loss, regression_loss = retinanet([data['img'].float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                writer.add_scalar('CLS Loss', classification_loss, global_step)
                writer.add_scalar('REG Loss', regression_loss, global_step)
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                      .format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))
        torch.save(retinanet.module, '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()
    torch.save(retinanet, 'model_final.pt')
    writer.close()
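
# Hedged usage sketch: enabling deformable convolution on the last two ResNet
# stages via the comma-separated --dcn_layers format parsed above (CSV paths
# are hypothetical placeholders).
if __name__ == '__main__':
    main(['--dataset', 'csv', '--csv_train', 'train_annots.csv',
          '--csv_classes', 'classes.csv', '--dcn_layers', '2,3', '--use_depth'])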
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--s', help='training session', type=int)
    parser.add_argument('--bs', help='batch size', type=int, default=4)
    parser.add_argument('--lr', help='learning rate', type=float, default=0.001)
    parser.add_argument('--save_int', help='interval (in epochs) for saving the model', type=int)
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--use_tb', help='whether to use tensorboard', action='store_true')
    parser.add_argument('--use_aug', help='whether to use data augmentation', action='store_true')
    parser = parser.parse_args(args)

    session = parser.s
    session_dir = 'session_{:02d}'.format(session)
    assert os.path.isdir('models'), '[ERROR] models folder does not exist'
    assert os.path.isdir('logs'), '[ERROR] logs folder does not exist'
    model_dir = os.path.join('models', session_dir)
    logs_dir = os.path.join('logs', session_dir)
    if not os.path.isdir(model_dir):
        os.mkdir(model_dir)
    if not os.path.isdir(logs_dir):
        os.mkdir(logs_dir)

    # set up tensorboard logger
    tb_writer = None
    if parser.use_tb:
        tb_writer = SummaryWriter('logs')

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')
        if parser.use_aug:
            # transform = transforms.Compose([Normalizer(), Augmenter(), Resizer()])
            dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                        transform=transforms.Compose([Normalizer(), Augmenter(), ToTensor()]))
        else:
            dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                        transform=transforms.Compose([Normalizer(), ToTensor()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), ToTensor()]))
        # transform = transforms.Compose([Normalizer(), Resizer()])
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV.')
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), ToTensor()]))
        # transform = transforms.Compose([Normalizer(), Augmenter(), Resizer()])
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            # note: the Augmenter is applied to the validation set here as well
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Augmenter(), ToTensor()]))
            # transform = transforms.Compose([Normalizer(), Resizer()])
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.bs, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=0, collate_fn=collater, batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=parser.bs, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater, batch_sampler=sampler_val)

    print('# classes: {}'.format(dataset_train.num_classes()))  # num_classes is a method, so it must be called

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    # disable multi-GPU train
    retinanet = torch.nn.DataParallel(retinanet).cuda()
    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=parser.lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    # retinanet.module.freeze_bn() only works if DataParallel is activated
    retinanet.module.freeze_bn()
    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        retinanet.train()
        # retinanet.module.freeze_bn() only works if DataParallel is activated
        retinanet.module.freeze_bn()
        epoch_loss = []
        iter_per_epoch = len(dataloader_train)
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                assert data['img'][0].shape[0] == 3, '[ERROR] data first dim should be 3! ({})'.format(data['img'][0].shape)
                # data['img']: (B, C, H, W)
                # data['annot']: [x1, y1, x2, y2, class_id]
                classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                # print every iteration (raise the modulus to print less often); epoch starts from 0
                if (iter_num + 1) % 1 == 0:
                    print('Epoch: {} | Iteration: {} | Total loss: {:1.5f} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                        epoch_num, iter_num, float(loss), float(classification_loss), float(regression_loss), np.mean(loss_hist)))
                # update tensorboard
                if tb_writer is not None:
                    crt_iter = epoch_num * iter_per_epoch + (iter_num + 1)
                    tb_dict = {
                        'total_loss': float(loss),
                        'classification_loss': float(classification_loss),
                        'regression_loss': float(regression_loss)
                    }
                    tb_writer.add_scalars('session_{:02d}/loss'.format(session), tb_dict, crt_iter)
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))

        if (epoch_num + 1) % parser.save_int == 0:
            # retinanet (before DataParallel): retinanet.model.ResNet, no self.module
            # retinanet (after DataParallel): torch.nn.parallel.data_parallel.DataParallel, self.module available
            # retinanet.module (after DataParallel): retinanet.model.ResNet
            torch.save(retinanet.module.state_dict(),
                       os.path.join(model_dir, 'retinanet_s{:02d}_e{:03d}.pth'.format(session, epoch_num)))

    if parser.use_tb:
        tb_writer.close()

    retinanet.eval()
    torch.save(retinanet.module.state_dict(),
               os.path.join(model_dir, 'retinanet_s{:02d}_e{:03d}.pth'.format(session, epoch_num)))
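# The script above saves retinanet.module.state_dict(), i.e. the weights of the
# unwrapped model. A checkpoint saved from the DataParallel wrapper itself would
# carry a 'module.' prefix on every key. The helper below is a minimal sketch,
# not part of the original scripts, showing how to strip that prefix so either
# kind of checkpoint can be loaded into a bare model.
import collections


def strip_module_prefix(state_dict):
    """Remove a leading 'module.' from every key, if present."""
    cleaned = collections.OrderedDict()
    for key, value in state_dict.items():
        cleaned[key[len('module.'):] if key.startswith('module.') else key] = value
    return cleaned


# tiny demonstration on a fake state_dict
sd = collections.OrderedDict([('module.conv.weight', 0), ('module.conv.bias', 1)])
print(list(strip_module_prefix(sd).keys()))  # ['conv.weight', 'conv.bias']
# usage against the checkpoints written above would look like:
# retinanet.load_state_dict(strip_module_prefix(torch.load(ckpt_path)))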
def main(args=None): parser = argparse.ArgumentParser( description='Simple training script for training a RetinaNet network.') parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.') parser.add_argument('--coco_path', help='Path to COCO directory') parser.add_argument( '--csv_train', help='Path to file containing training annotations (see readme)') parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)') parser.add_argument( '--csv_val', help= 'Path to file containing validation annotations (optional, see readme)' ) parser.add_argument( '--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50) parser.add_argument('--epochs', help='Number of epochs', type=int, default=25) parser = parser.parse_args(args) # Create the data loaders if parser.dataset == 'coco': if parser.coco_path is None: raise ValueError('Must provide --coco_path when training on COCO,') dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose( [Normalizer(), Resizer()])) elif parser.dataset == 'csv': if parser.csv_train is None: raise ValueError('Must provide --csv_train when training on COCO,') if parser.csv_classes is None: raise ValueError( 'Must provide --csv_classes when training on COCO,') dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) if parser.csv_val is None: dataset_val = None print('No validation annotations provided.') else: dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Resizer()])) else: raise ValueError( 'Dataset type not understood (must be csv or coco), exiting.') # create samplers for both training and validation # using muti CPU cores to accelerate data loading sampler_train1 = torch.utils.data.SequentialSampler(dataset_train) sampler_train2 = torch.utils.data.BatchSampler(sampler_train1, batch_size=1, drop_last=True) dataloader_train = DataLoader(dataset_train, num_workers=10, collate_fn=collater, batch_sampler=sampler_train2) sampler_val1 = torch.utils.data.SequentialSampler(dataset_val) sampler_val2 = torch.utils.data.BatchSampler(sampler_val1, batch_size=1, drop_last=True) dataloader_val = DataLoader(dataset_val, num_workers=10, collate_fn=collater, batch_sampler=sampler_val2) # Create the model if parser.depth == 18: retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 34: retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 50: retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 101: retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 152: retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True) else: raise ValueError( 'Unsupported model depth, must be one of 18, 34, 50, 101, 152') use_gpu = True if use_gpu: if torch.cuda.is_available(): retinanet = retinanet.cuda() if torch.cuda.is_available(): retinanet = torch.nn.DataParallel(retinanet).cuda() else: retinanet = torch.nn.DataParallel(retinanet) retinanet.training = True # ADAM optimizer optimizer = optim.Adam(retinanet.parameters(), lr=1e-5) scheduler = 
optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) loss_hist = collections.deque(maxlen=500) retinanet.train() retinanet.module.freeze_bn() print('Num training images: {}'.format(len(dataset_train))) # using tensorboardX to show training process writer = SummaryWriter('log') iter_sum = 0 time_sum = 0 frame_num = 8 for epoch_num in range(parser.epochs): # only work for frame_num > 8 frame_list = collections.deque(maxlen=frame_num) anno_list = collections.deque(maxlen=frame_num) retinanet.train() retinanet.module.freeze_bn() epoch_loss = [] for index, data in enumerate(dataloader_train): try: frame_list.append(data['img']) anno_list.append(data['annot']) # if frame_num != 32: if index < 31: continue if index >= 697 and index <= 697 + 32: continue # real_frame is the frame we used for fish detection # It's the last frame in the batch group real_frame = frame_list[-1] # the annotation for real_frame annot = anno_list[-1] # drop useless frames data['img'] = torch.cat(list(frame_list), dim=0) optimizer.zero_grad() classification_loss, regression_loss = retinanet([ data['img'].cuda().float(), real_frame.cuda().float(), annot.cuda().float() ]) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() loss = classification_loss + regression_loss if bool(loss == 0): continue loss.backward() torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1) optimizer.step() loss_hist.append(float(loss)) epoch_loss.append(float(loss)) writer.add_scalar('loss_hist', np.mean(loss_hist), iter_sum) writer.add_scalar('classification_loss', float(classification_loss), iter_sum) writer.add_scalar('regression_loss', float(regression_loss), iter_sum) writer.add_scalar('loss', float(loss), iter_sum) print( 'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}' .format(epoch_num, index, float(classification_loss), float(regression_loss), np.mean(loss_hist))) del classification_loss del regression_loss iter_sum = iter_sum + 1 except Exception as e: print(e) continue if parser.dataset == 'coco': print('Evaluating dataset') # evaluate coco coco_eval.evaluate_coco(dataset_val, dataloader_val, retinanet, frame_num) elif parser.dataset == 'csv' and parser.csv_val is not None: print('Evaluating dataset') mAP = csv_eval.evaluate(dataset_val, retinanet) scheduler.step(np.mean(epoch_loss)) torch.save( retinanet.module, 'checkpoint/{}_retinanet_{}.pt'.format(parser.dataset, epoch_num)) retinanet.eval() torch.save(retinanet, 'save/model_final.pt') writer.close()
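# The script above groups consecutive frames with collections.deque(maxlen=frame_num)
# and concatenates them along the batch dimension before the forward pass. A
# self-contained sketch of that sliding-window pattern on dummy tensors; the
# shapes and the window size are illustrative, not taken from the original
# data pipeline.
import collections

import torch

frame_num = 8
frame_list = collections.deque(maxlen=frame_num)
for index in range(12):
    frame = torch.randn(1, 3, 64, 64)  # stand-in for data['img']
    frame_list.append(frame)
    if index < frame_num - 1:
        continue  # wait until the window is full
    window = torch.cat(list(frame_list), dim=0)  # (frame_num, 3, 64, 64)
    real_frame = frame_list[-1]                  # the frame actually being detected
    print(index, window.shape, real_frame.shape)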
coco_path = os.path.join('..', 'simulated_data', 'coco_format', 'mini_easy', 'coco')
write_dir = os.path.join('logs', 'session_01', 'train_results')
# model_path is assumed to be defined earlier in this script
assert os.path.isfile(model_path), '[ERROR] model weights do not exist!'
assert os.path.isdir(coco_path), '[ERROR] COCO dataset does not exist!'

dataset = CocoDataset(coco_path, set_name='train2017',
                      transform=transforms.Compose([Normalizer(), ToTensor()]))
sampler = AspectRatioBasedSampler(dataset, batch_size=1, drop_last=False)
dataloader = DataLoader(dataset, num_workers=0, collate_fn=collater, batch_sampler=sampler)

retinanet = model.resnet50(num_classes=dataset.num_classes(), pretrained=True)
retinanet = retinanet.cuda()
retinanet.load_state_dict(torch.load(model_path))
print('loaded model weights: {}'.format(model_path))
retinanet.eval()

data = next(iter(dataloader))
img = data['img'].cuda().float()
annot = data['annot'].cuda().float()
# preds: list of 3 items
# - list of scores (0.05 as threshold)
# - list of classes (starts from index 0; index 0 is NOT a background index)
# - list of bbox locations ([xmin, ymin, xmax, ymax])
preds = retinanet(img)
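# Given the three prediction lists described above (scores, classes, boxes), a
# common next step is to keep only detections above a confidence threshold.
# Minimal sketch on dummy tensors; the 0.5 threshold and the tensor contents
# are illustrative assumptions, not values from the original script.
import torch

scores = torch.tensor([0.91, 0.40, 0.73])
classes = torch.tensor([0, 2, 1])
boxes = torch.tensor([[10., 10., 50., 50.],
                      [5., 5., 20., 20.],
                      [30., 40., 90., 120.]])  # [xmin, ymin, xmax, ymax]

keep = scores > 0.5
for score, cls, box in zip(scores[keep], classes[keep], boxes[keep]):
    print('class {} @ {:.2f}: {}'.format(int(cls), float(score), box.tolist()))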
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', type=str, default='csv', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', type=str,
                        default=r'/usr/idip/idip/liuan/project/pytorch_retinanet/RetinaNet-PFA-SPANet/train.csv',
                        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', type=str,
                        default=r'/usr/idip/idip/liuan/project/pytorch_retinanet/RetinaNet-PFA-SPANet/class.csv',
                        help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', type=str,
                        default=r'/usr/idip/idip/liuan/project/pytorch_retinanet/RetinaNet-PFA-SPANet/val.csv',
                        help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--model_save_path', type=str,
                        default=r'/usr/idip/idip/liuan/project/pytorch_retinanet/RetinaNet-PFA-SPANet/model/resnet101+PFA+CFPN/',
                        help='Path to save model')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=101)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=150)
    parser.add_argument('--iter_num', help='Epoch interval for saving checkpoints', type=int, default=5)
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')
        dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                    transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV.')
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    # wrap the custom Dataset into batches of the given batch size (order, shuffling,
    # etc. handled by the sampler), ready for the training loop below
    sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.cuda()
    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet).cuda()
    else:
        retinanet = torch.nn.DataParallel(retinanet)
    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()
    print('Num training images: {}'.format(len(dataset_train)))

    # epoch counter used to save the model every `iter_num` epochs
    n = 0
    for epoch_num in range(parser.epochs):
        n += 1
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            # try:
            optimizer.zero_grad()
            if torch.cuda.is_available():
                classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
            else:
                classification_loss, regression_loss = retinanet([data['img'].float(), data['annot']])
            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()
            loss = classification_loss + regression_loss
            if bool(loss == 0):
                continue
            loss.backward()
            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
            optimizer.step()
            loss_hist.append(float(loss))
            epoch_loss.append(float(loss))
            print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))
            del classification_loss
            del regression_loss
            # except Exception as e:
            #     print(e)
            #     continue

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))
        if n % parser.iter_num == 0:
            torch.save(retinanet.module,
                       parser.model_save_path + '/' + '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()
    torch.save(retinanet, parser.model_save_path + '/' + 'model_final.pt')
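# The script above pickles the whole module (torch.save(retinanet.module, ...)),
# which ties the checkpoint to the exact class definitions on disk. Saving the
# state_dict instead is more portable. A minimal sketch of both options on a toy
# model; the file names here are illustrative.
import torch
import torch.nn as nn

net = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))

torch.save(net, 'whole_module.pt')               # requires the class code to load again
torch.save(net.state_dict(), 'weights_only.pt')  # loads into a freshly built model

restored = nn.Sequential(nn.Linear(4, 8), nn.ReLU(), nn.Linear(8, 2))
restored.load_state_dict(torch.load('weights_only.pt'))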
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.', default='coco')
    parser.add_argument('--coco_path', help='Path to COCO directory', default='cocodataset')
    parser.add_argument('--model_path', help='Path to model (.pt) file.', type=str,
                        default='coco_resnet_50_map_0_335_state_dict.pt')
    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    # Create the model
    # retinanet = torch.load(parser.model_path)
    retinanet = model.resnet50(num_classes=dataset_val.num_classes(), pretrained=True)
    retinanet.load_state_dict(torch.load(parser.model_path))

    use_gpu = torch.cuda.is_available()  # was referenced without being defined
    device = torch.device('cuda') if use_gpu else torch.device('cpu')
    retinanet = retinanet.to(device)
    retinanet.eval()

    transformer = transforms.Compose(
        [transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225))])

    # read and transform each image; imagepaths is assumed to be defined elsewhere in the script
    for imagepath in imagepaths:
        im = imageio.imread(imagepath)
        # im = skimage.transform.resize(im, (640, 928))
        # im = skimage.transform.resize(im, (1008, 928))
        im = padImage(im)
        img = torch.from_numpy(im).float().permute(2, 0, 1) / 255
        img = transformer(img).unsqueeze(dim=0)
        with torch.no_grad():
            st = time.time()
            print('processing...')
            scores, classification, transformed_anchors = retinanet(img.float().to(device))
            print('Elapsed time: {}'.format(time.time() - st))
            idxs = np.where(scores.cpu() > 0.5)
            # assumes padImage returns a float image in [0, 1]
            img = cv2.cvtColor((255 * im).astype(np.uint8), cv2.COLOR_BGR2RGB)
            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
                label_name = dataset_val.labels[int(classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)
                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
                print(label_name)
            cv2.imshow('img', img)
            cv2.waitKey(0)
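# padImage above is a helper from this script's codebase and its definition is
# not shown. A plausible minimal sketch, assuming it zero-pads H and W up to the
# next multiple of 32 so the FPN strides divide the input cleanly; the name
# pad_image and the multiple of 32 are assumptions, not the original helper.
import numpy as np


def pad_image(im, multiple=32):
    """Zero-pad a HxWxC image so H and W are multiples of `multiple`."""
    h, w = im.shape[:2]
    new_h = ((h + multiple - 1) // multiple) * multiple
    new_w = ((w + multiple - 1) // multiple) * multiple
    padded = np.zeros((new_h, new_w, im.shape[2]), dtype=im.dtype)
    padded[:h, :w, :] = im
    return padded


print(pad_image(np.ones((600, 900, 3), dtype=np.float32)).shape)  # (608, 928, 3)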
def main(): global args, results, val_image_ids, logger args = parse().parse_args() try: os.makedirs(args.logdir, exist_ok=True) except Exception as exc: raise exc log_file = os.path.join(args.logdir, "train.log") logger = get_logger(__name__, log_file) try: init_distributed_mode(args) distributed = True except KeyError: args.rank = 0 distributed = False if args.dist_mode == "DP": distributed = True args.rank = 0 if args.rank == 0: logger.info(f"distributed mode: {args.dist_mode if distributed else 'OFF'}") if args.val_image_dir is None: if args.rank == 0: logger.info( "No validation image directory specified, will assume the same image directory for train and val" ) args.val_image_dir = args.image_dir writer = SummaryWriter(logdir=args.logdir) img_dim = parse_resize(args.resize) if args.rank == 0: logger.info(f"training image dimensions: {img_dim[0]},{img_dim[1]}") ## print out basic info if args.rank == 0: logger.info("CUDA available: {}".format(torch.cuda.is_available())) logger.info(f"torch.__version__ = {torch.__version__}") # Create the data loaders if args.dataset == "coco": # if args.coco_path is None: # raise ValueError("Must provide --coco_path when training on COCO,") train_transforms = [Normalizer()] if args.augs is None: train_transforms.append(Resizer(img_dim)) else: p = 0.5 if args.augs_prob is not None: p = args.augs_prob aug_map = get_aug_map(p=p) for aug in args.augs: if aug in aug_map.keys(): train_transforms.append(aug_map[aug]) else: logger.info(f"{aug} is not available.") train_transforms.append(Resizer(img_dim)) if args.rank == 0: if len(train_transforms) == 2: logger.info( "Not applying any special augmentations, using only {}".format(train_transforms) ) else: logger.info( "Applying augmentations {} with probability {}".format(train_transforms, p) ) dataset_train = CocoDataset( args.image_dir, args.train_json_path, transform=transforms.Compose(train_transforms), ) elif args.dataset == "csv": if args.csv_train is None: raise ValueError("Must provide --csv_train when training on COCO,") if args.csv_classes is None: raise ValueError("Must provide --csv_classes when training on COCO,") dataset_train = CSVDataset( train_file=args.csv_train, class_list=args.csv_classes, # transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]), transform=transforms.Compose([Normalizer(), Augmenter(), Resizer(img_dim)]), ) if args.csv_val is None: dataset_val = None print("No validation annotations provided.") else: dataset_val = CSVDataset( train_file=args.csv_val, class_list=args.csv_classes, # transform=transforms.Compose([Normalizer(), Resizer()]), transform=transforms.Compose([Normalizer(), Resizer(img_dim)]), ) else: raise ValueError("Dataset type not understood (must be csv or coco), exiting.") if dist.is_available() and distributed and args.dist_mode == "DDP": sampler = DistributedSampler(dataset_train) dataloader_train = DataLoader( dataset_train, sampler=sampler, batch_size=args.batch_size, num_workers=args.num_workers, collate_fn=collater, ) elif args.nsr is not None: logger.info(f"using WeightedRandomSampler with negative (image) sample rate = {args.nsr}") weighted_sampler = WeightedRandomSampler( dataset_train.weights, len(dataset_train), replacement=True ) dataloader_train = DataLoader( dataset_train, num_workers=args.num_workers, collate_fn=collater, sampler=weighted_sampler, batch_size=args.batch_size, pin_memory=True, ) else: sampler = AspectRatioBasedSampler( dataset_train, batch_size=args.batch_size, drop_last=False ) dataloader_train = DataLoader( 
dataset_train, num_workers=args.num_workers, collate_fn=collater, batch_sampler=sampler, pin_memory=True, ) if args.val_json_path is not None: dataset_val = CocoDataset( args.val_image_dir, args.val_json_path, transform=transforms.Compose([Normalizer(), Resizer(img_dim)]), return_ids=True, ) # Create the model if args.depth == 18: retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True) elif args.depth == 34: retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True) elif args.depth == 50: retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True) elif args.depth == 101: retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True) elif args.depth == 152: retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True) else: raise ValueError("Unsupported model depth, must be one of 18, 34, 50, 101, 152") # Load checkpoint if provided. retinanet = load_checkpoint(retinanet, args.weights, args.depth) use_gpu = True if torch.cuda.is_available(): if dist.is_available() and distributed: if args.dist_mode == "DDP": retinanet = nn.SyncBatchNorm.convert_sync_batchnorm(retinanet) retinanet = retinanet.cuda() elif args.dist_mode == "DP": retinanet = torch.nn.DataParallel(retinanet).cuda() else: raise NotImplementedError else: torch.cuda.set_device(torch.device("cuda:0")) retinanet = retinanet.cuda() # swav = torch.load("/home/bishwarup/Desktop/swav_ckp-50.pth", map_location=torch.device("cpu"))[ # "state_dict" # ] # swav_dict = collections.OrderedDict() # for k, v in swav.items(): # k = k[7:] # discard the module. part # if k in retinanet.state_dict(): # swav_dict[k] = v # logger.info(f"SwAV => {len(swav_dict)} keys matched") # model_dict = copy.deepcopy(retinanet.state_dict()) # model_dict.update(swav_dict) # retinanet.load_state_dict(model_dict) # if use_gpu: # if torch.cuda.is_available(): # if torch.cuda.is_available(): # retinanet = torch.nn.DataParallel(retinanet).cuda() # else: # retinanet = torch.nn.DataParallel(retinanet) retinanet.training = True optimizer = optim.Adam(retinanet.parameters(), lr=0.001) # optimizer = torch.optim.SGD( # retinanet.parameters(), lr=4.2, momentum=0.9, weight_decay=1e-4, # ) if dist.is_available() and distributed and args.dist_mode == "DDP": optimizer = LARC(optimizer=optimizer, trust_coefficient=0.001, clip=True) # optimizer = optim.SGD(retinanet.parameters(), lr=0.0001, momentum=0.95) # scheduler = optim.lr_scheduler.CosineAnnealingLR( # optimizer, T_max=args.epochs, eta_min=1e-6 # ) warmup_lr_schedule = np.linspace( args.start_warmup, args.base_lr, len(dataloader_train) * args.warmup_epochs ) iters = np.arange(len(dataloader_train) * (args.epochs - args.warmup_epochs)) cosine_lr_schedule = np.array( [ args.final_lr + 0.5 * (args.base_lr - args.final_lr) * ( 1 + math.cos( math.pi * t / (len(dataloader_train) * (args.epochs - args.warmup_epochs)) ) ) for t in iters ] ) lr_schedule = np.concatenate((warmup_lr_schedule, cosine_lr_schedule)) if distributed and dist.is_available() and args.dist_mode == "DDP": retinanet = nn.parallel.DistributedDataParallel( retinanet, device_ids=[args.gpu_to_work_on], find_unused_parameters=True ) # scheduler_warmup = GradualWarmupScheduler( # optimizer, multiplier=100, total_epoch=5, after_scheduler=scheduler # ) # scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) # scheduler = optim.lr_scheduler.OneCycleLR( # optimizer, # max_lr=1e-4, # total_steps=args.epochs * 
len(dataloader_train), # pct_start=0.2, # max_momentum=0.95, # ) loss_hist = collections.deque(maxlen=500) if dist.is_available() and distributed: retinanet.module.train() retinanet.module.freeze_bn() else: retinanet.train() retinanet.freeze_bn() # retinanet.module.freeze_bn() if args.rank == 0: logger.info("Number of training images: {}".format(len(dataset_train))) if dataset_val is not None: logger.info("Number of validation images: {}".format(len(dataset_val))) # scaler = amp.GradScaler() global best_map best_map = 0 n_iter = 0 scaler = amp.GradScaler(enabled=True) global keep_pbar keep_pbar = not (distributed and args.dist_mode == "DDP") for epoch_num in range(args.epochs): # scheduler_warmup.step(epoch_num) if dist.is_available() and distributed: if args.dist_mode == "DDP": dataloader_train.sampler.set_epoch(epoch_num) retinanet.module.train() retinanet.module.freeze_bn() else: retinanet.train() retinanet.freeze_bn() # retinanet.module.freeze_bn() epoch_loss = [] results = [] val_image_ids = [] pbar = tqdm(enumerate(dataloader_train), total=len(dataloader_train), leave=keep_pbar) for iter_num, data in pbar: n_iter = epoch_num * len(dataloader_train) + iter_num for param_group in optimizer.param_groups: lr = lr_schedule[n_iter] param_group["lr"] = lr optimizer.zero_grad() if torch.cuda.is_available(): with amp.autocast(enabled=False): classification_loss, regression_loss = retinanet( [data["img"].cuda().float(), data["annot"].cuda()] ) else: classification_loss, regression_loss = retinanet( [data["img"].float(), data["annot"]] ) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() loss = classification_loss + regression_loss # for param_group in optimizer.param_groups: # lr = param_group["lr"] if args.rank == 0: writer.add_scalar("Learning rate", lr, n_iter) pbar_desc = f"Epoch: {epoch_num} | lr = {lr:0.6f} | batch: {iter_num} | cls: {classification_loss:.4f} | reg: {regression_loss:.4f}" pbar.set_description(pbar_desc) pbar.update(1) if bool(loss == 0): continue # loss.backward() scaler.scale(loss).backward() # unscale the gradients for grad clipping scaler.unscale_(optimizer) torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1) # optimizer.step() # scheduler.step() # one cycle lr operates at batch level scaler.step(optimizer) scaler.update() loss_hist.append(float(loss)) epoch_loss.append(float(loss)) del classification_loss del regression_loss if args.dataset == "coco": # print("Evaluating dataset") # if args.plot: # stats = coco_eval.evaluate_coco( # dataset_val, # retinanet, # args.logdir, # args.batch_size, # args.num_workers, # writer, # n_iter, # ) # else: # stats = coco_eval.evaluate_coco( # dataset_val, # retinanet, # args.logdir, # args.batch_size, # args.num_workers, # ) if len(dataset_val) > 0: if dist.is_available() and distributed and args.dist_mode == "DDP": sampler_val = DistributedSampler(dataset_val) dataloader_val = DataLoader( dataset_val, sampler=sampler_val, batch_size=args.batch_size, num_workers=args.num_workers, collate_fn=eval_collate, pin_memory=True, ) else: dataloader_val = DataLoader( dataset_val, batch_size=args.batch_size, num_workers=args.num_workers, collate_fn=eval_collate, pin_memory=True, drop_last=False, ) validate(retinanet, dataset_val, dataloader_val) if args.rank == 0: if len(results): with open(os.path.join(args.logdir, "val_bbox_results.json"), "w") as f: json.dump(results, f, indent=4) stats = coco_eval.evaluate_coco(dataset_val, val_image_ids, args.logdir) map_avg, map_50, map_75, map_small = 
stats[:4]
                else:
                    map_avg, map_50, map_75, map_small = [-1] * 4

                if map_50 > best_map:
                    torch.save(
                        retinanet.state_dict(),
                        os.path.join(args.logdir, f"retinanet_resnet{args.depth}_best.pt"),
                    )
                    best_map = map_50

                writer.add_scalar("eval/mAP@0.5:0.95", map_avg, epoch_num * len(dataloader_train))
                writer.add_scalar("eval/mAP@0.5", map_50, epoch_num * len(dataloader_train))
                writer.add_scalar("eval/mAP@0.75", map_75, epoch_num * len(dataloader_train))
                writer.add_scalar("eval/map_small", map_small, epoch_num * len(dataloader_train))
                logger.info(
                    f"Epoch: {epoch_num} | lr = {lr:.6f} | mAP@0.5:0.95 = {map_avg:.4f} | mAP@0.5 = {map_50:.4f} | mAP@0.75 = {map_75:.4f} | map-small = {map_small:.4f}"
                )
        elif args.dataset == "csv" and args.csv_val is not None:
            # logger.info("Running eval...")
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        # scheduler.step(np.mean(epoch_loss))
        # scheduler.step()
        # torch.save(retinanet.module, os.path.join(args.logdir, f"retinanet_{epoch_num}.pt"))
        retinanet.eval()
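# The script above precomputes a per-iteration learning-rate array, a linear
# warmup followed by a cosine decay, then indexes it with n_iter inside the
# training loop. Self-contained sketch of that construction; all hyperparameter
# values here are illustrative, not the script's defaults.
import math

import numpy as np

iters_per_epoch, epochs, warmup_epochs = 100, 10, 2
start_warmup, base_lr, final_lr = 0.0, 1e-3, 1e-6

warmup = np.linspace(start_warmup, base_lr, iters_per_epoch * warmup_epochs)
t = np.arange(iters_per_epoch * (epochs - warmup_epochs))
cosine = final_lr + 0.5 * (base_lr - final_lr) * (1 + np.cos(math.pi * t / t.size))
lr_schedule = np.concatenate((warmup, cosine))

# warmup end sits at base_lr, last entry decays toward final_lr
print(lr_schedule[0], lr_schedule[iters_per_epoch * warmup_epochs - 1], lr_schedule[-1])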
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.', default='coco')
    parser.add_argument('--coco_path', help='Path to COCO directory', default='cocodataset')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--model_path', help='Path to model (.pt) file.', type=str,
                        default='coco_resnet_50_map_0_335_state_dict.pt')
    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        # the script defines no --csv_train flag, so the validation annotations are used here
        dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                 transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model and load the saved state_dict
    # (the original also assigned retinanet = torch.load(parser.model_path) here, but that
    # result was immediately overwritten, so only the state_dict load is kept)
    retinanet = model.resnet50(num_classes=dataset_val.num_classes(), pretrained=True)

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet.load_state_dict(torch.load(parser.model_path))
    retinanet.eval()

    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    for idx, data in enumerate(dataloader_val):
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())
            print('Elapsed time: {}'.format(time.time() - st))
            idxs = np.where(scores.cpu() > 0.5)
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
            img[img < 0] = 0
            img[img > 255] = 255
            img = np.transpose(img, (1, 2, 0))
            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
                label_name = dataset_val.labels[int(classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)
                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
                print(label_name)
            cv2.imshow('img', img)
            cv2.waitKey(0)
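# UnNormalizer above inverts the dataset Normalizer before drawing with OpenCV.
# A minimal standalone sketch of the same CHW-float to HWC-uint8 conversion,
# assuming the usual ImageNet mean/std (the values the companion Normalizer
# typically uses); the random input tensor is only a stand-in for data['img'][0].
import numpy as np
import torch

mean = torch.tensor([0.485, 0.456, 0.406]).view(3, 1, 1)
std = torch.tensor([0.229, 0.224, 0.225]).view(3, 1, 1)

img_chw = torch.rand(3, 480, 640)            # stand-in for a normalized image tensor
restored = img_chw * std + mean              # undo (x - mean) / std
img = (255 * restored).clamp(0, 255).byte()  # clip to valid pixel range
img = np.transpose(img.numpy(), (1, 2, 0))   # HWC, ready for cv2
print(img.shape, img.dtype)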
def main(args=None): parser = argparse.ArgumentParser( description="Simple training script for training a RetinaNet network." ) parser.add_argument("--dataset", help="Dataset type, must be one of csv or coco.") parser.add_argument("--model", default=None, help="Path to trained model") parser.add_argument("--coco_path", help="Path to COCO directory") parser.add_argument( "--csv_train", help="Path to file containing training annotations (see readme)" ) parser.add_argument( "--csv_classes", help="Path to file containing class list (see readme)" ) parser.add_argument( "--csv_val", help="Path to file containing validation annotations (optional, see readme)", ) parser.add_argument( "--depth", help="Resnet depth, must be one of 18, 34, 50, 101, 152", type=int, default=50, ) parser.add_argument("--epochs", help="Number of epochs", type=int, default=100) parser.add_argument( "--result_dir", default="results", help="Path to store training results", type=str, ) parser.add_argument( "--batch_num", default=8, help="Number of samples in a batch", type=int ) parser = parser.parse_args(args) print(parser) # parameters BATCH_SIZE = parser.batch_num IMAGE_MIN_SIDE = 1440 IMAGE_MAX_SIDE = 2560 # Create the data loaders if parser.dataset == "coco": if parser.coco_path is None: raise ValueError("Must provide --coco_path when training on COCO,") # TODO: parameterize arguments for Resizer, and other transform functions # resizer: min_side=608, max_side=1024 dataset_train = CocoDataset( parser.coco_path, # set_name="train2017", set_name="train_images_full", transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer(passthrough=True),] ), ) dataset_val = CocoDataset( parser.coco_path, # set_name="val2017", set_name="val_images_full", transform=transforms.Compose([Normalizer(), Resizer(passthrough=True),]), ) elif parser.dataset == "csv": if parser.csv_train is None: raise ValueError("Must provide --csv_train when training on COCO,") if parser.csv_classes is None: raise ValueError("Must provide --csv_classes when training on COCO,") dataset_train = CSVDataset( train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]), ) if parser.csv_val is None: dataset_val = None print("No validation annotations provided.") else: dataset_val = CSVDataset( train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]), ) else: raise ValueError("Dataset type not understood (must be csv or coco), exiting.") sampler = AspectRatioBasedSampler( dataset_train, batch_size=BATCH_SIZE, drop_last=False ) dataloader_train = DataLoader( dataset_train, num_workers=16, collate_fn=collater, batch_sampler=sampler ) if dataset_val is not None: sampler_val = AspectRatioBasedSampler( dataset_val, batch_size=BATCH_SIZE, drop_last=False ) dataloader_val = DataLoader( dataset_val, num_workers=16, collate_fn=collater, batch_sampler=sampler_val ) # Create the model if parser.depth == 18: retinanet = model.resnet18( num_classes=dataset_train.num_classes(), pretrained=True ) elif parser.depth == 34: retinanet = model.resnet34( num_classes=dataset_train.num_classes(), pretrained=True ) elif parser.depth == 50: retinanet = model.resnet50( num_classes=dataset_train.num_classes(), pretrained=True ) elif parser.depth == 101: retinanet = model.resnet101( num_classes=dataset_train.num_classes(), pretrained=True ) elif parser.depth == 152: retinanet = model.resnet152( num_classes=dataset_train.num_classes(), pretrained=True ) else: raise 
ValueError("Unsupported model depth, must be one of 18, 34, 50, 101, 152") if parser.model: retinanet = torch.load(parser.model) use_gpu = True if use_gpu: if torch.cuda.is_available(): retinanet = retinanet.cuda() if torch.cuda.is_available(): retinanet = torch.nn.DataParallel(retinanet).cuda() else: retinanet = torch.nn.DataParallel(retinanet) retinanet.training = True optimizer = optim.Adam(retinanet.parameters(), lr=1e-4) scheduler = optim.lr_scheduler.ReduceLROnPlateau( optimizer, patience=3, verbose=True ) loss_hist = collections.deque(maxlen=500) retinanet.train() retinanet.module.freeze_bn() print("Num training images: {}".format(len(dataset_train))) for epoch_num in range(parser.epochs): retinanet.train() retinanet.module.freeze_bn() epoch_loss = [] p_bar = tqdm(dataloader_train) for iter_num, data in enumerate(p_bar): try: optimizer.zero_grad() if torch.cuda.is_available(): classification_loss, regression_loss = retinanet( [data["img"].cuda().float(), data["annot"]] ) else: classification_loss, regression_loss = retinanet( [data["img"].float(), data["annot"]] ) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() loss = classification_loss + regression_loss if bool(loss == 0): continue loss.backward() torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1) optimizer.step() loss_hist.append(float(loss)) epoch_loss.append(float(loss)) mean_loss = np.mean(loss_hist) p_bar.set_description( f"Epoch: {epoch_num} | Iteration: {iter_num} | " f"Class loss: {float(classification_loss.item()):.5f} | " f"Regr loss: {float(regression_loss.item()):.5f} | " f"Running loss: {mean_loss:.5f}" ) del classification_loss del regression_loss except Exception as e: print(e) continue if parser.dataset == "coco": print("Evaluating dataset") coco_eval.evaluate_coco( dataset_val, retinanet, result_dir=parser.result_dir ) elif parser.dataset == "csv" and parser.csv_val is not None: print("Evaluating dataset") mAP = csv_eval.evaluate(dataset_val, retinanet) scheduler.step(np.mean(epoch_loss)) # TODO: Fix string formating mix (adopt homogeneous format) torch.save( retinanet.module, f"{parser.result_dir}/" + "{}_retinanet_{}.pt".format(parser.dataset, epoch_num), ) retinanet.eval() torch.save(retinanet, "model_final.pt")
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')  # dataset type
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    # select the pretrained backbone depth
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')
        dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                    transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV.')
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    # decides the order of the image dataset and the batch size; returns groups of images
    sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    # multi-GPU execution
    retinanet = torch.nn.DataParallel(retinanet).cuda()
    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    # collections: a module of specialized container datatypes, alternatives to the
    # built-in dict, list, set and tuple
    # collections.deque: a double-ended queue, here capped at 500 entries
    loss_hist = collections.deque(maxlen=500)

    # model.train(): enables BatchNormalization and Dropout
    # model.eval(): disables BatchNormalization and Dropout
    retinanet.train()
    retinanet.module.freeze_bn()
    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                # backpropagation
                loss.backward()
                # gradient clipping: gradients whose norm exceeds the threshold (0.1 here)
                # are scaled down to the threshold
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                # update all parameters; call this once the gradients have been computed,
                # e.g. by backward()
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                    epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        # optimizer.step() is usually called per mini-batch, scheduler.step() per epoch;
        # optimizer.step() actually updates the model, scheduler.step() only adjusts the lr
        scheduler.step(np.mean(epoch_loss))
        torch.save(retinanet.module, '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()
    torch.save(retinanet, 'model_final.pt')
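# freeze_bn above keeps BatchNorm layers in eval mode during training, which is
# standard practice for small-batch detector fine-tuning. A minimal sketch of
# such a helper; the real method lives on the model class in this repo, so this
# standalone version is only an assumed equivalent.
import torch.nn as nn


def freeze_bn(module):
    """Put every BatchNorm layer into eval mode so running stats stay fixed."""
    for layer in module.modules():
        if isinstance(layer, nn.BatchNorm2d):
            layer.eval()


net = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())
net.train()
freeze_bn(net)
print(net[1].training)  # False: BN stays frozen while the rest trains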
def main(args=None): parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.') parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.') parser.add_argument('--coco_path', help='Path to COCO directory') parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)') parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)') parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)') parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50) parser.add_argument('--config', help='Config file path that contains scale and ratio values', type=str) parser.add_argument('--epochs', help='Number of epochs', type=int, default=50) parser.add_argument('--init-lr', help='Initial learning rate for training process', type=float, default=1e-3) parser.add_argument('--batch-size', help='Number of input images per step', type=int, default=1) parser.add_argument('--num-workers', help='Number of worker used in dataloader', type=int, default=1) # For resuming training from saved checkpoint parser.add_argument('--resume', help='Whether to resume training from checkpoint', action='store_true') parser.add_argument('--saved-ckpt', help='Resume training from this checkpoint', type=str) parser.add_argument('--multi-gpus', help='Allow to use multi gpus for training task', action='store_true') parser.add_argument('--snapshots', help='Location to save training snapshots', type=str, default="snapshots") parser.add_argument('--log-dir', help='Location to save training logs', type=str, default="logs") parser.add_argument('--expr-augs', help='Allow to use use experiment augmentation methods', action='store_true') parser.add_argument('--aug-methods', help='(Experiment) Augmentation methods to use, separate by comma symbol', type=str, default="rotate,hflip,brightness,contrast") parser.add_argument('--aug-prob', help='Probability of applying (experiment) augmentation in range [0.,1.]', type=float, default=0.5) parser = parser.parse_args(args) train_transforms = [Normalizer(), Resizer(), Augmenter()] # Define transform methods if parser.expr_augs: aug_map = get_aug_map(p=parser.aug_prob) aug_methods = parser.aug_methods.split(",") for aug in aug_methods: if aug in aug_map.keys(): train_transforms.append(aug_map[aug]) else: print(f"{aug} is not available.") # Create the data loaders if parser.dataset == 'coco': if parser.coco_path is None: raise ValueError('Must provide --coco_path when training on COCO,') dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose(train_transforms)) dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose([Normalizer(), Resizer()])) elif parser.dataset == 'csv': if parser.csv_train is None: raise ValueError('Must provide --csv_train when training on COCO,') if parser.csv_classes is None: raise ValueError('Must provide --csv_classes when training on COCO,') dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose(train_transforms)) if parser.csv_val is None: dataset_val = None print('No validation annotations provided.') else: dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()])) else: raise ValueError('Dataset type not understood (must be 
csv or coco), exiting.') sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False) dataloader_train = DataLoader(dataset_train, num_workers=parser.num_workers, collate_fn=collater, batch_sampler=sampler) if dataset_val is not None: sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=parser.batch_size, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=parser.num_workers, collate_fn=collater, batch_sampler=sampler_val) config = dict({"scales": None, "ratios": None}) if parser.config: config = load_config(parser.config, config) if parser.depth == 18: retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True, ratios=config["ratios"], scales=config["scales"]) elif parser.depth == 34: retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True, ratios=config["ratios"], scales=config["scales"]) elif parser.depth == 50: retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True, ratios=config["ratios"], scales=config["scales"]) elif parser.depth == 101: retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True, ratios=config["ratios"], scales=config["scales"]) elif parser.depth == 152: retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True, ratios=config["ratios"], scales=config["scales"]) else: raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152') optimizer = optim.Adam(retinanet.parameters(), lr=parser.init_lr) if parser.resume: if not parser.saved_ckpt: print("No saved checkpoint provided for resuming training. Exiting now...") return if not os.path.exists(parser.saved_ckpt): print("Invalid saved checkpoint path. Exiting now...") return # Restore last state retinanet, optimizer, start_epoch = load_ckpt(parser.saved_ckpt, retinanet, optimizer) if parser.epochs <= start_epoch: print("Number of epochs must be higher than number of trained epochs of saved checkpoint.") return use_gpu = True if use_gpu: print("Using GPU for training process") if torch.cuda.is_available(): if parser.multi_gpus: print("Using multi-gpus for training process") retinanet = torch.nn.DataParallel(retinanet.cuda(), device_ids=[0,1]) else: retinanet = torch.nn.DataParallel(retinanet.cuda()) else: retinanet = torch.nn.DataParallel(retinanet) retinanet.training = True scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=1, verbose=True) loss_hist = collections.deque(maxlen=500) retinanet.train() retinanet.module.freeze_bn() print('Num training images: {}'.format(len(dataset_train))) # Tensorboard writer writer = SummaryWriter(parser.log_dir) # Save snapshots dir if not os.path.exists(parser.snapshots): os.makedirs(parser.snapshots) best_mAP = 0 start_epoch = 0 if not parser.resume else start_epoch for epoch_num in range(start_epoch, parser.epochs): retinanet.train() retinanet.module.freeze_bn() epoch_loss = [] epoch_csf_loss = [] epoch_reg_loss = [] for iter_num, data in enumerate(dataloader_train): try: optimizer.zero_grad() if torch.cuda.is_available(): with torch.cuda.device(0): classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']]) else: classification_loss, regression_loss = retinanet([data['img'].float(), data['annot']]) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() loss = classification_loss + regression_loss epoch_csf_loss.append(float(classification_loss)) 
epoch_reg_loss.append(float(regression_loss))
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print('\rEpoch: {}/{} | Iteration: {}/{} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                    (epoch_num + 1), parser.epochs, (iter_num + 1), len(dataloader_train),
                    float(classification_loss), float(regression_loss), np.mean(loss_hist)), end='')
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        # writer.add_scalar("Loss/train", loss, epoch_num)
        _epoch_loss = np.mean(epoch_loss)
        _epoch_csf_loss = np.mean(epoch_csf_loss)  # was np.mean(epoch_reg_loss), a copy-paste slip
        _epoch_reg_loss = np.mean(epoch_reg_loss)

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
            scheduler.step(_epoch_loss)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('\nEvaluating dataset')
            APs = csv_eval.evaluate(dataset_val, retinanet)
            mAP = round(mean(APs[ap][0] for ap in APs.keys()), 5)
            print("mAP: %f" % mAP)
            writer.add_scalar("validate/mAP", mAP, epoch_num)
            # Handle lr_scheduler with the mAP value
            scheduler.step(mAP)

        lr = get_lr(optimizer)
        writer.add_scalar("train/classification-loss", _epoch_csf_loss, epoch_num)
        writer.add_scalar("train/regression-loss", _epoch_reg_loss, epoch_num)
        writer.add_scalar("train/loss", _epoch_loss, epoch_num)
        writer.add_scalar("train/learning-rate", lr, epoch_num)

        # Save model file, optimizer and epoch number
        checkpoint = {
            'epoch': epoch_num,
            'state_dict': retinanet.state_dict(),
            'optimizer': optimizer.state_dict(),
        }
        # torch.save(retinanet.module, os.path.join(parser.snapshots, '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num)))
        # Check whether this epoch's model achieves the highest mAP value
        # (note: mAP is only computed on the csv branch above)
        is_best = False
        if best_mAP < mAP:
            best_mAP = mAP
            is_best = True
        save_ckpt(checkpoint, is_best, parser.snapshots, '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num + 1))
        print('\n')

    retinanet.eval()
    torch.save(retinanet, 'model_final.pt')
    writer.flush()
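# save_ckpt / load_ckpt above are this script's helpers and their bodies are not
# shown. A plausible minimal sketch, assuming save_ckpt writes the checkpoint
# dict and keeps a copy of the best-scoring epoch, and load_ckpt restores model,
# optimizer and the stored epoch number; the exact behavior is an assumption.
import os
import shutil

import torch


def save_ckpt(state, is_best, ckpt_dir, filename):
    path = os.path.join(ckpt_dir, filename)
    torch.save(state, path)
    if is_best:
        # keep a stable copy of the best checkpoint alongside the per-epoch files
        shutil.copyfile(path, os.path.join(ckpt_dir, 'model_best.pt'))


def load_ckpt(path, model, optimizer):
    state = torch.load(path)
    model.load_state_dict(state['state_dict'])
    optimizer.load_state_dict(state['optimizer'])
    return model, optimizer, state['epoch']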
def main(args=None): parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.') parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.') parser.add_argument('--coco_path', help='Path to COCO directory') parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)') parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)') parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)') parser.add_argument('--iou',default='05') parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50) parser.add_argument('--epochs', help='Number of epochs', type=int, default=100) parser = parser.parse_args(args) # Create the data loaders if parser.dataset == 'coco': if parser.coco_path is None: raise ValueError('Must provide --coco_path when training on COCO,') dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()])) dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose([Normalizer(), Resizer()])) elif parser.dataset == 'csv': if parser.csv_train is None: raise ValueError('Must provide --csv_train when training on COCO,') if parser.csv_classes is None: raise ValueError('Must provide --csv_classes when training on COCO,') dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()])) val_dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()])) if parser.csv_val is None: dataset_val = None print('No validation annotations provided.') else: dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()])) else: raise ValueError('Dataset type not understood (must be csv or coco), exiting.') sampler = AspectRatioBasedSampler(dataset_train, batch_size=8, drop_last=False) dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler) if dataset_val is not None: sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=8, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val) # Create the model if parser.depth == 18: retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 34: retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 50: retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 101: retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 152: retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True) else: raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152') use_gpu = True if use_gpu: if torch.cuda.is_available(): retinanet = retinanet.cuda() if torch.cuda.is_available(): retinanet = torch.nn.DataParallel(retinanet).cuda() else: retinanet = torch.nn.DataParallel(retinanet) retinanet.training = True optimizer = optim.Adam(retinanet.parameters(), lr=5e-5) lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) 
multistep_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[5, 8, 11, 20], gamma=0.2)

    loss_hist = collections.deque(maxlen=500)
    val_loss_hist = collections.deque(maxlen=500)

    # the original referenced `writer` without ever creating it; a tensorboard
    # SummaryWriter (assumed to be imported at the top of the script) is added here
    writer = SummaryWriter()

    retinanet.train()
    retinanet.module.freeze_bn()
    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        val_epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                if torch.cuda.is_available():
                    classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                else:
                    classification_loss, regression_loss = retinanet([data['img'].float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print('Train: Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f} | Epoch loss: {:1.5f} '.format(
                    epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist), epoch_loss[-1]))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        for iter_num, data in enumerate(dataloader_val):
            try:
                # optimizer.zero_grad()
                # retinanet.eval()
                with torch.no_grad():
                    if torch.cuda.is_available():
                        classification_loss, regression_loss = retinanet((data['img'].cuda().float(), data['annot']))
                    else:
                        classification_loss, regression_loss = retinanet((data['img'].float(), data['annot']))
                    classification_loss = classification_loss.mean()
                    regression_loss = regression_loss.mean()
                    loss = classification_loss + regression_loss
                    if bool(loss == 0):
                        continue
                    # loss.backward()
                    # torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                    # optimizer.step()
                    val_loss_hist.append(float(loss))
                    val_epoch_loss.append(float(loss))
                    print('Val: Epoch: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f} | Epoch loss: {:1.5f} '.format(
                        epoch_num, float(classification_loss), float(regression_loss), np.mean(val_loss_hist), val_epoch_loss[-1]))
                    del classification_loss
                    del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            # mAP_train = csv_eval.evaluate(val_dataset_train, retinanet, iou_threshold=float(parser.iou) / 10)
            mAP_val = csv_eval.evaluate(dataset_val, retinanet, iou_threshold=float(parser.iou) / 10)
            # writer.add_scalar('train_mAP_Questions', mAP_train[0][0], epoch_num)
            writer.add_scalar('val_mAP_Questions', mAP_val[0][0], epoch_num)

        writer.add_scalar('val_loss', np.mean(val_epoch_loss), epoch_num)
        writer.add_scalar('train_loss', np.mean(epoch_loss), epoch_num)
        lr_scheduler.step(np.mean(epoch_loss))
        # one_scheduler.step()
        multistep_scheduler.step()
        torch.save(retinanet.module, '{}_retinanet_{}.pt'.format(parser.iou, epoch_num))

    retinanet.eval()
    torch.save(retinanet, 'model_final.pt')
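# Every training loop in this collection clips gradients with
# torch.nn.utils.clip_grad_norm_(params, 0.1) between backward() and step().
# Tiny self-contained demonstration of that call order on a toy model; the
# model, data and learning rate are illustrative only.
import torch
import torch.nn as nn
import torch.optim as optim

net = nn.Linear(10, 1)
opt = optim.Adam(net.parameters(), lr=1e-3)

loss = net(torch.randn(4, 10)).pow(2).mean()
opt.zero_grad()
loss.backward()
# returns the total gradient norm measured before clipping
total_norm = torch.nn.utils.clip_grad_norm_(net.parameters(), 0.1)
opt.step()
print('grad norm before clipping: {:.4f}'.format(float(total_norm)))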
def main(args=None): parser = argparse.ArgumentParser( description='Simple training script for training a RetinaNet network.') parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.') # parser.add_argument('--coco_path', help='Path to COCO directory') parser.add_argument( '--dataset_root', default='/root/data/VOCdevkit/', help= 'Dataset root directory path [/root/data/VOCdevkit/, /root/data/coco/, /root/data/FLIR_ADAS]' ) parser.add_argument( '--csv_train', help='Path to file containing training annotations (see readme)') parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)') parser.add_argument( '--csv_val', help= 'Path to file containing validation annotations (optional, see readme)' ) parser.add_argument( '--resume', default=None, type=str, help='Checkpoint state_dict file to resume training from') parser.add_argument( '--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50) parser.add_argument('--batch_size', default=16, type=int, help='Batch size for training') parser.add_argument('--epochs', help='Number of epochs', type=int, default=100) parser.add_argument('--lr', '--learning_rate', default=1e-4, type=float, help='initial learning rate') parser.add_argument('--weight_decay', default=5e-4, type=float, help='Weight decay') parser.add_argument('-j', '--workers', default=4, type=int, metavar='N', help='number of data loading workers (default: 4)') parser.add_argument("--log", default=False, action="store_true", help="Write log file.") parser = parser.parse_args(args) network_name = 'RetinaNet-Res{}'.format(parser.depth) # print('network_name:', network_name) net_logger = logging.getLogger('Network Logger') formatter = logging.Formatter(LOGGING_FORMAT) streamhandler = logging.StreamHandler() streamhandler.setFormatter(formatter) net_logger.addHandler(streamhandler) if parser.log: net_logger.setLevel(logging.INFO) # logging.basicConfig(level=logging.DEBUG, format=LOGGING_FORMAT, # filename=os.path.join('log', '{}.log'.format(network_name)), filemode='a') filehandler = logging.FileHandler(os.path.join( 'log', '{}.log'.format(network_name)), mode='a') filehandler.setFormatter(formatter) net_logger.addHandler(filehandler) net_logger.info('Network Name: {:>20}'.format(network_name)) # Create the data loaders if parser.dataset == 'coco': if parser.dataset_root is None: raise ValueError( 'Must provide --dataset_root when training on COCO,') dataset_train = CocoDataset(parser.dataset_root, set_name='train2017', transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) dataset_val = CocoDataset(parser.dataset_root, set_name='val2017', transform=transforms.Compose( [Normalizer(), Resizer()])) elif parser.dataset == 'FLIR': if parser.dataset_root is None: raise ValueError( 'Must provide --dataset_root when training on FLIR,') _scale = 1.2 dataset_train = FLIRDataset(parser.dataset_root, set_name='train', transform=transforms.Compose([ Normalizer(), Augmenter(), Resizer(min_side=int(512 * _scale), max_side=int(640 * _scale), logger=net_logger) ])) dataset_val = FLIRDataset(parser.dataset_root, set_name='val', transform=transforms.Compose([ Normalizer(), Resizer(min_side=int(512 * _scale), max_side=int(640 * _scale)) ])) elif parser.dataset == 'csv': if parser.csv_train is None: raise ValueError('Must provide --csv_train when training on COCO,') if parser.csv_classes is None: raise ValueError( 'Must provide --csv_classes when training on COCO,') dataset_train = 
CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) if parser.csv_val is None: dataset_val = None print('No validation annotations provided.') else: dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Resizer()])) else: raise ValueError( 'Dataset type not understood (must be FLIR, COCO or csv), exiting.' ) # Original RetinaNet code # sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False) # dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler) # if dataset_val is not None: # sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) # dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val) dataloader_train = DataLoader(dataset_train, batch_size=parser.batch_size, num_workers=parser.workers, shuffle=True, collate_fn=collater, pin_memory=True) dataloader_val = DataLoader(dataset_val, batch_size=1, num_workers=parser.workers, shuffle=False, collate_fn=collater, pin_memory=True) build_param = {'logger': net_logger} if parser.resume is not None: net_logger.info('Loading Checkpoint : {}'.format(parser.resume)) retinanet = torch.load(parser.resume) s_b = parser.resume.rindex('_') s_e = parser.resume.rindex('.') start_epoch = int(parser.resume[s_b + 1:s_e]) + 1 net_logger.info('Continue on {} Epoch'.format(start_epoch)) else: # Create the model if parser.depth == 18: retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True, **build_param) elif parser.depth == 34: retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True, **build_param) elif parser.depth == 50: retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True, **build_param) elif parser.depth == 101: retinanet = model.resnet101( num_classes=dataset_train.num_classes(), pretrained=True, **build_param) elif parser.depth == 152: retinanet = model.resnet152( num_classes=dataset_train.num_classes(), pretrained=True, **build_param) else: raise ValueError( 'Unsupported model depth, must be one of 18, 34, 50, 101, 152') start_epoch = 0 use_gpu = True if use_gpu: if torch.cuda.is_available(): retinanet = retinanet.cuda() if torch.cuda.is_available(): retinanet = torch.nn.DataParallel(retinanet).cuda() else: retinanet = torch.nn.DataParallel(retinanet) retinanet.training = True net_logger.info('Weight Decay : {}'.format(parser.weight_decay)) net_logger.info('Learning Rate : {}'.format(parser.lr)) # optimizer = optim.Adam(retinanet.parameters(), lr=1e-5) optimizer = optim.Adam(retinanet.parameters(), lr=parser.lr, weight_decay=parser.weight_decay) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) loss_hist = collections.deque(maxlen=500) retinanet.train() retinanet.module.freeze_bn() # print('Num training images: {}'.format(len(dataset_train))) net_logger.info('Num Training Images: {}'.format(len(dataset_train))) for epoch_num in range(start_epoch, parser.epochs): retinanet.train() retinanet.module.freeze_bn() epoch_loss = [] for iter_num, data in enumerate(dataloader_train): try: optimizer.zero_grad() # print(data['img'][0,:,:,:].shape) # print(data['annot']) if torch.cuda.is_available(): classification_loss, regression_loss = retinanet( [data['img'].cuda().float(), data['annot']]) else: classification_loss, regression_loss = 
retinanet( [data['img'].float(), data['annot']]) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() loss = classification_loss + regression_loss if bool(loss == 0): continue loss.backward() torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1) optimizer.step() loss_hist.append(float(loss)) epoch_loss.append(float(loss)) if (iter_num % 10 == 0): _log = 'Epoch: {} | Iter: {} | Class loss: {:1.5f} | BBox loss: {:1.5f} | Running loss: {:1.5f}'.format( epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)) net_logger.info(_log) del classification_loss del regression_loss except Exception as e: print(e) continue if (epoch_num + 1) % 1 == 0: test(dataset_val, retinanet, epoch_num, parser, net_logger) # if parser.dataset == 'coco': # print('Evaluating dataset') # coco_eval.evaluate_coco(dataset_val, retinanet) # elif parser.dataset == 'csv' and parser.csv_val is not None: # print('Evaluating dataset') # mAP = csv_eval.evaluate(dataset_val, retinanet) scheduler.step(np.mean(epoch_loss)) print('Learning Rate:', str(scheduler._last_lr)) torch.save( retinanet.module, os.path.join( 'saved', '{}_{}_{}.pt'.format(parser.dataset, network_name, epoch_num))) retinanet.eval() torch.save(retinanet, 'model_final.pt')
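# The resume logic above recovers the starting epoch by slicing the checkpoint filename
# between the last '_' and the last '.'. A hedged alternative sketch, not what this repo
# does: store the epoch inside a checkpoint dict so nothing has to be parsed out of the
# filename. `save_ckpt` and `load_ckpt` are hypothetical helper names.
import torch

def save_ckpt(model, optimizer, epoch, path):
    torch.save({'epoch': epoch,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()}, path)

def load_ckpt(model, optimizer, path):
    ckpt = torch.load(path, map_location='cpu')
    model.load_state_dict(ckpt['model'])
    optimizer.load_state_dict(ckpt['optimizer'])
    return ckpt['epoch'] + 1  # epoch to continue from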
def main(args=None): parser = argparse.ArgumentParser( description='Simple training script for training a RetinaNet network.') parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.') parser.add_argument('--coco_path', help='Path to COCO directory') parser.add_argument( '--csv_train', help='Path to file containing training annotations (see readme)') parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)') parser.add_argument( '--csv_val', help= 'Path to file containing validation annotations (optional, see readme)' ) parser.add_argument( '--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152, 5032, 10132', type=int, default=10148) parser.add_argument('--epochs', help='Number of epochs', type=int, default=200) parser = parser.parse_args(args) # Create the data loaders if parser.dataset == 'coco': if parser.coco_path is None: raise ValueError('Must provide --coco_path when training on COCO,') dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose( [Normalizer(), Resizer()])) elif parser.dataset == 'csv': if parser.csv_train is None: raise ValueError('Must provide --csv_train when training on COCO,') if parser.csv_classes is None: raise ValueError( 'Must provide --csv_classes when training on COCO,') dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) if parser.csv_val is None: dataset_val = None print('No validation annotations provided.') else: dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Resizer()])) else: raise ValueError( 'Dataset type not understood (must be csv or coco), exiting.') sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False) dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler) if dataset_val is not None: sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val) # Create the model if parser.depth == 18: retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 34: retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 50: retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 101: retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 152: retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 5032: retinanet = model.resnext50(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 10132: retinanet = model.resnext101(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 10148: retinanet = model_SE.SEresnext101( num_classes=dataset_train.num_classes(), pretrained=True) else: raise ValueError( 'Unsupported model depth, must be one of 18, 34, 50, 101, 152') use_gpu = True if use_gpu: if torch.cuda.is_available(): retinanet = retinanet.cuda() if torch.cuda.is_available(): retinanet = torch.nn.DataParallel(retinanet).cuda() else: retinanet = 
torch.nn.DataParallel(retinanet) retinanet.training = True optimizer = optim.Adam(retinanet.parameters(), lr=1e-5) #change_weight_decay scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) loss_hist = collections.deque(maxlen=500) retinanet.train() retinanet.module.freeze_bn() print('Num training images: {}'.format(len(dataset_train))) for epoch_num in range(parser.epochs): retinanet.train() retinanet.module.freeze_bn() epoch_loss = [] total_classification_loss = 0.0 total_regression_loss = 0.0 epoch_number = 0 for iter_num, data in enumerate(dataloader_train): try: optimizer.zero_grad() if torch.cuda.is_available(): classification_loss, regression_loss = retinanet( [data['img'].cuda().float(), data['annot']]) else: classification_loss, regression_loss = retinanet( [data['img'].float(), data['annot']]) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() loss = classification_loss + regression_loss if bool(loss == 0): continue loss.backward() torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1) optimizer.step() loss_hist.append(float(loss)) epoch_loss.append(float(loss)) ############################# # total_classification_loss += classification_loss # total_regression_loss += regression_loss # epoch_number = epoch_num fp = open(output_path + "clas_reg_loss.txt", "a") fp.write( str(epoch_num) + ',' + str(float(classification_loss)) + ',' + str(float(regression_loss)) + ',' + str(np.mean(loss_hist)) + '\n') # writer.add_scalar('Classification_loss', float(classification_loss), epoch_num) # writer.add_scalar('Regression_loss', float(regression_loss), epoch_num) # writer.flush() ############################# print( 'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}' .format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist))) del classification_loss del regression_loss except Exception as e: print(e) continue ############################# if parser.dataset == 'coco': print('Evaluating dataset') coco_eval.evaluate_coco(dataset_val, retinanet) elif parser.dataset == 'csv' and parser.csv_val is not None: print('Evaluating dataset') mAP = csv_eval.evaluate(dataset_val, retinanet) scheduler.step(np.mean(epoch_loss)) torch.save( retinanet.module, output_path + '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num)) retinanet.eval() torch.save(retinanet, output_path + 'model_final.pt')
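# The script above reopens clas_reg_loss.txt with open(..., "a") on every iteration and
# never closes the handle. A sketch of the same per-iteration logging with a context
# manager, so the file is flushed and closed each time. `output_path` is assumed to be a
# directory-prefix string defined at module scope, as the original appears to assume,
# and `log_losses` is a hypothetical helper name.
def log_losses(output_path, epoch, cls_loss, reg_loss, running_loss):
    with open(output_path + "clas_reg_loss.txt", "a") as fp:
        fp.write("{},{:.5f},{:.5f},{:.5f}\n".format(
            epoch, cls_loss, reg_loss, running_loss))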
def main(args=None): parser = argparse.ArgumentParser( description='Simple training script for training a RetinaNet network.') parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.') parser.add_argument('--coco_path', help='Path to COCO directory') parser.add_argument( '--csv_train', help='Path to file containing training annotations (see readme)') parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)') parser.add_argument( '--csv_val', help= 'Path to file containing validation annotations (optional, see readme)' ) parser.add_argument('--model_save_path', help='Path to save model', type=str) parser.add_argument( '--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50) parser.add_argument('--epochs', help='Number of epochs', type=int, default=100) parser = parser.parse_args(args) # Create the data loaders if parser.dataset == 'coco': if parser.coco_path is None: raise ValueError('Must provide --coco_path when training on COCO,') dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose( [Normalizer(), Resizer()])) elif parser.dataset == 'csv': if parser.csv_train is None: raise ValueError('Must provide --csv_train when training on COCO,') if parser.csv_classes is None: raise ValueError( 'Must provide --csv_classes when training on COCO,') dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) if parser.csv_val is None: dataset_val = None print('No validation annotations provided.') else: dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Resizer()])) else: raise ValueError( 'Dataset type not understood (must be csv or coco), exiting.') sampler = AspectRatioBasedSampler(dataset_train, batch_size=8, drop_last=False) dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler) if dataset_val is not None: sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val) # Create the model if parser.depth == 18: retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 34: retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 50: retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 101: retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 152: retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True) else: raise ValueError( 'Unsupported model depth, must be one of 18, 34, 50, 101, 152') use_gpu = True if use_gpu: if torch.cuda.is_available(): retinanet = retinanet.cuda() if torch.cuda.is_available(): retinanet = torch.nn.DataParallel(retinanet).cuda() else: retinanet = torch.nn.DataParallel(retinanet) retinanet.training = True optimizer = optim.Adam(retinanet.parameters(), lr=1e-5) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) loss_hist = collections.deque(maxlen=500) retinanet.train() retinanet.module.freeze_bn() 
print('Num training images: {}'.format(len(dataset_train))) # add draw tensorboard code writer = SummaryWriter(log_dir='./logs/416*416/', flush_secs=60) # if Cuda: # graph_inputs = torch.from_numpy(np.random.rand(1, 3, input_shape[0], input_shape[1])).type( # torch.FloatTensor).cuda() # else: # graph_inputs = torch.from_numpy(np.random.rand(1, 3, input_shape[0], input_shape[1])).type(torch.FloatTensor) # writer.add_graph(model, (graph_inputs,)) # add gap save model count variable n = 0 # keep validation losses separate from the training running loss, and give the checkpoint name a defined mAP value before the first evaluation mean_ap = 0.0 val_loss_hist = collections.deque(maxlen=500) for epoch_num in range(parser.epochs): n += 1 retinanet.train() retinanet.module.freeze_bn() epoch_loss = [] ### begin calculate train loss for iter_num, data in enumerate(dataloader_train): # try: optimizer.zero_grad() if torch.cuda.is_available(): classification_loss, regression_loss = retinanet( [data['img'].cuda().float(), data['annot']]) else: classification_loss, regression_loss = retinanet( [data['img'].float(), data['annot']]) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() loss = classification_loss + regression_loss if bool(loss == 0): continue loss.backward() torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1) optimizer.step() loss_hist.append(float(loss)) epoch_loss.append(float(loss)) print( 'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}' .format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist))) del classification_loss del regression_loss # except Exception as e: # print(e) # continue ### begin calculate valid loss (no gradients are needed here, so the forward pass runs under torch.no_grad) for iter_num, data in enumerate(dataloader_val): with torch.no_grad(): if torch.cuda.is_available(): classification_loss, regression_loss = retinanet( [data['img'].cuda().float(), data['annot']]) else: classification_loss, regression_loss = retinanet( [data['img'].float(), data['annot']]) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() loss = classification_loss + regression_loss if bool(loss == 0): continue val_loss_hist.append(float(loss)) print( 'Epoch: {} | Iteration: {} | Valid-Classification loss: {:1.5f} | Valid-Regression loss: {:1.5f} | Running Valid loss: {:1.5f}' .format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(val_loss_hist))) del classification_loss del regression_loss if parser.dataset == 'coco': print('Evaluating dataset') coco_eval.evaluate_coco(dataset_val, retinanet) elif parser.dataset == 'csv' and parser.csv_val is not None: print('Evaluating dataset') # csv_eval.evaluate returns {label: (AP, num_annotations)}, so reduce it to a scalar before printing (float(mAP) on the dict raised a TypeError in the original) mAP = csv_eval.evaluate(dataset_val, retinanet) valid_aps = [ap for ap, num_annots in mAP.values() if num_annots > 0] mean_ap = sum(valid_aps) / len(valid_aps) if valid_aps else 0.0 print('Epoch: {} | mAP: {:.3f}'.format(epoch_num, mean_ap)) scheduler.step(np.mean(epoch_loss)) if n % 10 == 0: torch.save( retinanet.module, parser.model_save_path + '/' + '{}_retinanet_{}_{:.3f}.pt'.format( parser.dataset, epoch_num, mean_ap)) retinanet.eval() torch.save(retinanet, parser.model_save_path + '/' + 'model_final.pt')
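# Every training loop in this file keeps its "Running loss" in
# collections.deque(maxlen=500): once 500 values are stored, each append silently drops
# the oldest one, so np.mean(...) over the deque is a moving average of the last 500
# iterations. A tiny demonstration with maxlen=3:
import collections
import numpy as np

window = collections.deque(maxlen=3)
for v in [10.0, 8.0, 6.0, 4.0]:
    window.append(v)
print(list(window), np.mean(window))  # [8.0, 6.0, 4.0] 6.0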
def main(args=None): parser = argparse.ArgumentParser( description="Simple training script for training a RetinaNet network.") parser.add_argument("--dataset", help="Dataset type, must be one of csv or coco.") parser.add_argument("--coco_path", help="Path to COCO directory") parser.add_argument( "--csv_train", help="Path to file containing training annotations (see readme)") parser.add_argument("--csv_classes", help="Path to file containing class list (see readme)") parser.add_argument( "--csv_val", help= "Path to file containing validation annotations (optional, see readme)", ) parser.add_argument( "--depth", help="Resnet depth, must be one of 18, 34, 50, 101, 152", type=int, default=50, ) parser.add_argument("--batch_size", help="Batch size", type=int, default=2) parser.add_argument("--epochs", help="Number of epochs", type=int, default=100) parser.add_argument("--workers", help="Number of workers of dataleader", type=int, default=4) parser = parser.parse_args(args) writer = SummaryWriter("logs") # Create the data loaders if parser.dataset == "coco": if parser.coco_path is None: raise ValueError("Must provide --coco_path when training on COCO,") dataset_train = CocoDataset( parser.coco_path, set_name="train2017", transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()]), ) dataset_val = CocoDataset( parser.coco_path, set_name="val2017", transform=transforms.Compose([Normalizer(), Resizer()]), ) elif parser.dataset == "csv": if parser.csv_train is None: raise ValueError("Must provide --csv_train when training on COCO,") if parser.csv_classes is None: raise ValueError( "Must provide --csv_classes when training on COCO,") dataset_train = CSVDataset( train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()]), ) if parser.csv_val is None: dataset_val = None print("No validation annotations provided.") else: dataset_val = CSVDataset( train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]), ) else: raise ValueError( "Dataset type not understood (must be csv or coco), exiting.") sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False) dataloader_train = DataLoader( dataset_train, num_workers=parser.workers, collate_fn=collater, batch_sampler=sampler, ) if dataset_val is not None: sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=parser.workers, collate_fn=collater, batch_sampler=sampler_val) # Create the model if parser.depth == 18: retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 34: retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 50: retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 101: retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 152: retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True) else: raise ValueError( "Unsupported model depth, must be one of 18, 34, 50, 101, 152") use_gpu = True if use_gpu: retinanet = retinanet.cuda() retinanet = torch.nn.DataParallel(retinanet).cuda() retinanet.training = True optimizer = optim.Adam(retinanet.parameters(), lr=1e-5) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=10, verbose=True) loss_hist = 
collections.deque(maxlen=500) retinanet.train() retinanet.module.freeze_bn() print("Num training images: {}".format(len(dataset_train))) global_step = 0 for epoch_num in range(parser.epochs): retinanet.train() retinanet.module.freeze_bn() epoch_loss = [] for iter_num, data in enumerate(dataloader_train): global_step = iter_num + epoch_num * len(dataloader_train) try: optimizer.zero_grad() classification_loss, regression_loss = retinanet( [data["img"].cuda().float(), data["annot"]]) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() loss = classification_loss + regression_loss if bool(loss == 0): continue loss.backward() torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1) optimizer.step() loss_hist.append(float(loss)) epoch_loss.append(float(loss)) if iter_num % 10 == 0: print( "Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}" .format( epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist), )) writer.add_scalars( "training", { "loss": loss, "loss_cls": classification_loss, "loss_reg": regression_loss, }, global_step, ) del classification_loss del regression_loss except Exception as e: print(e) continue if parser.dataset == "coco": print("Evaluating dataset") coco_eval.evaluate_coco(dataset_val, retinanet) elif parser.dataset == "csv" and parser.csv_val is not None: print("Evaluating dataset") mAP = csv_eval.evaluate(dataset_val, retinanet) # average only over classes that actually had annotations; the original divided by len(mAP), which silently counts absent classes as AP 0 valid_mAP = [x[0] for x in mAP.values() if x[1] > 0] mmAP = sum(valid_mAP) / len(valid_mAP) if valid_mAP else 0.0 writer.add_scalars("validation", {"mmAP": mmAP}, global_step) scheduler.step(np.mean(epoch_loss)) torch.save( retinanet.module, "checkpoints/{}_retinanet_{}.pt".format(parser.dataset, epoch_num), ) retinanet.eval() torch.save(retinanet, "checkpoints/model_final.pt")
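# Minimal sketch of the TensorBoard pattern used above: one add_scalars() call groups
# several curves under a single main tag, keyed by a global step derived from the epoch
# and iteration counters. The tag names and loss values here are illustrative only.
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter("logs")
for epoch in range(2):
    for it in range(100):
        step = it + epoch * 100
        loss_cls, loss_reg = 0.5 / (step + 1), 0.3 / (step + 1)  # dummy values
        writer.add_scalars("training",
                           {"loss": loss_cls + loss_reg,
                            "loss_cls": loss_cls,
                            "loss_reg": loss_reg},
                           step)
writer.close()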
def main(args=None): parser = argparse.ArgumentParser( description='Simple training script for training a RetinaNet network.') parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.', default='csv') parser.add_argument('--coco_path', help='Path to COCO directory') parser.add_argument( '--csv_train', help='Path to file containing training annotations (see readme)') parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)') parser.add_argument( '--csv_val', help= 'Path to file containing validation annotations (optional, see readme)' ) parser.add_argument( '--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50) parser.add_argument('--epochs', help='Number of epochs', type=int, default=100) parser.add_argument('--batch_size', help='Batch size', type=int, default=2) parser.add_argument('--num_workers', help='Number of workers', type=int, default=4) parser.add_argument('--models_out', help='The directory to save models', type=str) parser = parser.parse_args(args) if not os.path.exists(parser.models_out): os.makedirs(parser.models_out) # Create the data loaders if parser.dataset == 'coco': if parser.coco_path is None: raise ValueError('Must provide --coco_path when training on COCO,') dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose( [Normalizer(), Resizer()])) elif parser.dataset == 'csv': if parser.csv_train is None: raise ValueError('Must provide --csv_train when training on COCO,') if parser.csv_classes is None: raise ValueError( 'Must provide --csv_classes when training on COCO,') dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) if parser.csv_val is None: dataset_val = None print('No validation annotations provided.') else: dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Resizer()])) else: raise ValueError( 'Dataset type not understood (must be csv or coco), exiting.') sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False) dataloader_train = DataLoader(dataset_train, num_workers=parser.num_workers, collate_fn=collater, batch_sampler=sampler) if dataset_val is not None: sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=parser.num_workers, collate_fn=collater, batch_sampler=sampler_val) # Create the model if parser.depth == 18: retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 34: retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 50: retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 101: retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 152: retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True) else: raise ValueError( 'Unsupported model depth, must be one of 18, 34, 50, 101, 152') use_gpu = True if use_gpu: retinanet = retinanet.cuda() retinanet = torch.nn.DataParallel(retinanet).cuda() retinanet.training = True optimizer = optim.Adam(retinanet.parameters(), 
lr=1e-5) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) loss_hist = collections.deque(maxlen=500) retinanet.train() retinanet.module.freeze_bn() print('Num training images: {}'.format(len(dataset_train))) writer = SummaryWriter(log_dir="tensor_log/" + parser.models_out) global_steps = 0 for epoch_num in range(parser.epochs): retinanet.train() retinanet.module.freeze_bn() epoch_loss = [] for iter_num, data in enumerate(dataloader_train): try: optimizer.zero_grad() classification_loss, regression_loss = retinanet( [data['img'].cuda().float(), data['annot']]) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() loss = classification_loss + regression_loss if bool(loss == 0): continue loss.backward() torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1) optimizer.step() loss_hist.append(float(loss)) epoch_loss.append(float(loss)) running_loss = np.mean(loss_hist) print( 'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}' .format(epoch_num, iter_num, float(classification_loss), float(regression_loss), running_loss)) global_steps += 1 writer.add_scalar("Loss/Classification", float(classification_loss), global_steps) writer.add_scalar("Loss/Regression", float(regression_loss), global_steps) writer.add_scalar("Loss/Running", running_loss, global_steps) del classification_loss del regression_loss except Exception as e: print(e) continue if parser.dataset == 'coco': print('Evaluating dataset') coco_eval.evaluate_coco(dataset_val, retinanet) elif parser.dataset == 'csv' and parser.csv_val is not None: print('Evaluating dataset') mAP = csv_eval.evaluate(dataset_val, retinanet) #for k, v in mAP.items(): # writer.add_scalar("Accuracy/map_{}".format(k), v, epoch_num) scheduler.step(np.mean(epoch_loss)) torch.save( retinanet.module, os.path.join( parser.models_out, '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num))) retinanet.eval() torch.save(retinanet, os.path.join(parser.models_out, 'model_final.pt'))
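# These scripts hand DataLoader a batch_sampler, which is mutually exclusive with
# batch_size/shuffle/sampler/drop_last: a batch sampler simply yields lists of dataset
# indices. A toy sketch of that contract (the repo's AspectRatioBasedSampler additionally
# orders indices by image aspect ratio before chunking; ToyDataset and ChunkSampler are
# illustrative names):
import torch
from torch.utils.data import DataLoader, Dataset

class ToyDataset(Dataset):
    def __len__(self):
        return 10
    def __getitem__(self, i):
        return torch.tensor(i)

class ChunkSampler:
    def __init__(self, n, batch_size):
        self.n, self.batch_size = n, batch_size
    def __iter__(self):
        idx = list(range(self.n))
        for i in range(0, self.n, self.batch_size):
            yield idx[i:i + self.batch_size]
    def __len__(self):
        return (self.n + self.batch_size - 1) // self.batch_size

loader = DataLoader(ToyDataset(), batch_sampler=ChunkSampler(10, 4))
for batch in loader:
    print(batch)  # tensors of 4, 4 and 2 indices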
def main(args=None): parser = argparse.ArgumentParser( description='Simple training script for training a RetinaNet network.') parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.') parser.add_argument('--coco_path', help='Path to COCO directory') parser.add_argument( '--csv_train', help='Path to file containing training annotations (see readme)') parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)') parser.add_argument( '--csv_val', help= 'Path to file containing validation annotations (optional, see readme)' ) parser.add_argument( '--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50) parser.add_argument('--epochs', help='Number of epochs', type=int, default=100) parser.add_argument('--finetune', help='if load trained retina model', type=bool, default=False) parser.add_argument('--gpu', help='', type=bool, default=False) parser.add_argument('--batch_size', help='', type=int, default=2) parser.add_argument('--c', help='continue with formal model', type=bool, default=False) parser.add_argument('--model', help='model path') parser = parser.parse_args(args) # Create the data loaders if parser.dataset == 'coco': if parser.coco_path is None: raise ValueError('Must provide --coco_path when training on COCO,') dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose( [Normalizer(), Resizer()])) elif parser.dataset == 'csv': if parser.csv_train is None: raise ValueError('Must provide --csv_train when training on COCO,') if parser.csv_classes is None: raise ValueError( 'Must provide --csv_classes when training on COCO,') dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) if parser.csv_val is None: dataset_val = None print('No validation annotations provided.') else: dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Resizer()])) else: raise ValueError( 'Dataset type not understood (must be csv or coco), exiting.') #sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False) sampler = AspectRatioBasedSampler(dataset_train, parser.batch_size, drop_last=False) dataloader_train = DataLoader(dataset_train, num_workers=16, collate_fn=collater, batch_sampler=sampler) if dataset_val is not None: sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=8, collate_fn=collater, batch_sampler=sampler_val) epochpassed = 0 # Create the model if parser.depth == 18: retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 34: retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 50: retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 101: retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 152: retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True) else: raise ValueError( 'Unsupported model depth, must be one of 18, 34, 50, 101, 152') if parser.c: retinanet = torch.load(parser.model) #import pdb #pdb.set_trace() epochpassed = 
int(parser.model.split('.')[1].split('_')[-1]) use_gpu = parser.gpu #torch.cuda.set_device(5) if use_gpu: if torch.cuda.is_available(): retinanet = retinanet.cuda() if use_gpu and torch.cuda.is_available(): retinanet = torch.nn.DataParallel(retinanet).cuda() else: retinanet = torch.nn.DataParallel(retinanet) retinanet.training = True optimizer = optim.Adam(retinanet.parameters(), lr=1e-5) #original:1e-5 #optimizer = optim.SGD(retinanet.parameters(), lr=0.01, weight_decay=0.0001, momentum=0.9) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) loss_hist = collections.deque(maxlen=500) retinanet.train() retinanet.module.freeze_bn() print('Num training images: {}'.format(len(dataset_train))) writer = SummaryWriter() for epoch_num in range(parser.epochs): retinanet.train() retinanet.module.freeze_bn() epoch_loss = [] epoch_classification_loss = [] epoch_regression_loss = [] for iter_num, data in enumerate(dataloader_train): try: optimizer.zero_grad() if use_gpu and torch.cuda.is_available(): classification_loss, regression_loss = retinanet( [data['img'].cuda().float(), data['annot'].cuda()]) else: classification_loss, regression_loss = retinanet( [data['img'].float(), data['annot']]) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() loss = classification_loss + regression_loss if bool(loss == 0): continue loss.backward() torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1) optimizer.step() loss_hist.append(float(loss)) epoch_loss.append(float(loss)) epoch_classification_loss.append(float(classification_loss)) epoch_regression_loss.append(float(regression_loss)) print( 'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Epoch loss: {:1.5f}\r' .format(epoch_num + epochpassed, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)), end='') del classification_loss del regression_loss except Exception as e: print(e) continue print( 'Epoch: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Epoch loss: {:1.5f}' .format(epoch_num + epochpassed, np.mean(epoch_classification_loss), np.mean(epoch_regression_loss), np.mean(epoch_loss))) writer.add_scalar('lossrecord/regressionloss', np.mean(epoch_regression_loss), epoch_num + epochpassed) # fixed: this scalar previously logged the regression loss under the classification tag writer.add_scalar('lossrecord/classificationloss', np.mean(epoch_classification_loss), epoch_num + epochpassed) writer.add_scalar('lossrecord/epochloss', np.mean(epoch_loss), epoch_num + epochpassed) if parser.dataset == 'coco': print('Evaluating dataset') coco_eval.evaluate_coco(dataset_val, retinanet) elif parser.dataset == 'csv' and parser.csv_val is not None: print('Evaluating dataset') mAP = csv_eval.evaluate(dataset_val, retinanet) scheduler.step(np.mean(epoch_loss)) if epoch_num % 10 == 0: torch.save( retinanet.module, './models/{}_retinanet{}_highResolution4fold_{}.pt'.format( parser.dataset, parser.depth, epoch_num + epochpassed)) #retinanet.eval() torch.save( retinanet.module, './models/{}_retinanet{}_highResolution4fold_{}.pt'.format( parser.dataset, parser.depth, parser.epochs + epochpassed)) writer.close()
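# Every variant in this file clips the global gradient norm to 0.1 between backward()
# and optimizer.step(). A standalone sketch of that call on a toy module:
import torch

net = torch.nn.Linear(4, 2)
loss = net(torch.randn(8, 4)).pow(2).mean()
loss.backward()
total_norm = torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=0.1)
# total_norm is the norm *before* clipping; gradients are rescaled in place
# whenever it exceeds max_norm.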
def main(args=None): parser = argparse.ArgumentParser( description='Simple training script for training a RetinaNet network.') parser.add_argument('--coco_path', help='Path to COCO directory') parser.add_argument('--model_path', help='Path to model', type=str) parser = parser.parse_args(args) dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose( [Normalizer(), Resizer()])) # Create the model retinanet = model.resnet50(num_classes=dataset_val.num_classes(), pretrained=True) use_gpu = True if use_gpu: if torch.cuda.is_available(): retinanet = retinanet.cuda() if torch.cuda.is_available(): retinanet.load_state_dict(torch.load(parser.model_path)) retinanet = torch.nn.DataParallel(retinanet).cuda() else: retinanet.load_state_dict(torch.load(parser.model_path)) retinanet = torch.nn.DataParallel(retinanet) retinanet.training = False retinanet.eval() retinanet.module.freeze_bn() def draw_caption(image, box, caption): b = np.array(box).astype(int) cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2) cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1) for idx, data in enumerate(dataset_val): with torch.no_grad(): st = time.time() # run network if torch.cuda.is_available(): scores, labels, boxes = retinanet(data['img'].permute( 2, 0, 1).cuda().float().unsqueeze(dim=0)) else: scores, labels, boxes = retinanet(data['img'].permute( 2, 0, 1).float().unsqueeze(dim=0)) print('Elapsed time: {}'.format(time.time() - st)) idxs = np.where(scores.cpu() > 0.5) tensor = data['img'] * np.array( [[[0.229, 0.224, 0.225]]]) + np.array([[[0.485, 0.456, 0.406]] ]) img = tensor.mul(255).clamp(0, 255).byte().cpu().numpy() img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) for j in range(idxs[0].shape[0]): bbox = boxes[idxs[0][j], :] x1 = int(bbox[0]) y1 = int(bbox[1]) x2 = int(bbox[2]) y2 = int(bbox[3]) label_name = dataset_val.labels[int(labels[idxs[0][j]])] draw_caption(img, (x1, y1, x2, y2), label_name) cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2) print(label_name) cv2.imshow('img', img) cv2.waitKey(0)
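# The visualization above undoes the Normalizer transform (ImageNet statistics) before
# drawing boxes. A compact sketch of that inverse mapping on the HxWx3 float tensor the
# dataset yields; `unnormalize_to_uint8` is an illustrative helper name.
import numpy as np
import torch

IMAGENET_MEAN = np.array([0.485, 0.456, 0.406])
IMAGENET_STD = np.array([0.229, 0.224, 0.225])

def unnormalize_to_uint8(img_hwc):
    """Invert (x - mean) / std and rescale to displayable 8-bit values."""
    arr = img_hwc.cpu().numpy() * IMAGENET_STD + IMAGENET_MEAN
    return np.clip(arr * 255.0, 0, 255).astype(np.uint8)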
def main(args=None): parser = argparse.ArgumentParser( description='Simple training script for training a RetinaNet network.') parser.add_argument("--load_model_path", type=str, default=None, help="Path to model (.pt) file.") parser.add_argument('--dataset_type', help='Dataset type, must be one of csv or coco.') parser.add_argument('--coco_path', help='Path to COCO directory') parser.add_argument( '--csv_train', help='Path to file containing training annotations (see readme)') parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)') parser.add_argument( '--csv_val', help= 'Path to file containing validation annotations (optional, see readme)' ) parser.add_argument('--backbone', help='Backbone choice: [ResNet, ResNeXt]', type=str, default='ResNet') parser.add_argument( '--depth', help='ResNet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50) parser.add_argument('--epochs', help='Number of epochs', type=int, default=100) parser.add_argument("--batch_size", type=int, default=2, help="size of the batches") parser.add_argument("--lr", type=float, default=1e-5, help="adam: learning rate") parser = parser.parse_args(args) results_dir = "results" save_images_dir = os.path.join(results_dir, "images") save_models_dir = os.path.join(results_dir, "saved_models") os.makedirs(results_dir, exist_ok=True) os.makedirs(save_images_dir, exist_ok=True) os.makedirs(save_models_dir, exist_ok=True) # Get today datetime today = datetime.date.today() today = "%d%02d%02d" % (today.year, today.month, today.day) # Get current timme now = time.strftime("%H%M%S") # Backbone name backbone_name = parser.backbone + str(parser.depth) # DataSet name dataset_path = '' # Create the data loaders if parser.dataset_type == 'coco': if parser.coco_path is None: raise ValueError('Must provide --coco_path when training on COCO,') # dataset_train = CocoDataset(parser.coco_path, set_name='train2017', # transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()])) # dataset_val = CocoDataset(parser.coco_path, set_name='val2017', # transform=transforms.Compose([Normalizer(), Resizer()])) dataset_train = CocoDataset( parser.coco_path, set_name='train', # transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()])) transform=transforms.Compose( [Normalizer(), AugmenterWithImgaug(), Resizer()])) dataset_val = CocoDataset(parser.coco_path, set_name='val', transform=transforms.Compose( [Normalizer(), Resizer()])) dataset_path = parser.coco_path elif parser.dataset_type == 'csv': if parser.csv_train is None: raise ValueError('Must provide --csv_train when training on COCO,') if parser.csv_classes is None: raise ValueError( 'Must provide --csv_classes when training on COCO,') dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) if parser.csv_val is None: dataset_val = None print('No validation annotations provided.') else: dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Resizer()])) dataset_path = parser.csv_train else: raise ValueError( 'Dataset type not understood (must be csv or coco), exiting.') sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False) dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler) if dataset_val is not None: sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, 
drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val) # Retrain the model if parser.load_model_path is not None: # Load pretrained models print("\nLoading model from: [%s]" % parser.load_model_path) retinanet = torch.load(parser.load_model_path) print("\nStart retrain...") # Create the model else: print("\nStart train...") if parser.backbone == 'ResNet': if parser.depth == 18: retinanet = model.resnet18( num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 34: retinanet = model.resnet34( num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 50: retinanet = model.resnet50( num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 101: retinanet = model.resnet101( num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 152: retinanet = model.resnet152( num_classes=dataset_train.num_classes(), pretrained=True) else: raise ValueError( 'Unsupported model depth, must be one of 18, 34, 50, 101, 152' ) elif parser.backbone == 'ResNeXt': if parser.depth == 50: retinanet = model.resnext50_32x4d( num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 101: retinanet = model.resnext101_32x8d( num_classes=dataset_train.num_classes(), pretrained=True) pass else: raise ValueError( "Unsupported model depth, must be one of 50, 101") else: raise ValueError("Choice a backbone, [ResNet, ResNeXt]") # Get dataset name dataset_name = os.path.split(dataset_path)[-1] # Checkpoint name save_ckpt_name = r"%s_%s-%s-RetinaNet-backbone(%s)-ep(%d)-bs(%d)-lr(%s)" \ % (today, now, dataset_name, backbone_name, parser.epochs, parser.batch_size, parser.lr) os.makedirs(os.path.join(save_images_dir, "%s" % save_ckpt_name), exist_ok=True) os.makedirs(os.path.join(save_models_dir, "%s" % save_ckpt_name), exist_ok=True) tb_log_path = os.path.join("tf_log", save_ckpt_name) tb_writer = SummaryWriter(os.path.join(results_dir, tb_log_path)) use_gpu = True if use_gpu: retinanet = retinanet.cuda() retinanet = torch.nn.DataParallel(retinanet).cuda() retinanet.training = True optimizer = optim.Adam(retinanet.parameters(), lr=parser.lr) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) loss_hist = collections.deque(maxlen=500) val_loss_hist = collections.deque(maxlen=500) retinanet.train() retinanet.module.freeze_bn() print('Num training images: {}'.format(len(dataset_train))) epoch_prev_time = time.time() for epoch_num in range(parser.epochs): retinanet.train() retinanet.module.freeze_bn() epoch_loss = [] total_classification_loss = 0.0 total_regression_loss = 0.0 total_running_loss = 0.0 total_val_classification_loss = 0.0 total_val_regression_loss = 0.0 total_val_running_loss = 0.0 batch_prev_time = time.time() for iter_num, data in enumerate(dataloader_train): try: optimizer.zero_grad() classification_loss, regression_loss = retinanet( [data['img'].cuda().float(), data['annot']]) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() loss = classification_loss + regression_loss if bool(loss == 0): continue loss.backward() torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1) optimizer.step() loss_hist.append(float(loss)) epoch_loss.append(float(loss)) # sum the loss for tensorboard at this batch total_regression_loss += regression_loss total_classification_loss += classification_loss total_running_loss += loss.item() # log = 'Epoch: {} | Iteration: {} | Classification loss: 
{:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format( # epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)) # Determine approximate time left data_done = iter_num data_left = len(dataloader_train) - data_done batch_time_left = datetime.timedelta( seconds=data_left * (time.time() - batch_prev_time)) batch_time_left = chop_microseconds(batch_time_left) batches_done = epoch_num * len(dataloader_train) + iter_num batches_left = parser.epochs * len( dataloader_train) - batches_done total_time_left = datetime.timedelta( seconds=batches_left * (time.time() - epoch_prev_time)) total_time_left = chop_microseconds(total_time_left) batch_prev_time = time.time() epoch_prev_time = time.time() # Print training step log prefix_log = '[Epoch: {}/{}] | [Batch: {}/{}]'.format( epoch_num + 1, parser.epochs, iter_num + 1, len(dataloader_train)) suffix_log = '[Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}] ETA: {} / {}'.format( float(classification_loss), float(regression_loss), np.mean(loss_hist), batch_time_left, total_time_left) printProgressBar(iteration=iter_num + 1, total=len(dataloader_train), prefix=prefix_log, suffix=suffix_log) del classification_loss del regression_loss except Exception as e: print(e) continue # Validation with torch.no_grad(): val_batch_prev_time = time.time() for iter_num, data in enumerate(dataloader_val): try: val_classification_loss, val_regression_loss = retinanet( [data['img'].cuda().float(), data['annot']]) val_classification_loss = val_classification_loss.mean() val_regression_loss = val_regression_loss.mean() val_loss = val_classification_loss + val_regression_loss if bool(val_loss == 0): continue val_loss_hist.append(float(val_loss)) # sum the loss for tensorboard at this batch total_val_regression_loss += val_regression_loss total_val_classification_loss += val_classification_loss total_val_running_loss += val_loss.item() # Determine approximate time left data_done = iter_num data_left = len(dataloader_val) - data_done val_batch_time_left = datetime.timedelta( seconds=data_left * (time.time() - val_batch_prev_time)) val_batch_time_left = chop_microseconds( val_batch_time_left) batches_done = epoch_num * len(dataloader_val) + ( epoch_num + 1) * len(dataloader_train) + iter_num batches_left = parser.epochs * (len( dataloader_train) + len(dataloader_val)) - batches_done total_time_left = datetime.timedelta( seconds=batches_left * (time.time() - epoch_prev_time)) total_time_left = chop_microseconds(total_time_left) val_batch_prev_time = time.time() epoch_prev_time = time.time() # Print training step log prefix_log = 'Validation: [Epoch: {}/{}] | [Batch: {}/{}]'.format( epoch_num + 1, parser.epochs, iter_num + 1, len(dataloader_val)) suffix_log = '[Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}] ETA: {} / {}'.format( float(val_classification_loss), float(val_regression_loss), np.mean(val_loss_hist), val_batch_time_left, total_time_left) printProgressBar(iteration=iter_num + 1, total=len(dataloader_val), prefix=prefix_log, suffix=suffix_log) del val_classification_loss del val_regression_loss except Exception as e: print(e) continue # Evaluate AP if parser.dataset_type == 'coco': print('Evaluating dataset') # coco_eval.evaluate_coco(dataset_val, retinanet) coco_eval.evaluate_coco_and_save_image( dataset_val, retinanet, os.path.join(save_images_dir, save_ckpt_name), epoch_num + 1) elif parser.dataset_type == 'csv' and parser.csv_val is not None: 
print('Evaluating dataset') mAP = csv_eval.evaluate(dataset_val, retinanet) scheduler.step(np.mean(epoch_loss)) # calculate loss average average_classification_loss = total_classification_loss / len( dataloader_train) average_regression_loss = total_regression_loss / len(dataloader_train) average_running_loss = total_running_loss / len(dataloader_train) # TensorBoard tb_writer.add_scalar(tag='Classification Loss', scalar_value=average_classification_loss, global_step=epoch_num + 1) tb_writer.add_scalar(tag='Regression Loss', scalar_value=average_regression_loss, global_step=epoch_num + 1) tb_writer.add_scalar(tag='Total Loss', scalar_value=average_running_loss, global_step=epoch_num + 1) # Save model print("\nSave model to [%s] at %d epoch\n" % (save_ckpt_name, epoch_num + 1)) checkpoint_path = os.path.join( save_models_dir, "%s/RetinaNet_backbone(%s)_%d.pt" % (save_ckpt_name, backbone_name, epoch_num + 1)) torch.save(retinanet.module, checkpoint_path) # torch.save(retinanet.module, '{}_retinanet_{}.pt'.format(parser.dataset_type, epoch_num + 1)) retinanet.eval() torch.save(retinanet, 'model_final.pt')
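# Sketch of the ETA bookkeeping used above: remaining batches times the measured seconds
# per batch, with microseconds chopped for display. chop_microseconds in the script is
# assumed to behave roughly like the subtraction below; `eta` is an illustrative name.
import datetime

def eta(batches_left, seconds_per_batch):
    td = datetime.timedelta(seconds=batches_left * seconds_per_batch)
    return td - datetime.timedelta(microseconds=td.microseconds)

print(eta(1500, 0.42))  # 0:10:30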
def main(args=None): parser = argparse.ArgumentParser( description='Simple training script for training a RetinaNet network.') parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.') # parser.add_argument('--coco_path', help='Path to COCO directory') parser.add_argument('--HW2_path', help='Path to HW2 directory') parser.add_argument( '--csv_train', help='Path to file containing training annotations (see readme)') parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)') parser.add_argument( '--csv_val', help= 'Path to file containing validation annotations (optional, see readme)' ) parser.add_argument( '--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50) parser.add_argument('--epochs', help='Number of epochs', type=int, default=100) parser = parser.parse_args(args) # Create the data loaders if parser.dataset == 'coco': if parser.coco_path is None: raise ValueError('Must provide --coco_path when training on COCO,') dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose( [Normalizer(), Resizer()])) elif parser.dataset == 'HW2': if parser.HW2_path is None: raise ValueError('Must provide --HW2_path when training on HW2,') dataset_train = HW2Dataset(parser.HW2_path, transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) #dataset_val = HW2Dataset(parser.HW2_path, # transform=transforms.Compose([Normalizer(), Resizer()])) elif parser.dataset == 'csv': if parser.csv_train is None: raise ValueError('Must provide --csv_train when training on COCO,') if parser.csv_classes is None: raise ValueError( 'Must provide --csv_classes when training on COCO,') dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) if parser.csv_val is None: dataset_val = None print('No validation annotations provided.') else: dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Resizer()])) else: raise ValueError( 'Dataset type not understood (must be csv or coco), exiting.') # sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False) dataloader_train = DataLoader(dataset_train, batch_size=8, num_workers=3, collate_fn=collater) # if dataset_val is not None: # sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) # dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val) # Create the model if parser.depth == 18: retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 34: retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 50: retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True) #retinanet.load_state_dict(torch.load('coco_resnet_50_map_0_335_state_dict.pt')) #retinanet_state = retinanet.state_dict() #loaded = torch.load('coco_resnet_50_map_0_335_state_dict.pt') #pretrained = {k:v for k, v in loaded.items() if k in retinanet_state} #retinanet_state.update(pretrained) #retinanet.load_state_dict(retinanet_state) retinanet = torch.load('saved_models_3/HW2_retinanet_0.pt') elif parser.depth == 101: retinanet = 
model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 152: retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True) else: raise ValueError( 'Unsupported model depth, must be one of 18, 34, 50, 101, 152') use_gpu = True if use_gpu: if torch.cuda.is_available(): retinanet = retinanet.cuda() if torch.cuda.is_available(): retinanet = torch.nn.DataParallel(retinanet).cuda() else: retinanet = torch.nn.DataParallel(retinanet) retinanet.training = True optimizer = optim.Adam(retinanet.parameters(), lr=1e-4) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) loss_hist = collections.deque(maxlen=500) retinanet.train() retinanet.module.freeze_bn() print('Num training images: {}'.format(len(dataset_train))) # pre_epoch was used below but never defined; start at 0 (set it to the epoch of the checkpoint reloaded above when resuming) pre_epoch = 0 for epoch_num in range(pre_epoch, parser.epochs): retinanet.train() retinanet.module.freeze_bn() epoch_loss = [] for iter_num, data in enumerate(dataloader_train): try: optimizer.zero_grad() if torch.cuda.is_available(): classification_loss, regression_loss = retinanet( [data['img'].cuda().float(), data['annot']]) else: classification_loss, regression_loss = retinanet( [data['img'].float(), data['annot']]) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() loss = classification_loss + regression_loss if bool(loss == 0): continue loss.backward() torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1) optimizer.step() loss_hist.append(float(loss)) epoch_loss.append(float(loss)) print( 'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}' .format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist))) del classification_loss del regression_loss except Exception as e: print(e) continue # if parser.dataset == 'coco': # print('Evaluating dataset') # coco_eval.evaluate_coco(dataset_val, retinanet) # elif parser.dataset == 'csv' and parser.csv_val is not None: # print('Evaluating dataset') # mAP = csv_eval.evaluate(dataset_val, retinanet) scheduler.step(np.mean(epoch_loss)) torch.save( retinanet.module, 'saved_models_3/{}_retinanet_{}.pt'.format(parser.dataset, epoch_num)) # retinanet.eval() torch.save(retinanet, 'saved_models_3/model_final.pt')
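# The commented-out block in the script above sketches partial weight loading: keep only
# the keys that exist in the current model, merge them into its state_dict, and load the
# result. A cleaned-up version of that same pattern (the .pt filename is the one
# referenced in the script; `load_partial_state` is an illustrative name):
import torch

def load_partial_state(model, path='coco_resnet_50_map_0_335_state_dict.pt'):
    state = model.state_dict()
    loaded = torch.load(path, map_location='cpu')
    pretrained = {k: v for k, v in loaded.items() if k in state}
    state.update(pretrained)
    model.load_state_dict(state)
    return sorted(set(loaded) - set(state))  # checkpoint keys that were skipped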
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=150)
    parser.add_argument('--gpu_num', help='default gpu', type=int, default=5)
    parser.add_argument('--saved_dir',
                        help='saved dir',
                        default='trained_models/coco/resnet50/')

    parser = parser.parse_args(args)

    # Change the GPU allocation
    GPU_NUM = parser.gpu_num
    device = torch.device(
        f'cuda:{GPU_NUM}' if torch.cuda.is_available() else 'cpu')
    torch.cuda.set_device(device)  # change allocation of current GPU
    print(device)
    print('Current cuda device ', torch.cuda.current_device())  # check

    device_ids = [5, 4, 3, 1, 7]

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on csv.')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on csv.')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(), Resizer()]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=8,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=8,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   device=device,
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   device=device,
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   device=device,
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    device=device,
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    device=device,
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        if torch.cuda.is_available():
            retinanet = retinanet.to(device)

    if torch.cuda.is_available():
        retinanet = torch.nn.DataParallel(retinanet,
                                          device_ids=device_ids,
                                          output_device=GPU_NUM).to(device)
    else:
        retinanet = torch.nn.DataParallel(retinanet)

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    criterion = FocalLoss(device)
    criterion = criterion.to(device)

    # optimizer = optim.Adam(retinanet.parameters(), lr=1e-7)
    # scheduler = CosineAnnealingWarmUpRestarts(optimizer, T_0=30, T_mult=2, eta_max=0.0004, T_up=10, gamma=0.5)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    # Lowest running loss seen so far; initialised once, outside the epoch
    # loop, so the best model is tracked across epochs.
    loss_per_epoch = 2

    for epoch_num in range(parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []
        start_time = time.time()

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                if torch.cuda.is_available():
                    outputs = retinanet(
                        [data['img'].to(device).float(), data['annot']])
                else:
                    outputs = retinanet([data['img'].float(), data['annot']])

                classification, regression, anchors, annotations = outputs
                classification_loss, regression_loss = criterion(
                    classification, regression, anchors, annotations)

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                if iter_num % 500 == 0:
                    print(
                        'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                        .format(epoch_num, iter_num,
                                float(classification_loss),
                                float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        print('epoch time :', time.time() - start_time)

        if loss_per_epoch > np.mean(loss_hist):
            print('best model is saved')
            torch.save(retinanet.state_dict(),
                       parser.saved_dir + 'best_model.pt')
            loss_per_epoch = np.mean(loss_hist)

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))

        torch.save(retinanet.module,
                   '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()
    torch.save(retinanet, 'model_final.pt')
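# --- Sketch (not part of the original script) ---
# The script above saves a state_dict whenever the running mean loss improves
# on the best value seen so far. A minimal sketch of that best-model pattern,
# factored out of the training loop; BestModelSaver is an illustrative helper
# name, not something defined in the original code.
import torch


class BestModelSaver:
    """Save model.state_dict() whenever the tracked metric improves
    (lower is better), mirroring the best_model.pt logic above."""

    def __init__(self, path, initial_best=float('inf')):
        self.path = path
        self.best = initial_best

    def step(self, metric, model):
        if metric < self.best:
            self.best = metric
            torch.save(model.state_dict(), self.path)
            print('best model is saved (loss: {:1.5f})'.format(metric))

# usage inside the epoch loop:
#   saver = BestModelSaver(parser.saved_dir + 'best_model.pt')
#   for epoch_num in range(parser.epochs):
#       ...train one epoch...
#       saver.step(np.mean(loss_hist), retinanet)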