def main(args=None):
    # Note: the original description said "training script", but this script only
    # loads a trained model and dumps its detections.
    parser = argparse.ArgumentParser(
        description='Simple script for dumping RetinaNet detections to JSON.')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val',
                        help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser = parser.parse_args(args)

    dataset_val = CSVDataset(train_file=parser.csv_val,
                             class_list=parser.csv_classes,
                             transform=transforms.Compose([Normalizer(), Resizer()]),
                             is_visualizing=True)
    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater,
                                batch_sampler=sampler_val)

    retinanet = model.resnet50(num_classes=dataset_val.num_classes(), pretrained=True)
    retinanet.load_state_dict(torch.load(parser.model))

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet.eval()

    scores_for_rnn = {}
    for idx, data in enumerate(dataloader_val):
        print(idx)
        with torch.no_grad():
            img_name = data['img_name'][0]
            scale = data['scale'][0]
            scores, transformed_anchors = retinanet(data['img'].cuda().float(),
                                                    return_all_scores=True)
            # Undo the Resizer scaling so boxes are in original image coordinates.
            transformed_anchors /= scale
            scores, transformed_anchors = scores.cpu(), transformed_anchors.cpu()
            scores = [[scores[i, j].item() for j in range(scores.size(1))]
                      for i in range(scores.size(0))]
            transformed_anchors = [[transformed_anchors[i, j].item()
                                    for j in range(transformed_anchors.size(1))]
                                   for i in range(transformed_anchors.size(0))]
            scores_for_rnn[img_name] = {'scores': scores, 'bboxes': transformed_anchors}

    with open('detections.json', 'w') as f:
        json.dump(scores_for_rnn, f)
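# ---------------------------------------------------------------------------
# NOTE: the functions in this file are excerpts and rely on module-level
# imports. A typical header for this family of pytorch-retinanet scripts is
# sketched below; the exact module paths (e.g. `retinanet.dataloader` vs a
# local `dataloader.py`) vary between forks, so treat this as an assumption,
# not the original header.
#
# import argparse
# import collections
# import json
# import os
# import subprocess
# import sys
# import time
#
# import numpy as np
# import torch
# import torch.optim as optim
# from torch.utils.data import DataLoader
# from torchvision import transforms
#
# from retinanet import model, csv_eval
# from retinanet.dataloader import (CSVDataset, collater, AspectRatioBasedSampler,
#                                   Normalizer, Augmenter, Resizer, UnNormalizer)
# ---------------------------------------------------------------------------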
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.', default='csv')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)',
                        default='binary_class.csv')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152',
                        type=int, default=18)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=500)
    parser.add_argument('--epochs_only_det', help='Number of epochs to train detection part',
                        type=int, default=1)
    parser.add_argument('--max_epochs_no_improvement', help='Max epochs without improvement',
                        type=int, default=100)
    parser.add_argument('--pretrained_model', help='Path of .pt file with pretrained model',
                        default='esposallescsv_retinanet_0.pt')
    parser.add_argument('--model_out', help='Path of .pt file with trained model to save',
                        default='trained')
    parser.add_argument('--score_threshold', help='Score above which boxes are kept',
                        type=float, default=0.5)
    parser.add_argument('--nms_threshold', help='IoU threshold for non-maximum suppression',
                        type=float, default=0.2)
    parser.add_argument('--max_boxes', help='Max boxes to be fed to recognition', default=95)
    parser.add_argument('--seg_level', help='[line, word], to choose anchor aspect ratio',
                        default='word')
    parser.add_argument('--early_stop_crit',
                        help='Early stop criterion, detection (map) or transcription (cer)',
                        default='cer')
    parser.add_argument('--max_iters_epoch', help='Max steps per epoch (for debugging)',
                        default=1000000)
    parser.add_argument('--train_htr', help='Train recognition or not', default='True')
    parser.add_argument('--train_det', help='Train detection or not', default='True')
    parser.add_argument('--binary_classifier',
                        help='Whether to use the classification branch as binary (multiclass otherwise).',
                        default='False')
    parser.add_argument('--htr_gt_box', help='Train recognition branch with box gt (for debugging)',
                        default='False')
    parser.add_argument('--ner_branch', help='Train named entity recognition with separate branch',
                        default='False')
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train')
        dataset_name = parser.csv_train.split('/')[-2]
        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    # Files for training log
    experiment_id = str(time.time()).split('.')[0]
    valid_cer_f = open('trained_models/' + parser.model_out + 'log.txt', 'w')
    for arg in vars(parser):
        if getattr(parser, arg) is not None:
            valid_cer_f.write(str(arg) + ' ' + str(getattr(parser, arg)) + '\n')
    current_commit = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
    valid_cer_f.write(str(current_commit))
    # Column header for the per-epoch log lines written below.
    valid_cer_f.write('epoch_num cer best_cer mAP best_mAP time\n')
    valid_cer_f.close()

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=1, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater,
                                  batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater,
                                    batch_sampler=sampler_val)

    if not os.path.exists('trained_models'):
        os.mkdir('trained_models')

    # Create the model
    train_htr = parser.train_htr == 'True'
    htr_gt_box = parser.htr_gt_box == 'True'
    ner_branch = parser.ner_branch == 'True'
    binary_classifier = parser.binary_classifier == 'True'
    torch.backends.cudnn.benchmark = False
    alphabet = dataset_train.alphabet

    if os.path.exists(parser.pretrained_model):
        retinanet = torch.load(parser.pretrained_model)
        retinanet.classificationModel = ClassificationModel(
            num_features_in=256,
            num_anchors=retinanet.anchors.num_anchors,
            num_classes=dataset_train.num_classes())
        if ner_branch:
            retinanet.nerModel = NERModel(feature_size=256,
                                          pool_h=retinanet.pool_h,
                                          n_classes=dataset_train.num_classes(),
                                          pool_w=retinanet.pool_w)
    else:
        if parser.depth == 18:
            retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                       pretrained=True,
                                       max_boxes=int(parser.max_boxes),
                                       score_threshold=float(parser.score_threshold),
                                       seg_level=parser.seg_level,
                                       alphabet=alphabet,
                                       train_htr=train_htr,
                                       htr_gt_box=htr_gt_box,
                                       ner_branch=ner_branch,
                                       binary_classifier=binary_classifier)
        elif parser.depth == 34:
            retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                       pretrained=True,
                                       max_boxes=int(parser.max_boxes),
                                       score_threshold=float(parser.score_threshold),
                                       seg_level=parser.seg_level,
                                       alphabet=alphabet,
                                       train_htr=train_htr,
                                       htr_gt_box=htr_gt_box)
        elif parser.depth == 50:
            retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
        elif parser.depth == 101:
            retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
        elif parser.depth == 152:
            retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
        else:
            raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    train_htr = parser.train_htr == 'True'
    train_det = parser.train_det == 'True'
    retinanet.htr_gt_box = parser.htr_gt_box == 'True'
    retinanet.train_htr = train_htr
    retinanet.epochs_only_det = parser.epochs_only_det

    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()
    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=50, verbose=True)
    loss_hist = collections.deque(maxlen=500)
    ctc = CTCLoss()

    retinanet.train()
    retinanet.module.freeze_bn()

    best_cer = 1000
    best_map = 0
    epochs_no_improvement = 0
    verbose_each = 20
    optimize_each = 1
    objective = 100
    best_objective = 10000

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        cers = []
        retinanet.training = True
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            if iter_num > int(parser.max_iters_epoch):
                break
            try:
                if iter_num % optimize_each == 0:
                    optimizer.zero_grad()
                (classification_loss, regression_loss, ctc_loss, ner_loss) = retinanet(
                    [data['img'].cuda().float(), data['annot'], ctc, epoch_num])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                if train_det:
                    if train_htr:
                        loss = ctc_loss + classification_loss + regression_loss + ner_loss
                    else:
                        loss = classification_loss + regression_loss + ner_loss
                elif train_htr:
                    loss = ctc_loss
                else:
                    continue
                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                if iter_num % verbose_each == 0:
                    print('Epoch: {} | Step: {} | Classification loss: {:1.5f} | '
                          'Regression loss: {:1.5f} | CTC loss: {:1.5f} | NER loss: {:1.5f} | '
                          'Running loss: {:1.5f} | Total loss: {:1.5f}\r'.format(
                              epoch_num, iter_num, float(classification_loss),
                              float(regression_loss), float(ctc_loss), float(ner_loss),
                              np.mean(loss_hist), float(loss)))
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                torch.cuda.empty_cache()
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'csv' and parser.csv_val is not None and train_det:
            print('Evaluating dataset')
            mAP, text_mAP, current_cer = csv_eval.evaluate(
                dataset_val, retinanet, score_threshold=parser.score_threshold)
            # text_mAP, _ = csv_eval_binary_map.evaluate(dataset_val, retinanet,
            #                                            score_threshold=parser.score_threshold)
            # Combined early-stopping objective: lower CER and higher mAP are both better.
            objective = current_cer * (1 - mAP)

        retinanet.eval()
        retinanet.training = False
        retinanet.score_threshold = float(parser.score_threshold)

        '''for idx, data in enumerate(dataloader_val):
            if idx > int(parser.max_iters_epoch):
                break
            print("Eval CER on validation set:", idx, "/", len(dataset_val), "\r")
            image_name = dataset_val.image_names[idx].split('/')[-1].split('.')[-2]
            # generate_pagexml(image_name, data, retinanet, parser.score_threshold,
            #                  parser.nms_threshold, dataset_val)
            text_gt = ".".join(dataset_val.image_names[idx].split('.')[:-1]) + '.txt'
            f = open(text_gt, 'r')
            text_gt_lines = f.readlines()[0]
            transcript_pred = get_transcript(image_name, data, retinanet,
                                             float(parser.score_threshold),
                                             float(parser.nms_threshold), dataset_val, alphabet)
            cers.append(float(editdistance.eval(transcript_pred, text_gt_lines)) / len(text_gt_lines))'''

        t = str(time.time()).split('.')[0]
        valid_cer_f.close()
        # print("GT", text_gt_lines)
        # print("PREDS SAMPLE:", transcript_pred)
        if parser.early_stop_crit == 'cer':
            if float(objective) < float(best_objective):  # float(current_cer) < float(best_cer):
                best_cer = current_cer
                best_objective = objective
                epochs_no_improvement = 0
                torch.save(retinanet.module,
                           'trained_models/' + parser.model_out +
                           '{}_retinanet.pt'.format(parser.dataset))
            else:
                epochs_no_improvement += 1
            if mAP > best_map:
                best_map = mAP
        elif parser.early_stop_crit == 'map':
            if mAP > best_map:
                best_map = mAP
                epochs_no_improvement = 0
                torch.save(retinanet.module,
                           'trained_models/' + parser.model_out +
                           '{}_retinanet.pt'.format(parser.dataset))
            else:
                epochs_no_improvement += 1
            if float(current_cer) < float(best_cer):
                best_cer = current_cer
        if train_det:
            print(epoch_num, "mAP: ", mAP, " best mAP", best_map)
        if train_htr:
            print("VALID CER:", current_cer, "best CER", best_cer)
        print("Epochs no improvement:", epochs_no_improvement)

        valid_cer_f = open('trained_models/' + parser.model_out + 'log.txt', 'a')
        valid_cer_f.write(str(epoch_num) + " " + str(current_cer) + " " + str(best_cer) + ' ' +
                          str(mAP) + ' ' + str(best_map) + ' ' + str(text_mAP) + '\n')
        if epochs_no_improvement > 3:
            for param_group in optimizer.param_groups:
                if param_group['lr'] > 10e-5:
                    param_group['lr'] *= 0.1
        if epochs_no_improvement >= parser.max_epochs_no_improvement:
            print("TRAINING FINISHED AT EPOCH", epoch_num, ".")
            sys.exit()

        scheduler.step(np.mean(epoch_loss))
        torch.cuda.empty_cache()

    retinanet.eval()
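# Example invocation (sketch): the flags are the ones defined above; the file
# names and script name are placeholders, not from the original repo.
#
#   python train_det_htr.py \
#       --csv_train data/esposalles/train.csv \
#       --csv_classes binary_class.csv \
#       --csv_val data/esposalles/val.csv \
#       --depth 18 --train_htr True --train_det True \
#       --early_stop_crit cer --model_out trained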
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple testing script for RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.', default='csv')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)',
                        default='binary_class.csv')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--csv_box_annot', help='Path to file containing predicted box annotations')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152',
                        type=int, default=18)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=500)
    parser.add_argument('--model', help='Path of .pt file with trained model',
                        default='esposallescsv_retinanet_0.pt')
    parser.add_argument('--model_out', help='Path of .pt file with trained model to save',
                        default='trained')
    parser.add_argument('--score_threshold', help='Score above which boxes are kept', default=0.15)
    parser.add_argument('--nms_threshold', help='IoU threshold for non-maximum suppression', default=0.2)
    parser.add_argument('--max_epochs_no_improvement', help='Max epochs without improvement', default=100)
    parser.add_argument('--max_boxes', help='Max boxes to be fed to recognition', default=50)
    parser.add_argument('--seg_level', help='Line or word, to choose anchor aspect ratio', default='line')
    parser.add_argument('--htr_gt_box', help='Train recognition branch with box gt (for debugging)',
                        default=False)
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'csv':
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when testing on csv,')
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
        if parser.csv_box_annot is not None:
            box_annot_data = CSVDataset(train_file=parser.csv_box_annot,
                                        class_list=parser.csv_classes,
                                        transform=transforms.Compose([Normalizer(), Resizer()]))
        else:
            box_annot_data = None
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater,
                                    batch_sampler=sampler_val)
    if box_annot_data is not None:
        sampler_val = AspectRatioBasedSampler(box_annot_data, batch_size=1, drop_last=False)
        dataloader_box_annot = DataLoader(box_annot_data, num_workers=0, collate_fn=collater,
                                          batch_sampler=sampler_val)
    else:
        dataloader_box_annot = dataloader_val

    if not os.path.exists('trained_models'):
        os.mkdir('trained_models')

    # Create the model
    alphabet = dataset_val.alphabet
    if os.path.exists(parser.model):
        retinanet = torch.load(parser.model)
    else:
        # Note: the original fallback branches referenced an undefined
        # `dataset_train`; `dataset_val` is the only dataset in this script.
        if parser.depth == 18:
            retinanet = model.resnet18(num_classes=dataset_val.num_classes(),
                                       pretrained=True,
                                       max_boxes=int(parser.max_boxes),
                                       score_threshold=float(parser.score_threshold),
                                       seg_level=parser.seg_level,
                                       alphabet=alphabet)
        elif parser.depth == 34:
            retinanet = model.resnet34(num_classes=dataset_val.num_classes(), pretrained=True)
        elif parser.depth == 50:
            retinanet = model.resnet50(num_classes=dataset_val.num_classes(), pretrained=True)
        elif parser.depth == 101:
            retinanet = model.resnet101(num_classes=dataset_val.num_classes(), pretrained=True)
        elif parser.depth == 152:
            retinanet = model.resnet152(num_classes=dataset_val.num_classes(), pretrained=True)
        else:
            raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()
    # retinanet = torch.load('../Documents/TRAINED_MODELS/pytorch-retinanet/esposallescsv_retinanet_99.pt')
    # print("LOADED pretrained MODEL\n\n")

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=4, verbose=True)
    loss_hist = collections.deque(maxlen=500)
    ctc = CTCLoss()
    retinanet.module.freeze_bn()

    best_cer = 1000
    epochs_no_improvement = 0
    cers = []

    retinanet.eval()
    retinanet.module.epochs_only_det = 0
    # retinanet.module.htr_gt_box = False
    retinanet.training = False
    if parser.score_threshold is not None:
        retinanet.module.score_threshold = float(parser.score_threshold)

    '''if parser.dataset == 'csv' and parser.csv_val is not None:
        print('Evaluating dataset')'''

    mAP = csv_eval.evaluate(dataset_val, retinanet,
                            score_threshold=retinanet.module.score_threshold)
    aps = []
    for k, v in mAP.items():
        aps.append(v[0])
    print("VALID mAP:", np.mean(aps))
    print("score th", retinanet.module.score_threshold)

    for idx, data in enumerate(dataloader_box_annot):
        print("Eval CER on validation set:", idx, "/", len(dataloader_box_annot), "\r")
        if box_annot_data:
            image_name = box_annot_data.image_names[idx].split('/')[-1].split('.')[-2]
        else:
            image_name = dataset_val.image_names[idx].split('/')[-1].split('.')[-2]
        # generate_pagexml(image_name, data, retinanet, parser.score_threshold,
        #                  parser.nms_threshold, dataset_val)
        text_gt_path = "/".join(dataset_val.image_names[idx].split('/')[:-1])
        text_gt = os.path.join(text_gt_path, image_name + '.txt')
        f = open(text_gt, 'r')
        text_gt_lines = f.readlines()[0]
        transcript_pred = get_transcript(image_name, data, retinanet,
                                         retinanet.module.score_threshold,
                                         float(parser.nms_threshold), dataset_val, alphabet)
        # Character error rate: edit distance normalised by ground-truth length.
        cers.append(float(editdistance.eval(transcript_pred, text_gt_lines)) / len(text_gt_lines))
        print("GT", text_gt_lines)
        print("PREDS SAMPLE:", transcript_pred)
        print("VALID CER:", np.mean(cers), "best CER", best_cer)
    # Final summary after the loop.
    print("GT", text_gt_lines)
    print("PREDS SAMPLE:", transcript_pred)
    print("VALID CER:", np.mean(cers), "best CER", best_cer)
def train(args):
    train_csv = args.train_csv
    test_csv = args.test_csv
    labels_csv = args.labels_csv
    model_type = args.model_type
    epochs = int(args.epochs)
    batch_size = int(args.batch_size)

    dataset_train = CSVDataset(train_file=train_csv, class_list=labels_csv,
                               transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
    dataset_val = CSVDataset(train_file=test_csv, class_list=labels_csv,
                             transform=transforms.Compose([Normalizer(), Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=batch_size, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater,
                                  batch_sampler=sampler)

    retinanet = RetinaNet_efficientnet_b4(num_classes=dataset_train.num_classes(),
                                          model_type=model_type)

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()
    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | '
                      'Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                          epoch_num, iter_num, float(classification_loss),
                          float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        mAP, MAP = evaluate(dataset_val, retinanet)
        scheduler.step(np.mean(epoch_loss))
        torch.save(retinanet.module,
                   '{}_retinanet_{}_map{}.pt'.format('EfficientNet' + model_type, epoch_num, MAP))

    retinanet.eval()
    torch.save(retinanet, 'model_final.pt')
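# Example driver (sketch): `train` expects an object exposing the attributes
# read above; argparse typically provides it, but a SimpleNamespace works for
# quick tests. The CSV paths and model type below are placeholders.
#
# from types import SimpleNamespace
# args = SimpleNamespace(train_csv='train.csv', test_csv='test.csv',
#                        labels_csv='labels.csv', model_type='b4',
#                        epochs=10, batch_size=4)
# train(args)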
def main(args=None): """ In current implementation, if test csv is provided, we use that as validation set and combine the val and train csv's as the csv for training. If train_all_labeled_data flag is use, then we combine all 3 (if test is provided) for training and use a prespecified learning rate step schedule. """ parser = argparse.ArgumentParser( description='Simple training script for training a RetinaNet network.') parser.add_argument( '--csv_train', help='Path to file containing training annotations (see readme)') parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)') parser.add_argument( '--csv_val', help= 'Path to file containing validation annotations (optional, see readme)', default=None) parser.add_argument( '--csv_test', help= 'Path to file containing test annotations (optional, if provided, train & val will be combined for training and test will be used for evaluation)', default=None) parser.add_argument('--lr', type=float, default=2e-5) parser.add_argument( '--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=101) parser.add_argument('--epochs', help='Number of epochs', type=int, default=25) parser.add_argument('--model_output_dir', type=str, default='models') parser.add_argument( '--train_all_labeled_data', help= 'Combine train, val, and test into 1 training set. Will use prespecified learning rate scheduler steps', action='store_true') parser.add_argument('--resnet-backbone-normalization', choices=['batch_norm', 'group_norm'], type=str, default='batch_norm') parser = parser.parse_args(args) print('Learning Rate: {}'.format(parser.lr)) print("Normalization: ", parser.resnet_backbone_normalization) # Create folder - will raise error if folder exists assert (os.path.exists(parser.model_output_dir) == False) os.mkdir(parser.model_output_dir) if parser.csv_train is None: raise ValueError('Must provide --csv_train when training,') if parser.csv_classes is None: raise ValueError('Must provide --csv_classes when training,') if not parser.csv_val and parser.csv_test: raise ValueError( "Cannot specify test set without specifying validation set") if parser.train_all_labeled_data: csv_paths = [parser.csv_train, parser.csv_val, parser.csv_test] train_csv = [] for path in csv_paths: if isinstance(path, str): train_csv.append(path) val_csv = None else: if parser.csv_train and parser.csv_val and parser.csv_test: train_csv = [parser.csv_train, parser.csv_val ] # Combine train and val sets for training val_csv = parser.csv_test else: train_csv = parser.csv_train val_csv = parser.csv_val print('loading train data') print(train_csv) dataset_train = CSVDataset(train_file=train_csv, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) print(dataset_train.__len__()) if val_csv is None: dataset_val = None print('No validation annotations provided.') else: dataset_val = CSVDataset(train_file=val_csv, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Resizer()])) print('putting data into loader') sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False) dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler) if dataset_val is not None: sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val) # Create the model print('creating model') if parser.depth == 18: 
retinanet = model.resnet18( num_classes=dataset_train.num_classes(), pretrained=True, normalization=parser.resnet_backbone_normalization) elif parser.depth == 34: retinanet = model.resnet34( num_classes=dataset_train.num_classes(), pretrained=True, normalization=parser.resnet_backbone_normalization) elif parser.depth == 50: retinanet = model.resnet50( num_classes=dataset_train.num_classes(), pretrained=True, normalization=parser.resnet_backbone_normalization) elif parser.depth == 101: retinanet = model.resnet101( num_classes=dataset_train.num_classes(), pretrained=True, normalization=parser.resnet_backbone_normalization) elif parser.depth == 152: retinanet = model.resnet152( num_classes=dataset_train.num_classes(), pretrained=True, normalization=parser.resnet_backbone_normalization) else: raise ValueError( 'Unsupported model depth, must be one of 18, 34, 50, 101, 152') use_gpu = True if use_gpu: retinanet = retinanet.cuda() retinanet = torch.nn.DataParallel(retinanet).cuda() retinanet.training = True optimizer = optim.Adam(retinanet.parameters(), lr=parser.lr) lr_factor = 0.3 if not parser.train_all_labeled_data: scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, factor=lr_factor, verbose=True) else: # these milestones are for when using the lung masks - not for unmasked lung data scheduler = optim.lr_scheduler.MultiStepLR( optimizer, milestones=[12, 16, 20, 24], gamma=lr_factor) # masked training #scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[14, 18, 22, 26], gamma=lr_factor) loss_hist = collections.deque(maxlen=500) retinanet.train() retinanet.module.freeze_bn() #initialize tensorboard writer = SummaryWriter(comment=parser.model_output_dir) # Augmentation seq = iaa.Sequential([ iaa.Fliplr(0.5), iaa.Flipud(0.5), iaa.Affine(scale={ "x": (1.0, 1.2), "y": (1.0, 1.2) }, rotate=(-20, 20), shear=(-4, 4)) ], random_order=True) def augment(data, seq): for n, img in enumerate(data['img']): # imgaug needs dim in format (H, W, C) image = data['img'][n].permute(1, 2, 0).numpy() bbs_array = [] for ann in data['annot'][n]: x1, y1, x2, y2, _ = ann bbs_array.append(BoundingBox(x1=x1, y1=y1, x2=x2, y2=y2)) bbs = BoundingBoxesOnImage(bbs_array, shape=image.shape) image_aug, bbs_aug = seq(image=image, bounding_boxes=bbs) # save augmented image and chage dims to (C, H, W) data['img'][n] = torch.tensor(image_aug.copy()).permute(2, 0, 1) # save augmented annotations for i, bbox in enumerate(bbs_aug.bounding_boxes): x1, y1, x2, y2 = bbox.x1, bbox.y1, bbox.x2, bbox.y2 obj_class = data['annot'][n][i][-1] data['annot'][n][i] = torch.tensor([x1, y1, x2, y2, obj_class]) return data print('Num training images: {}'.format(len(dataset_train))) dir_training_images = os.path.join(os.getcwd(), writer.log_dir, 'training_images') os.mkdir(dir_training_images) best_validation_loss = None best_validation_map = None for epoch_num in range(parser.epochs): writer.add_scalar('Train/LR', optimizer.param_groups[0]['lr'], epoch_num) retinanet.train() retinanet.module.freeze_bn() epoch_loss = [] for iter_num, data in enumerate(dataloader_train): try: optimizer.zero_grad() data = augment(data, seq) # save a few training images to see what augmentation looks like if iter_num % 100 == 0 and epoch_num == 0: x1, y1, x2, y2, _ = data['annot'][0][0] fig, ax = plt.subplots(1) ax.imshow(data['img'][0][1]) rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=1, edgecolor='r', facecolor='none', alpha=1) ax.add_patch(rect) fig.savefig( os.path.join(dir_training_images, 
'{}.png'.format(iter_num))) plt.close() classification_loss, regression_loss = retinanet( [data['img'].cuda().float(), data['annot']]) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() loss = classification_loss + regression_loss if bool(loss == 0): continue loss.backward() if parser.resnet_backbone_normalization == 'batch_norm': torch.nn.utils.clip_grad_norm_( parameters=retinanet.parameters(), max_norm=0.1) else: torch.nn.utils.clip_grad_norm_( parameters=retinanet.parameters(), max_norm=0.01 ) # Decrease norm to reduce risk of exploding gradients optimizer.step() loss_hist.append(float(loss)) epoch_loss.append(float(loss)) print( 'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}' .format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist))) del classification_loss del regression_loss except Exception as e: print(e) continue writer.add_scalar('Train/Loss', np.mean(epoch_loss), epoch_num) if not parser.train_all_labeled_data: print('Evaluating Validation Loss...') with torch.no_grad(): retinanet.train() val_losses, val_class_losses, val_reg_losses = [], [], [] for val_iter_num, val_data in enumerate(dataloader_val): try: val_classification_loss, val_regression_loss = retinanet( [ val_data['img'].cuda().float(), val_data['annot'] ]) val_losses.append( float(val_classification_loss) + float(val_regression_loss)) val_class_losses.append(float(val_classification_loss)) val_reg_losses.append(float(val_regression_loss)) del val_classification_loss, val_regression_loss except Exception as e: print(e) continue print( 'VALIDATION Epoch: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Total loss: {:1.5f}' .format(epoch_num, np.mean(val_class_losses), np.mean(val_reg_losses), np.mean(val_losses))) # Save model with best validation loss if best_validation_loss is None: best_validation_loss = np.mean(val_losses) if best_validation_loss >= np.mean(val_losses): best_validation_loss = np.mean(val_losses) torch.save( retinanet.module, parser.model_output_dir + '/best_result_valloss.pt') writer.add_scalar('Validation/Loss', np.mean(val_losses), epoch_num) # Calculate Validation mAP print('Evaluating validation mAP') mAP = csv_eval.evaluate(dataset_val, retinanet) print("Validation mAP: " + str(mAP[0][0])) if best_validation_map is None: best_validation_map = mAP[0][0] elif best_validation_map < mAP[0][0]: best_validation_map = mAP[0][0] torch.save( retinanet.module, parser.model_output_dir + '/best_result_valmAP.pt') writer.add_scalar('Validation/mAP', mAP[0][0], epoch_num) if not parser.train_all_labeled_data: scheduler.step(np.mean(val_losses)) else: scheduler.step() torch.save( retinanet.module, parser.model_output_dir + '/retinanet_{}.pt'.format(epoch_num)) retinanet.eval() torch.save(retinanet, parser.model_output_dir + '/model_final.pt')
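# To inspect the Train/* and Validation/* scalars written above, point
# TensorBoard at the writer's log directory (SummaryWriter defaults to ./runs
# when only `comment` is given):
#
#   tensorboard --logdir runs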
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a CTracker network.')
    parser.add_argument('--dataset', default='csv', type=str,
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--model_dir', default='./ctracker/', type=str,
                        help='Path to save the model.')
    parser.add_argument('--root_path', default='/Dataset/Tracking/MOT17/', type=str,
                        help='Path of the directory containing both label and images')
    parser.add_argument('--csv_train', default='train_annots.csv', type=str,
                        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', default='train_labels.csv', type=str,
                        help='Path to file containing class list (see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152',
                        type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--print_freq', help='Print frequency', type=int, default=100)
    parser.add_argument('--save_every', help='Save a checkpoint of model at given interval of epochs',
                        type=int, default=5)
    parser = parser.parse_args(args)
    print(parser)
    print(parser.model_dir)

    if not os.path.exists(parser.model_dir):
        os.makedirs(parser.model_dir)

    # Create the data loaders
    if parser.dataset == 'csv':
        if (parser.csv_train is None) or (parser.csv_train == ''):
            raise ValueError('Must provide --csv_train when training on csv,')
        if (parser.csv_classes is None) or (parser.csv_classes == ''):
            raise ValueError('Must provide --csv_classes when training on csv,')
        dataset_train = CSVDataset(
            parser.root_path,
            train_file=os.path.join(parser.root_path, parser.csv_train),
            class_list=os.path.join(parser.root_path, parser.csv_classes),
            transform=transforms.Compose([RandomSampleCrop(), PhotometricDistort(),
                                          Augmenter(), Normalizer()]))
        # transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()])
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    # sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    sampler = AspectRatioBasedSampler(dataset_train, batch_size=8, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=32, collate_fn=collater,
                                  batch_sampler=sampler)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()
    retinanet.training = True

    # optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    optimizer = optim.Adam(retinanet.parameters(), lr=5e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    total_iter = 0
    for epoch_num in range(parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                total_iter = total_iter + 1
                optimizer.zero_grad()
                # CTracker consumes pairs of adjacent frames plus their annotations.
                (classification_loss, regression_loss), reid_loss = retinanet(
                    [data['img'].cuda().float(), data['annot'],
                     data['img_next'].cuda().float(), data['annot_next']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                reid_loss = reid_loss.mean()
                # loss = classification_loss + regression_loss + track_classification_losses
                loss = classification_loss + regression_loss + reid_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                # print frequency default=100 or e.g. --print_freq 500
                if total_iter % parser.print_freq == 0:
                    print('Epoch: {} | Iter: {} | Cls loss: {:1.5f} | Reid loss: {:1.5f} | '
                          'Reg loss: {:1.5f} | Running loss: {:1.5f}'.format(
                              epoch_num, iter_num, float(classification_loss),
                              float(reid_loss), float(regression_loss), np.mean(loss_hist)))
            except Exception as e:
                print(e)
                continue

        scheduler.step(np.mean(epoch_loss))
        # Save a checkpoint of model at given interval of epochs e.g. --save_every 10
        if epoch_num % parser.save_every == 0:
            torch.save(retinanet,
                       os.path.join(parser.model_dir, 'weights_epoch_' + str(epoch_num) + '.pt'))

    retinanet.eval()
    torch.save(retinanet, os.path.join(parser.model_dir, 'model_final.pt'))
    run_from_train(parser.model_dir, parser.root_path)
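# Example invocation (sketch): flags as defined above; the MOT17 layout and the
# script name are placeholders.
#
#   python train_ctracker.py --root_path /Dataset/Tracking/MOT17/ \
#       --csv_train train_annots.csv --csv_classes train_labels.csv \
#       --depth 50 --epochs 100 --save_every 5 --model_dir ./ctracker/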
def bbox_extraction(file_list='./data/images2.csv'):
    weights_path = './models/csv_retinanet_25.pt'
    csv_classes = './classes.csv'
    dataset_val = CSVDataset(train_file=file_list, class_list=csv_classes,
                             transform=transforms.Compose([Normalizer(), Resizer()]))
    # dataset_val = CSVDataset(train_file=file_list, class_list=csv_classes,
    #                          transform=transforms.Compose([Normalizer()]))
    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater,
                                batch_sampler=sampler_val)

    retinanet = model.resnet50(num_classes=dataset_val.num_classes(), pretrained=False)
    retinanet.load_state_dict(torch.load(weights_path))

    use_gpu = True
    # Fall back to CPU when CUDA is unavailable (the original left `device`
    # undefined in that case).
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    if use_gpu:
        retinanet = retinanet.to(device)
    retinanet.eval()
    unnormalize = UnNormalizer()

    for idx, data in enumerate(dataloader_val):
        with torch.no_grad():
            scores, classification, transformed_anchors = retinanet(
                data['img'].to(device).float())

        def get_bbox(classification, transformed_anchors, label=0):
            # First detection whose predicted class matches `label`.
            bbox = {}
            idx = np.where(classification == label)[0][0]
            co_ord = transformed_anchors[idx, :]
            bbox['x1'] = int(co_ord[0])
            bbox['y1'] = int(co_ord[1])
            bbox['x2'] = int(co_ord[2])
            bbox['y2'] = int(co_ord[3])
            return bbox

        scores = scores.cpu().numpy()
        classification = classification.cpu().numpy()
        transformed_anchors = transformed_anchors.cpu().numpy()
        # print('scores:', scores)
        # print('classification:', classification)
        # print('transformed_anchors', transformed_anchors)

        bbox = {}
        bbox['neck'] = get_bbox(classification, transformed_anchors, label=0)
        bbox['stomach'] = get_bbox(classification, transformed_anchors, label=1)
        # print('neck', bbox['neck'])
        # print('stomach', bbox['stomach'])

        img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
        img[img < 0] = 0
        img[img > 255] = 255
        img = np.transpose(img, (1, 2, 0))
        img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
        cv2.rectangle(img, (bbox['neck']['x1'], bbox['neck']['y1']),
                      (bbox['neck']['x2'], bbox['neck']['y2']),
                      color=(0, 0, 255), thickness=2)
        cv2.rectangle(img, (bbox['stomach']['x1'], bbox['stomach']['y1']),
                      (bbox['stomach']['x2'], bbox['stomach']['y2']),
                      color=(0, 0, 255), thickness=2)
        # cv2.imshow('img', img)
        # cv2.imwrite('./sample_11.jpg', img)
        # cv2.waitKey(0)

        # Returns after the first image, as in the original control flow.
        return bbox


# bbox_extraction()
# if __name__ == '__main__':
#     main()
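# Example usage (sketch): `bbox_extraction` returns the neck/stomach boxes for
# the first image listed in the CSV; the path below is the hard-coded default.
#
# if __name__ == '__main__':
#     boxes = bbox_extraction(file_list='./data/images2.csv')
#     print(boxes['neck'], boxes['stomach'])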
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152',
                        type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=50)
    parser.add_argument('--model_name', help='name of the model to save')
    parser.add_argument('--pretrained', help='pretrained model name')
    parser = parser.parse_args(args)

    # Create the data loaders
    dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                               transform=transforms.Compose([Resizer(), Augmenter(), Normalizer()]))
    if parser.csv_val is None:
        dataset_val = None
        print('No validation annotations provided.')
    else:
        dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                 transform=transforms.Compose([Resizer(), Normalizer()]))

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=16, collate_fn=collater,
                                  batch_sampler=sampler)
    # dataloader_train = DataLoader(dataset_train, num_workers=16, collate_fn=collater,
    #                               batch_size=8, shuffle=True)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=2, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=16, collate_fn=collater,
                                    batch_sampler=sampler_val)
        # dataloader_val = DataLoader(dataset_train, num_workers=16, collate_fn=collater,
        #                             batch_size=8, shuffle=True)

    # Create the model_pose_level_attention
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes())
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes())
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes())
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes())
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes())
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    # The original referenced an undefined global `ckpt`; default to loading the
    # pretrained backbone. Set ckpt = True (and fill in the path) to resume instead.
    ckpt = False
    if ckpt:
        retinanet = torch.load('')
        print('load ckpt')
    else:
        retinanet_dict = retinanet.state_dict()
        pretrained_dict = torch.load('./weight/' + parser.pretrained)
        # Keep only the weights whose names match the current model.
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in retinanet_dict}
        retinanet_dict.update(pretrained_dict)
        retinanet.load_state_dict(retinanet_dict)
        print('load pretrained backbone')

    print(retinanet)
    retinanet = torch.nn.DataParallel(retinanet, device_ids=[0])
    retinanet.cuda()
    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    # optimizer = optim.SGD(retinanet.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    f_map = open('./mAP_txt/' + parser.model_name + '.txt', 'a')
    writer = SummaryWriter(log_dir='./summary')
    iters = 0

    for epoch_num in range(0, parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        # scheduler.step()

        for iter_num, data in enumerate(dataloader_train):
            iters += 1
            optimizer.zero_grad()
            # The model returns separate losses for full boxes (f) and visible boxes (v).
            classification_loss_f, regression_loss_f, classification_loss_v, regression_loss_v = \
                retinanet([data['img'].cuda().float(), data['annot'],
                           data['vbox'], data['ignore']])
            classification_loss_f = classification_loss_f.mean()
            regression_loss_f = regression_loss_f.mean()
            classification_loss_v = classification_loss_v.mean()
            regression_loss_v = regression_loss_v.mean()
            loss = (classification_loss_f + regression_loss_f +
                    classification_loss_v + regression_loss_v)
            if bool(loss == 0):
                continue
            loss.backward()
            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
            optimizer.step()
            loss_hist.append(float(loss))
            epoch_loss.append(float(loss))
            print('Epoch: {} | Iteration: {} | Classification loss_f: {:1.5f} | '
                  'Regression loss_f: {:1.5f} | Classification loss_v {:1.5f} | '
                  'Regression loss_v {:1.5f} | Running loss: {:1.5f}'.format(
                      epoch_num, iter_num, float(classification_loss_f),
                      float(regression_loss_f), float(classification_loss_v),
                      float(regression_loss_v), np.mean(loss_hist)))
            writer.add_scalar('classification_loss_f', classification_loss_f, iters)
            writer.add_scalar('regression_loss_f', regression_loss_f, iters)
            writer.add_scalar('classification_loss_v', classification_loss_v, iters)
            writer.add_scalar('regression_loss_v', regression_loss_v, iters)
            writer.add_scalar('loss', loss, iters)

        if parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)
            f_map.write('mAP:{}, epoch:{}'.format(mAP[0][0], epoch_num))
            f_map.write('\n')

        scheduler.step(np.mean(epoch_loss))
        torch.save(retinanet.module, './ckpt/' + parser.model_name + '_{}.pt'.format(epoch_num))

    retinanet.eval()
    # The original had mismatched quotes here, which produced a literal path.
    writer.export_scalars_to_json('./summary/' + parser.pretrained + 'all_scalars.json')
    f_map.close()
    writer.close()
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--wider_train', help='Path to file containing WIDER training annotations (see readme)')
    parser.add_argument('--wider_val', help='Path to file containing WIDER validation annotations (optional, see readme)')
    parser.add_argument('--wider_train_prefix', help='Prefix path to WIDER train images')
    parser.add_argument('--wider_val_prefix', help='Prefix path to WIDER validation images')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152',
                        type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=50)
    parser.add_argument('--batch_size', help='Batch size (default 2)', type=int, default=2)
    parser.add_argument('--model_name', help='Name of the model to save')
    parser.add_argument('--parallel', help='Run training with DataParallel',
                        dest='parallel', default=False, action='store_true')
    parser.add_argument('--pretrained', help='Pretrained model name in weight directory')
    parser = parser.parse_args(args)

    create_dirs()

    # Create the data loaders
    if parser.wider_train is None:
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Resizer(), Augmenter(), Normalizer()]))
    else:
        dataset_train = WIDERDataset(train_file=parser.wider_train,
                                     img_prefix=parser.wider_train_prefix,
                                     transform=transforms.Compose([Resizer(), Augmenter(), Normalizer()]))

    if parser.wider_val is None:
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            print('Loading CSV validation dataset')
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Resizer(), Normalizer()]))
    else:
        print('Loading WIDER validation dataset')
        dataset_val = WIDERDataset(train_file=parser.wider_val,
                                   img_prefix=parser.wider_val_prefix,
                                   transform=transforms.Compose([Resizer(), Normalizer()]))

    print('Loading training dataset')
    sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False)
    if parser.parallel:
        dataloader_train = DataLoader(dataset_train, num_workers=16, collate_fn=collater,
                                      batch_sampler=sampler)
    else:
        dataloader_train = DataLoader(dataset_train, collate_fn=collater, batch_sampler=sampler)

    # Create the model_pose_level_attention
    if parser.depth == 18:
        retinanet = resnet18(num_classes=dataset_train.num_classes())
    elif parser.depth == 34:
        retinanet = resnet34(num_classes=dataset_train.num_classes())
    elif parser.depth == 50:
        retinanet = resnet50(num_classes=dataset_train.num_classes())
    elif parser.depth == 101:
        retinanet = resnet101(num_classes=dataset_train.num_classes())
    elif parser.depth == 152:
        retinanet = resnet152(num_classes=dataset_train.num_classes())
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    # The original referenced an undefined global `ckpt`; default to loading the
    # pretrained weights. Set ckpt = True (and fill in the path) to resume instead.
    ckpt = False
    if ckpt:
        retinanet = torch.load('')
        print('Loading checkpoint')
    else:
        print('Loading pretrained model')
        retinanet_dict = retinanet.state_dict()
        if parser.pretrained is None:
            pretrained_dict = model_zoo.load_url(model_urls['resnet' + str(parser.depth)])
        else:
            pretrained_dict = torch.load('./weight/' + parser.pretrained)
        # Keep only the weights whose names match the current model.
        pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in retinanet_dict}
        retinanet_dict.update(pretrained_dict)
        retinanet.load_state_dict(retinanet_dict)
        print('load pretrained backbone')

    print(retinanet)
    retinanet = torch.nn.DataParallel(retinanet, device_ids=[0])

    # The original relied on a module-level `is_cuda` flag; derive it here.
    is_cuda = torch.cuda.is_available()
    if is_cuda:
        retinanet.cuda()
    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    # optimizer = optim.SGD(retinanet.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    if parser.parallel:
        retinanet.module.freeze_bn()
    else:
        retinanet.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    f_map = open('./mAP_txt/' + parser.model_name + '.txt', 'a')
    writer = SummaryWriter(log_dir='./summary')
    iters = 0

    for epoch_num in range(0, parser.epochs):
        retinanet.train()
        if parser.parallel:
            retinanet.module.freeze_bn()
        else:
            retinanet.freeze_bn()
        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            iters += 1
            optimizer.zero_grad()
            img_data = data['img'].float()
            annot_data = data['annot']
            if is_cuda:
                img_data = img_data.cuda()
                annot_data = annot_data.cuda()
            classification_loss, regression_loss, mask_loss = retinanet([img_data, annot_data])
            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()
            mask_loss = mask_loss.mean()
            loss = classification_loss + regression_loss + mask_loss
            if bool(loss == 0):
                continue
            loss.backward()
            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
            optimizer.step()
            loss_hist.append(float(loss))
            epoch_loss.append(float(loss))
            print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | '
                  'Regression loss: {:1.5f} | mask_loss {:1.5f} | Running loss: {:1.5f}'.format(
                      epoch_num, iter_num, float(classification_loss),
                      float(regression_loss), float(mask_loss), np.mean(loss_hist)))
            writer.add_scalar('classification_loss', float(classification_loss), iters)
            writer.add_scalar('regression_loss', float(regression_loss), iters)
            writer.add_scalar('loss', float(loss), iters)
            del classification_loss
            del regression_loss
            del loss

        if parser.wider_val is not None:
            print('Evaluating dataset')
            mAP = evaluate(dataset_val, retinanet, is_cuda=is_cuda)
            f_map.write('mAP:{}, epoch:{}'.format(mAP[0][0], epoch_num))
            f_map.write('\n')

        scheduler.step(np.mean(epoch_loss))
        if parser.parallel:
            torch.save(retinanet.module, './ckpt/' + parser.model_name + '_{}.pt'.format(epoch_num))
        else:
            torch.save(retinanet, './ckpt/' + parser.model_name + '_{}.pt'.format(epoch_num))

    retinanet.eval()
    # The original had mismatched quotes here, which produced a literal path.
    writer.export_scalars_to_json('./summary/' + parser.pretrained + 'all_scalars.json')
    f_map.close()
    writer.close()
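# Example invocation (sketch): flags as defined above; the WIDER annotation
# file names and the script name are placeholders.
#
#   python train_wider.py --wider_train wider_train.txt \
#       --wider_train_prefix WIDER_train/images \
#       --wider_val wider_val.txt --wider_val_prefix WIDER_val/images \
#       --depth 50 --batch_size 2 --model_name wider_face --parallel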
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes_general', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_features', help='Path to dir containing features csv files')
    parser.add_argument('--csv_colors', help='Path to file containing color classes')
    parser.add_argument('--csv_types', help='Path to file containing type classes')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--image_dir', help='Path to file containing images (optional, see readme)')
    parser.add_argument('--pretrain_model', help='Path to model (.pt) file.')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152',
                        type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on csv,')
        if parser.csv_classes_general is None:
            raise ValueError('Must provide --csv_classes_general when training on csv,')
        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes_general,
                                   color_classes=parser.csv_colors,
                                   type_classes=parser.csv_types,
                                   feature_class_dir=parser.csv_features,
                                   image_dir=parser.image_dir,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes_general,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater,
                                  batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        # retinanet = nn.DataParallel(retinanet)
        # torch.cuda.set_device(0)
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()
    retinanet.training = True

    if parser.pretrain_model is not None:
        retinanet = torch.load(parser.pretrain_model)
        print('load model: ' + str(parser.pretrain_model))

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | '
                      'Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                          epoch_num, iter_num, float(classification_loss),
                          float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))
        torch.save(retinanet.module, '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()
    torch.save(retinanet, 'model_final.pt')
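# Example invocation (sketch): flags as defined above; the file names and the
# script name are placeholders.
#
#   python train_features.py --dataset csv --csv_train annots.csv \
#       --csv_classes_general classes.csv --csv_colors colors.csv \
#       --csv_types types.csv --csv_features features_dir/ \
#       --image_dir images/ --depth 50 --epochs 100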
def train(csv_train=None, csv_classes=None, csv_val=None, epochs=12, depth=50, batch_size=2):
    dataset = "csv"

    # Create the data loaders
    if dataset == 'csv':
        if csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV.')
        if csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV.')

        dataset_train = CSVDataset(train_file=csv_train,
                                   class_list=csv_classes,
                                   transform=transforms.Compose([
                                       RandomHorizontalFlip(0.3),
                                       RandomRotation(6),
                                       Gamma_Correction(0.2),
                                       Image_Noise(0.2),
                                       Blur(0.2),
                                       Normalizer(),
                                       Augmenter(),
                                       Resizer()
                                   ]))

        if csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=csv_val,
                                     class_list=csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=batch_size, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()
    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    # Change: track per-epoch loss curves for plotting.
    total_loss_data = []
    class_loss_data = []
    reg_loss_data = []
    # Change

    for epoch_num in range(epochs):
        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []
        # Change
        epoch_reg_loss = []
        epoch_class_loss = []
        # Change

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                # Change
                epoch_reg_loss.append(float(regression_loss))
                epoch_class_loss.append(float(classification_loss))
                # Change

                print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                    epoch_num, iter_num, float(classification_loss),
                    float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if dataset == 'csv' and csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        # Change
        total_loss_data.append(np.mean(epoch_loss))
        class_loss_data.append(np.mean(epoch_class_loss))
        reg_loss_data.append(np.mean(epoch_reg_loss))
        print("Epoch loss", total_loss_data)
        print("Epoch loss - classification", class_loss_data)
        print("Epoch loss - Regression", reg_loss_data)
        # Change

        scheduler.step(np.mean(epoch_loss))
        torch.save(retinanet.module, '{}_retinanet_{}.pt'.format(dataset, epoch_num))

    retinanet.eval()
    torch.save(retinanet, 'model_final.pt')

    # Change: plot the loss curves collected above.
    import matplotlib.pyplot as plt
    plt.plot(total_loss_data, label='Total loss')
    plt.plot(class_loss_data, label='Classification loss')
    plt.plot(reg_loss_data, label='Regression loss')
    plt.ylabel("Loss")
    plt.xlabel("Epoch")
    plt.title("Epoch losses")
    plt.legend()
    plt.show()
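# For reference, a hypothetical invocation of the train() helper above.
# The CSV paths are placeholders, not files from the original project:

train(csv_train='data/train_annots.csv',
      csv_classes='data/classes.csv',
      csv_val='data/val_annots.csv',
      epochs=12,
      depth=50,
      batch_size=2)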
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--train-file',
                        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--classes-file',
                        help='Path to file containing class list (see readme)')
    parser.add_argument('--val-file',
                        help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth',
                        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
                        type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--title', type=str, default='')
    parser.add_argument('--resume_model', type=str, default='')
    parser.add_argument('--resume_epoch', type=int, default=0)
    parser.add_argument('--reinit-classifier', action='store_true', default=False)
    parser.add_argument('--lr', type=float, default=1e-5)
    parser.add_argument('--all-box-regression', action='store_true', default=False)
    parser.add_argument('--batch-size', type=int, default=16)
    parser = parser.parse_args(args)

    log_dir = './runs/' + parser.title
    writer = SummaryWriter(log_dir)

    with open(log_dir + '/config.csv', 'w') as f:
        for item in vars(parser):
            print(item + ',' + str(getattr(parser, item)))
            f.write(item + ',' + str(getattr(parser, item)) + '\n')

    if not os.path.isdir(log_dir + '/checkpoints'):
        os.makedirs(log_dir + '/checkpoints')
    if not os.path.isdir(log_dir + '/map_files'):
        os.makedirs(log_dir + '/map_files')

    dataset_train = CSVDataset(train_file=parser.train_file,
                               class_list=parser.classes_file,
                               transform=transforms.Compose(
                                   [Normalizer(), Augmenter(), Resizer()]))

    if parser.val_file is None:
        dataset_val = None
        print('No validation annotations provided.')
    else:
        dataset_val = CSVDataset(train_file=parser.val_file,
                                 class_list=parser.classes_file,
                                 transform=transforms.Compose([Normalizer(), Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=True)
    dataloader_train = DataLoader(dataset_train, num_workers=8, collate_fn=collater, batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=parser.batch_size, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=8, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    if parser.resume_model:
        x = torch.load(parser.resume_model)
        if parser.reinit_classifier:
            # Re-initialize the classification head: fresh weights, and a bias
            # chosen so the initial foreground probability equals the focal-loss prior.
            dummy = nn.Conv2d(256, 9 * dataset_train.num_classes(), kernel_size=3, padding=1)
            x['classificationModel.output.weight'] = dummy.weight.clone()
            x['classificationModel.output.bias'] = dummy.bias.clone()
            prior = 0.01
            x['classificationModel.output.weight'].data.fill_(0)
            x['classificationModel.output.bias'].data.fill_(-math.log((1.0 - prior) / prior))
        retinanet.load_state_dict(x)

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()
    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=parser.lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    # x = torch.load('./csv_retinanet_20.pth')
    # retinanet.module.load_state_dict(x)

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.resume_epoch, parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []
        i = 0
        avg_class_loss = 0.0
        avg_reg_loss = 0.0

        for iter_num, data in enumerate(dataloader_train):
            i += 1
            try:
                optimizer.zero_grad()

                shape = data['img'].shape[2] * data['img'].shape[3]
                writer.add_scalar("train/image_shape", shape,
                                  epoch_num * len(dataloader_train) + i)

                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot'].cuda().float()])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                # Accumulate as floats so the autograd graph is not kept alive
                # between logging steps.
                avg_class_loss += float(classification_loss)
                avg_reg_loss += float(regression_loss)

                if i % 100 == 0:
                    writer.add_scalar("train/classification_loss", avg_class_loss / 100,
                                      epoch_num * len(dataloader_train) + i)
                    writer.add_scalar("train/regression_loss", avg_reg_loss / 100,
                                      epoch_num * len(dataloader_train) + i)
                    avg_class_loss = 0.0
                    avg_reg_loss = 0.0

                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if epoch_num % 2 == 0:
            print('Evaluating dataset')
            retinanet.eval()
            mAP, AP_string = csv_eval.evaluate(dataset_val, retinanet.module, score_threshold=0.1)
            with open(log_dir + '/map_files/retinanet_{}.txt'.format(epoch_num), 'w') as f:
                f.write(AP_string)

            # Aggregate per-class AP into a count-weighted and an unweighted mean.
            total = 0.0
            total_count = 0.0
            total_unweighted = 0.0
            for c in mAP:
                total += mAP[c][0] * mAP[c][1]
                total_unweighted += mAP[c][0]
                total_count += mAP[c][1]
            writer.add_scalar("val/mAP", total / total_count, epoch_num)
            writer.add_scalar("val/mAP_unweighted", total_unweighted / len(mAP), epoch_num)

        scheduler.step(np.mean(epoch_loss))
        torch.save(retinanet.module.state_dict(),
                   log_dir + '/checkpoints/retinanet_{}.pth'.format(epoch_num))

    retinanet.eval()
    torch.save(retinanet.module.state_dict(), log_dir + '/checkpoints/model_final.pth')
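# The classifier re-initialization above uses the standard focal-loss trick of
# biasing the output layer so every anchor starts with a small foreground
# probability. A standalone sketch of the arithmetic, with the same prior:

import math

prior = 0.01
bias = -math.log((1.0 - prior) / prior)
# sigmoid(bias) == prior, so each anchor initially predicts ~1% foreground,
# which keeps early training from being swamped by easy negatives.
print(bias)  # ~ -4.595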
    sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    # Load a partially matching checkpoint; strict=False skips keys that do not
    # line up with the freshly created model.
    retinanet.load_state_dict(state_dict, strict=False)

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()
    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
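# `state_dict` is not defined in the fragment above. A plausible way it could
# be built, assuming a checkpoint saved as a plain state dict (the path and
# filtering are hypothetical, not from the original project): keep only tensors
# whose names and shapes match the current model, then load non-strictly.

checkpoint = torch.load('checkpoints/retinanet_pretrained.pt')  # placeholder path
model_dict = retinanet.state_dict()
state_dict = {k: v for k, v in checkpoint.items()
              if k in model_dict and v.shape == model_dict[k].shape}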
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val',
                        help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth',
                        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
                        type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--title', type=str, default='')
    parser.add_argument('--resume_model', type=str, default='')
    parser.add_argument('--resume_epoch', type=int, default=0)
    parser = parser.parse_args(args)

    title = parser.resume_model.split('.')[0]
    log_dir = './runs/' + title
    writer = SummaryWriter(log_dir)

    if not os.path.isdir(log_dir + '/checkpoints'):
        os.makedirs(log_dir + '/checkpoints')
    if not os.path.isdir(log_dir + '/map_files'):
        os.makedirs(log_dir + '/map_files')

    if parser.dataset == 'csv':
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_val.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_val.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_val.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_val.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_val.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    if parser.resume_model:
        retinanet.load_state_dict(torch.load(parser.resume_model))

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()

    # Sweep score thresholds and log the resulting mAP for each.
    threshes = [0.05, 0.1, 0.2, 0.3]
    for i, thresh in enumerate(threshes, start=1):
        retinanet.eval()
        print('Evaluating dataset')

        mAP, AP_string = csv_eval.evaluate(dataset_val, retinanet, score_threshold=thresh)
        with open(log_dir + '/map_files/{}_retinanet_{}.txt'.format(parser.dataset, thresh), 'w') as f:
            f.write(AP_string)

        total = 0.0
        total_count = 0.0
        total_unweighted = 0.0
        for c in mAP:
            total += mAP[c][0] * mAP[c][1]
            total_unweighted += mAP[c][0]
            total_count += mAP[c][1]
        writer.add_scalar("thresh_finder/mAP", total / total_count, i)
        writer.add_scalar("thresh_finder/mAP_unweighted", total_unweighted / len(mAP), i)
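# The aggregation above computes both a count-weighted and an unweighted mean
# over per-class (AP, num_annotations) pairs. A toy example with invented
# numbers to show how the two differ:

mAP = {'cat': (0.80, 50), 'dog': (0.60, 150)}  # class -> (AP, annotation count)

weighted = sum(ap * n for ap, n in mAP.values()) / sum(n for _, n in mAP.values())
unweighted = sum(ap for ap, _ in mAP.values()) / len(mAP)
print(weighted)    # 0.65 -- dominated by the more frequent class
print(unweighted)  # 0.70 -- each class counts equally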
def main(args=None):
    parser = argparse.ArgumentParser(description='Training a RetinaNet network.')
    parser.add_argument('--csv_train', help='Path to file containing training annotations')
    parser.add_argument('--csv_classes', help='Path to file containing class list')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations')
    parser.add_argument('--depth',
                        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
                        type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs to run', type=int, default=100)
    parser.add_argument('--batch_size', help='Number of training samples per batch',
                        type=int, default=16)
    parser.add_argument('--score_thresh',
                        help='Score threshold to discard background/reduce NMS processing time',
                        type=float, default=0.05)
    parser.add_argument('--iou_nms1',
                        help='IoU for NMS used during validation and inference',
                        type=float, default=0.3)
    parser.add_argument('--lr', help='Learning rate', type=float, default=6e-4)
    # action='store_true' rather than type=bool: argparse's type=bool treats any
    # non-empty string (including "False") as True.
    parser.add_argument('--pretrained', action='store_true', default=False)
    parser.add_argument('--logfile', required=True)
    args = parser.parse_args(args)

    outputdir = os.path.dirname(args.logfile)
    if not os.path.isdir(outputdir):
        os.makedirs(outputdir)

    # Create the data loaders
    if args.csv_train is None:
        raise ValueError('Must provide --csv_train when training on CSV.')
    if args.csv_classes is None:
        raise ValueError('Must provide --csv_classes when training on CSV.')

    dataset_train = CSVDataset(train_file=args.csv_train,
                               class_list=args.csv_classes,
                               transform=transforms.Compose(
                                   [Normalizer(), Augmenter(), Resizer()]))

    if args.csv_val is None:
        dataset_val = None
        print('No validation annotations provided.')
    else:
        dataset_val = CSVDataset(train_file=args.csv_val,
                                 class_list=args.csv_classes,
                                 transform=transforms.Compose([Normalizer(), Resizer()]))

    dataloader_train = DataLoader(dataset_train, batch_size=args.batch_size,
                                  num_workers=3, collate_fn=collater, shuffle=True)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if args.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=args.pretrained)
    elif args.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=args.pretrained)
    elif args.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=args.pretrained)
    elif args.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=args.pretrained)
    elif args.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=args.pretrained)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True
    retinanet.score_thresh = args.score_thresh
    retinanet.iou_nms1 = args.iou_nms1

    optimizer = optim.Adam(retinanet.parameters(), lr=args.lr)

    # # LR Finder
    # lr_finder = LRFinder(retinanet, optimizer, losses.FocalLossQ, device="cuda")
    # lr_finder.range_test(dataloader_train, end_lr=10, num_iter=1260, diverge_th=10)
    # lr_finder.plot(skip_start=0, skip_end=3, show_lr=3e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

    loss_hist = collections.deque(maxlen=500)

    print("Num training images: {}".format(len(dataset_train)))

    for epoch_num in range(args.epochs):
        retinanet.train()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            optimizer.zero_grad()

            classification_loss, regression_loss = retinanet(
                [data['img'].cuda().float(), data['annot']])

            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()
            loss = classification_loss + regression_loss

            if bool(loss == 0):
                continue

            loss.backward()
            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
            optimizer.step()

            loss_hist.append(float(loss))
            epoch_loss.append(float(loss))

            print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | '
                  'Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                      epoch_num, iter_num, float(classification_loss),
                      float(regression_loss), np.mean(loss_hist)))

            del classification_loss
            del regression_loss

        if args.csv_val is not None:
            mAP = csv_eval.evaluate(dataset_val, retinanet)
            with open(args.logfile, mode='a') as f:
                f.write("mAP:\n")
                aps = []
                for i, label_name in enumerate(dataset_val.classes):
                    f.write('{}: {} | Count: {}\n'.format(label_name, mAP[i][0], mAP[i][1]))
                    aps.append(mAP[i][0])
                f.write('mAP: {}\n'.format(np.mean(aps)))

        scheduler.step(np.mean(epoch_loss))

        torch.save(retinanet.module, '{}/retinanet_{}.pt'.format(outputdir, epoch_num))
        torch.save(retinanet.module.state_dict(), '{}/statedict_{}.pt'.format(outputdir, epoch_num))

    retinanet.eval()
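# Since main(args=None) forwards its argument to parse_args, the script above
# can be driven programmatically. A hypothetical invocation (all paths are
# placeholders):

main(['--csv_train', 'data/train_annots.csv',
      '--csv_classes', 'data/classes.csv',
      '--csv_val', 'data/val_annots.csv',
      '--depth', '50',
      '--epochs', '100',
      '--logfile', 'runs/exp1/log.txt'])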
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    # Add a bunch of arguments (customized by Yu Han Huang)
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--csv_train',
                        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val',
                        help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--model', default='None')
    parser.add_argument('--depth',
                        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
                        type=int, default=50)
    parser.add_argument('--resnext', help='Change backbone to ResNeXt-101',
                        action='store_true')
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=12)
    parser.add_argument('--batch_size', help='Batch size', type=int, default=4)
    parser.add_argument('--workers', help='Number of workers', type=int, default=4)
    parser.add_argument('--lr', help='Learning rate for training', type=float, default=1e-5)
    parser.add_argument('--dropout1',
                        help='Dropout rate for layer dropout1 in ClassificationModel',
                        type=float, default=0.25)
    parser.add_argument('--dropout2',
                        help='Dropout rate for layer dropout2 in ClassificationModel',
                        type=float, default=0.25)
    parser.add_argument('--angle',
                        help='Rotation angle used in data augmentation',
                        type=float, default=6)
    parser.add_argument('--size', help='The length of the side of pictures',
                        type=int, default=512)
    parser.add_argument('--zoom_range',
                        help='Zoom range for data augmentation; pass two values.',
                        nargs='+', type=float, default=[-0.1, 0.1])
    parser.add_argument('--alpha', help='Alpha for focal loss', type=float, default=0.25)
    parser.add_argument('--gamma', help='Gamma for focal loss', type=float, default=2)
    parser.add_argument('--loss_with_no_bboxes', action='store_true')
    parser.add_argument('--no_bboxes_alpha', help='Alpha for focal loss', type=float, default=0.5)
    parser.add_argument('--no_bboxes_gamma', help='Gamma for focal loss', type=float, default=2)
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV,')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose([
                                       Normalizer(),
                                       Augmenter(angle=parser.angle),
                                       Resizer(zoom_range=parser.zoom_range,
                                               side=parser.size)
                                   ]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), ValResizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=parser.workers,
                                  collate_fn=collater, batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model, forwarding the customized dropout/focal-loss arguments:
    # dropout1, dropout2, alpha, gamma, loss_with_no_bboxes, no_bboxes_alpha,
    # no_bboxes_gamma (customized by Yu Han Huang)
    model_kwargs = dict(num_classes=dataset_train.num_classes(),
                        pretrained=True,
                        dropout1=parser.dropout1,
                        dropout2=parser.dropout2,
                        alpha=parser.alpha,
                        gamma=parser.gamma,
                        loss_with_no_bboxes=parser.loss_with_no_bboxes,
                        no_bboxes_alpha=parser.no_bboxes_alpha,
                        no_bboxes_gamma=parser.no_bboxes_gamma)
    if not parser.resnext:
        constructors = {18: model.resnet18, 34: model.resnet34, 50: model.resnet50,
                        101: model.resnet101, 152: model.resnet152}
        if parser.depth not in constructors:
            raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')
        retinanet = constructors[parser.depth](**model_kwargs)
    else:
        if parser.depth != 101:
            raise ValueError('The resnext backbone only supports depth 101')
        retinanet = model.resnext101(**model_kwargs)

    use_gpu = True

    if parser.model != 'None':
        retinanet = torch.load(parser.model)

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()
    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=parser.lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()

        print_activate = 0
        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                # print(classification_loss, regression_loss)

                if bool(loss == 0):
                    continue

                loss.backward()
                print_activate += 1
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                # Only print every 15th update to keep the log readable.
                if print_activate % 15 == 0:
                    print(
                        'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                        .format(epoch_num, iter_num, float(classification_loss),
                                float(regression_loss), np.mean(loss_hist)))

                del loss
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        scheduler.step(np.mean(epoch_loss))
        torch.save(retinanet.module,
                   '{}_retinanet_resnext_v4_{}.pt'.format(parser.dataset, epoch_num))

        if parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)

    retinanet.eval()
    torch.save(retinanet, 'model_final.pt')
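# These scripts save entire modules with torch.save(module, path), so reloading
# for inference can be done as sketched below. This is a hypothetical usage
# sketch: it assumes the model class definitions are importable (required when
# unpickling a whole module), and the eval-mode output format varies across the
# script variants above, so the forward call is left generic.

import torch

retinanet = torch.load('model_final.pt')
retinanet = retinanet.cuda()
retinanet.eval()

with torch.no_grad():
    # `batch` is a placeholder for a preprocessed image tensor (N, C, H, W).
    batch = torch.zeros(1, 3, 512, 512).cuda()
    detections = retinanet(batch)  # format depends on the variant in use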