def init_data(parser, verb_orders):
    dataset_train = CSVDataset(train_file=parser.train_file, class_list=parser.classes_file,
                               verb_info=verb_orders, is_training=True,
                               transform=transforms.Compose([Normalizer(), Augmenter(), Resizer(True)]))
    if parser.val_file is None:
        dataset_val = None
        dataloader_val = None  # keep the return below well-defined when no val set is given
        print('No validation annotations provided.')
    else:
        dataset_val = CSVDataset(train_file=parser.val_file, class_list=parser.classes_file,
                                 verb_info=verb_orders, is_training=False,
                                 transform=transforms.Compose([Normalizer(), Resizer(False)]))

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=True)
    dataloader_train = DataLoader(dataset_train, num_workers=64, collate_fn=collater, batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=parser.batch_size, drop_last=True)
        dataloader_val = DataLoader(dataset_val, num_workers=64, collate_fn=collater, batch_sampler=sampler_val)

    return dataloader_train, dataset_train, dataloader_val, dataset_val
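# A minimal usage sketch for init_data. Hedged: the CSV paths are placeholders,
# verb_orders is whatever verb-ordering structure this repo's CSVDataset
# expects, and an argparse.Namespace stands in for the parsed CLI arguments.
if __name__ == '__main__':
    import argparse

    demo_args = argparse.Namespace(train_file='train_annots.csv', classes_file='classes.csv',
                                   val_file=None, batch_size=8)
    dataloader_train, dataset_train, dataloader_val, dataset_val = init_data(demo_args, verb_orders={})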
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple evaluation script for a RetinaNet network.')
    parser.add_argument('--dataset', default='coco', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', default='/home/hao.wyh/jupyter/黑边/smart_reverse_label/coco/',
                        help='Path to COCO directory')
    # parser.add_argument('--coco_path', default='/home/hao.wyh/jupyter/黑边/评估任务/3k_imgs/coco/', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)

    retinanet = torch.load(parser.model)

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()
    coco_eval.evaluate_coco(dataset_val, retinanet)
def main(args=None):
    parser = argparse.ArgumentParser(description='Script for exporting RetinaNet detections to JSON.')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser = parser.parse_args(args)

    dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                             transform=transforms.Compose([Normalizer(), Resizer()]), is_visualizing=True)
    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)

    retinanet = model.resnet50(num_classes=dataset_val.num_classes(), pretrained=True)
    retinanet.load_state_dict(torch.load(parser.model))

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()

    scores_for_rnn = {}
    for idx, data in enumerate(dataloader_val):
        print(idx)
        with torch.no_grad():
            img_name = data['img_name'][0]
            scale = data['scale'][0]
            scores, transformed_anchors = retinanet(data['img'].cuda().float(), return_all_scores=True)
            transformed_anchors /= scale  # undo the Resizer scaling so boxes are in original-image coordinates
            scores, transformed_anchors = scores.cpu(), transformed_anchors.cpu()
            scores = [[scores[i, j].item() for j in range(scores.size(1))]
                      for i in range(scores.size(0))]
            transformed_anchors = [[transformed_anchors[i, j].item() for j in range(transformed_anchors.size(1))]
                                   for i in range(transformed_anchors.size(0))]
            curr = {'scores': scores, 'bboxes': transformed_anchors}
            scores_for_rnn[img_name] = curr

    with open('detections.json', 'w') as f:
        json.dump(scores_for_rnn, f)
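# Layout of the resulting detections.json (a sketch, values illustrative):
# each image name maps to its per-anchor class-score rows and the matching
# rescaled boxes.
#
#   {"img_001.jpg": {"scores": [[0.02, 0.91, ...], ...],
#                    "bboxes": [[x1, y1, x2, y2], ...]}}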
def main(args=None):
    parser = argparse.ArgumentParser(description='Threshold-sweep evaluation script for a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--score_threshold', help='Score above which boxes are kept', default=0.5)
    parser.add_argument('--nms_threshold', help='IoU threshold for non-maximum suppression', default=0.5)
    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                 transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    # batch_sampler is mutually exclusive with shuffle, so shuffle is not passed here
    dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater, batch_sampler=sampler_val)

    retinanet = torch.load(parser.model)
    score_threshold = float(parser.score_threshold)
    nms_threshold = float(parser.nms_threshold)  # was float(parser.score_threshold), a copy-paste bug

    use_gpu = True
    f = open('mAPs.txt', 'w')
    writer = csv.writer(f, delimiter=',')

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()

    # Grid-search the NMS and score thresholds over [0.1, 0.55] in steps of
    # 0.05 (this supersedes the CLI values above), recording the mean AP
    # across classes for each combination.
    thresholds = np.array([0.1 + 0.05 * n for n in range(10)])
    for nms_threshold in thresholds:
        for score_threshold in thresholds:
            mAPs = csv_eval.evaluate(dataset_val, retinanet, iou_threshold=0.5,
                                     score_threshold=score_threshold, nms_threshold=nms_threshold)
            maps = np.mean([ap[0] for ap in mAPs.values()])
            writer.writerow([score_threshold, nms_threshold, maps])
    f.close()
def get_training_dataloader(self, set_name='train'):  # this can be used for entire sets
    with redirect_stdout(None):
        self.training_dataset = CocoDataset(root_dir=self.root_dir, set_name=set_name, transform=None)
    [min_w, min_h] = self.get_min_size(self.training_dataset)
    self.training_dataset.transform = transforms.Compose(
        [Normalizer(), Augmenter(), Resizer()])  # RandomCropOrScale(min_w, min_h)])
    # training_dataset = self.get_dataset(set_name=set_name)
    sampler_train = AspectRatioBasedSampler(self.training_dataset, batch_size=self.batch_size, shuffle=True)
    self.training_dataloader = DataLoader(dataset=self.training_dataset, num_workers=self.workers,
                                          collate_fn=collater, batch_sampler=sampler_train, pin_memory=True)
    self.print_data_statistics(data_loader=self.training_dataloader, set_type='Training')
def get_validation_dataloader(self, sub_dir=None, sort=False, set_name='val'):
    with redirect_stdout(None):
        self.validation_dataset = CocoDataset(root_dir=self.root_dir, set_name=set_name, sub_dir=sub_dir,
                                              transform=transforms.Compose([Normalizer(), Resizer()]),
                                              categories=self.categories, sort=sort)
    # validation_dataset = self.get_dataset(set_name=set_name, sub_dir=sub_dir)
    sampler_val = AspectRatioBasedSampler(self.validation_dataset, batch_size=1, shuffle=False)
    self.validation_dataloader = DataLoader(self.validation_dataset, num_workers=self.workers,
                                            collate_fn=collater, batch_sampler=sampler_val, pin_memory=True)
    self.print_data_statistics(data_loader=self.validation_dataloader, set_type='Validation')
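# Usage sketch: these two methods are assumed to live on a data-manager class
# exposing root_dir, batch_size, workers and categories attributes (the class
# name CocoDataManager below is hypothetical):
#
#   mgr = CocoDataManager(root_dir='./coco', batch_size=4, workers=8)
#   mgr.get_training_dataloader(set_name='train')
#   mgr.get_validation_dataloader(set_name='val', sort=True)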
def main(args=None):
    # def main(epoch):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    # parser.add_argument('--resume', '-r', action='store_true', help='resume from checkpoint')
    parser.add_argument('--start-epoch', default=0, type=int, help='manual epoch number (useful on restarts)')
    parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)')
    parser = parser.parse_args(args)
    # args = parser.parse_args()
    # parser = parser.parse_args(epoch)

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')
        dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                    transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV,')
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=4, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    # retinanet().load_state_dict(torch.load('/users/wenchi/ghwwc/Pytorch-retinanet-master/resnet50-19c8e357.pth'))

    # if True:
    #     print('==> Resuming from checkpoint..')
    #     checkpoint = torch.load('/users/wenchi/ghwwc/Pytorch-retinanet-master/coco_retinanet_2.pt')
    #     retinanet().load_state_dict(checkpoint)
    #     best_loss = checkpoint['loss']
    #     start_epoch = checkpoint['epoch']

    retinanet = torch.nn.DataParallel(retinanet).cuda()
    retinanet.training = True

    # optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    optimizer = optim.SGD(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    # retinanet.freeze_bn()       # to train from a middle state
    retinanet.module.freeze_bn()  # to train from the very beginning

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.start_epoch, parser.epochs):
        if parser.resume:
            if os.path.isfile(parser.resume):
                print("=> loading checkpoint '{}'".format(parser.resume))
                checkpoint = torch.load(parser.resume)
                print(parser.start_epoch)
                # parser.start_epoch = checkpoint['epoch']
                # retinanet.load_state_dict(checkpoint['state_dict'])
                # The checkpoints below are written with torch.save(model, ...),
                # so the loaded object is the full model, not a state_dict.
                retinanet = checkpoint
                # retinanet.load_state_dict(checkpoint)
                print(retinanet)
                # optimizer.load_state_dict(checkpoint)
                print("=> loaded checkpoint '{}' (epoch {})".format(parser.resume, epoch_num))
            else:
                print("=> no checkpoint found at '{}'".format(parser.resume))

        retinanet.train()
        retinanet.freeze_bn()
        # retinanet.module.freeze_bn()

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot'].cuda()])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                    epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))
        # torch.save(retinanet.module, '{}_retinanet_101_{}.pt'.format(parser.dataset, epoch_num))
        torch.save(retinanet, '{}_retinanet_dilation_experiment1_{}.pt'.format(parser.dataset, epoch_num))
        name = '{}_retinanet_dilation_experiment1_{}.pt'.format(parser.dataset, epoch_num)
        # Resume from the checkpoint just written (the original assigned the
        # literal string '.../name' instead of joining with the name variable).
        parser.resume = os.path.join('/users/wenchi/ghwwc/pytorch-retinanet-master_new', name)

    retinanet.eval()
    torch.save(retinanet, 'model_final_dilation_experiment1.pt')
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--coco_path', help='Path to COCO directory', type=str, default='./data/coco')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--checkpoint', help='The path to the checkpoint.', type=str, default=None)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--batch_size', help='Number of batch', type=int, default=16)
    parser.add_argument('--gpu_ids', help='Gpu parallel', type=str, default='1, 2')
    parser = parser.parse_args(args)

    # Create the data loaders
    dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
    dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                              transform=transforms.Compose([Normalizer(), Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=4, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=16, collate_fn=collater, batch_sampler=sampler)
    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()

    gpu_ids = parser.gpu_ids.split(',')
    device = torch.device('cuda:' + gpu_ids[0])
    torch.cuda.set_device(device)
    gpu_ids = list(map(int, gpu_ids))
    retinanet = torch.nn.DataParallel(retinanet, device_ids=gpu_ids).to(device)

    if parser.checkpoint:
        pretrained = torch.load(parser.checkpoint).state_dict()
        retinanet.module.load_state_dict(pretrained)

    # Add tensorboard to record the training log
    retinanet.training = True
    writer = SummaryWriter('./log')
    # writer.add_graph(retinanet, input_to_model=[images, labels])

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                # the collater used above emits 'annot' (the original read data['ann'])
                classification_loss, regression_loss = retinanet(
                    [data['img'].to(device), data['annot'].to(device)])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                writer.add_scalar('Loss/train', loss, iter_num)
                writer.add_scalar('Loss/reg_loss', regression_loss, iter_num)
                writer.add_scalar('Loss/cls_loss', classification_loss, iter_num)
                epoch_loss.append(float(loss))
                if (iter_num + 1) % 1000 == 0:
                    print('Save model')
                    torch.save(retinanet.module, 'COCO_retinanet_epoch{}_iter{}.pt'.format(epoch_num, iter_num))
                print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                    epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        print('Evaluating dataset')
        coco_eval.evaluate_coco(dataset_val, retinanet, writer)
        scheduler.step(np.mean(epoch_loss))
        torch.save(retinanet.module, 'COCO_retinanet_{}.pt'.format(epoch_num))

    retinanet.eval()
    torch.save(retinanet, 'model_final.pt')  # the trailing .format(epoch_num) on a literal was a no-op
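# The scalars logged above ('Loss/train', 'Loss/reg_loss', 'Loss/cls_loss')
# can be inspected while training runs, e.g.:
#   tensorboard --logdir ./log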
def train(args):
    train_csv = args.train_csv
    test_csv = args.test_csv
    labels_csv = args.labels_csv
    model_type = args.model_type
    epochs = int(args.epochs)
    batch_size = int(args.batch_size)

    dataset_train = CSVDataset(train_file=train_csv, class_list=labels_csv,
                               transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
    dataset_val = CSVDataset(train_file=test_csv, class_list=labels_csv,
                             transform=transforms.Compose([Normalizer(), Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=batch_size, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

    retinanet = RetinaNet_efficientnet_b4(num_classes=dataset_train.num_classes(), model_type=model_type)

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True
    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                    epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        mAP, MAP = evaluate(dataset_val, retinanet)
        scheduler.step(np.mean(epoch_loss))
        torch.save(retinanet.module,
                   '{}_retinanet_{}_map{}.pt'.format('EfficientNet' + model_type, epoch_num, MAP))

    retinanet.eval()
    torch.save(retinanet, 'model_final.pt')  # the trailing .format(epoch_num) on a literal was a no-op
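# Usage sketch for train(). Hedged: the CSV paths and the EfficientNet variant
# string are placeholders, and an argparse.Namespace stands in for parsed CLI
# arguments.
if __name__ == '__main__':
    import argparse

    train(argparse.Namespace(train_csv='train.csv', test_csv='test.csv', labels_csv='labels.csv',
                             model_type='b4', epochs=10, batch_size=4))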
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')
        dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                    transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV,')
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True
    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                    epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))
        torch.save(retinanet.module, '{}_retinanet_dilation_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()
    torch.save(retinanet, 'model_final_dilation.pt')  # the trailing .format(epoch_num) on a literal was a no-op
epoch_max = 100
batch_size = 1

dataset_train = CocoDataset(data_root_dir, set_name='train2017',
                            transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
dataset_val = CocoDataset(data_root_dir, set_name='val2017',
                          transform=transforms.Compose([Normalizer(), Resizer()]))

sampler = AspectRatioBasedSampler(dataset_train, batch_size=batch_size, drop_last=True)
train_data_loader = DataLoader(dataset_train, num_workers=8, collate_fn=collater, batch_sampler=sampler)
sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=batch_size, drop_last=True)
val_data_loader = DataLoader(dataset_val, num_workers=8, collate_fn=collater, batch_sampler=sampler_val)

# Take retinanet_50 as an example to check that data loading works
retinanet = model.retinanet_50(dataset_train.num_classes(),
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple testing script for RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.', default='csv')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)', default='binary_class.csv')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--csv_box_annot', help='Path to file containing predicted box annotations')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=18)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=500)
    parser.add_argument('--model', help='Path of .pt file with trained model', default='esposallescsv_retinanet_0.pt')
    parser.add_argument('--model_out', help='Path of .pt file with trained model to save', default='trained')
    parser.add_argument('--score_threshold', help='Score above which boxes are kept', default=0.15)
    parser.add_argument('--nms_threshold', help='IoU threshold for non-maximum suppression', default=0.2)
    parser.add_argument('--max_epochs_no_improvement', help='Max epochs without improvement', default=100)
    parser.add_argument('--max_boxes', help='Max boxes to be fed to recognition', default=50)
    parser.add_argument('--seg_level', help='Line or word, to choose anchor aspect ratio', default='line')
    parser.add_argument('--htr_gt_box', help='Train recognition branch with box gt (for debugging)', default=False)
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'csv':
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when testing on CSV,')
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
        if parser.csv_box_annot is not None:
            box_annot_data = CSVDataset(train_file=parser.csv_box_annot, class_list=parser.csv_classes,
                                        transform=transforms.Compose([Normalizer(), Resizer()]))
        else:
            box_annot_data = None
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater, batch_sampler=sampler_val)
    if box_annot_data is not None:
        sampler_val = AspectRatioBasedSampler(box_annot_data, batch_size=1, drop_last=False)
        dataloader_box_annot = DataLoader(box_annot_data, num_workers=0, collate_fn=collater, batch_sampler=sampler_val)
    else:
        dataloader_box_annot = dataloader_val

    if not os.path.exists('trained_models'):
        os.mkdir('trained_models')

    # Create the model
    alphabet = dataset_val.alphabet
    if os.path.exists(parser.model):
        retinanet = torch.load(parser.model)
    else:
        # Only the validation set exists in this script, so num_classes is taken
        # from dataset_val (the original referenced an undefined dataset_train).
        if parser.depth == 18:
            retinanet = model.resnet18(num_classes=dataset_val.num_classes(), pretrained=True,
                                       max_boxes=int(parser.max_boxes),
                                       score_threshold=float(parser.score_threshold),
                                       seg_level=parser.seg_level, alphabet=alphabet)
        elif parser.depth == 34:
            retinanet = model.resnet34(num_classes=dataset_val.num_classes(), pretrained=True)
        elif parser.depth == 50:
            retinanet = model.resnet50(num_classes=dataset_val.num_classes(), pretrained=True)
        elif parser.depth == 101:
            retinanet = model.resnet101(num_classes=dataset_val.num_classes(), pretrained=True)
        elif parser.depth == 152:
            retinanet = model.resnet152(num_classes=dataset_val.num_classes(), pretrained=True)
        else:
            raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()
    # retinanet = torch.load('../Documents/TRAINED_MODELS/pytorch-retinanet/esposallescsv_retinanet_99.pt')
    # print("LOADED pretrained MODEL\n\n")

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=4, verbose=True)
    loss_hist = collections.deque(maxlen=500)
    ctc = CTCLoss()
    retinanet.module.freeze_bn()

    best_cer = 1000
    epochs_no_improvement = 0
    cers = []

    retinanet.eval()
    retinanet.module.epochs_only_det = 0
    # retinanet.module.htr_gt_box = False
    retinanet.training = False
    if parser.score_threshold is not None:
        retinanet.module.score_threshold = float(parser.score_threshold)

    # if parser.dataset == 'csv' and parser.csv_val is not None:
    #     print('Evaluating dataset')
    mAP = csv_eval.evaluate(dataset_val, retinanet, score_threshold=retinanet.module.score_threshold)
    aps = []
    for k, v in mAP.items():
        aps.append(v[0])
    print("VALID mAP:", np.mean(aps))
    print("score th", retinanet.module.score_threshold)

    for idx, data in enumerate(dataloader_box_annot):
        print("Eval CER on validation set:", idx, "/", len(dataloader_box_annot), "\r")
        if box_annot_data:
            image_name = box_annot_data.image_names[idx].split('/')[-1].split('.')[-2]
        else:
            image_name = dataset_val.image_names[idx].split('/')[-1].split('.')[-2]
        # generate_pagexml(image_name, data, retinanet, parser.score_threshold, parser.nms_threshold, dataset_val)
        text_gt_path = "/".join(dataset_val.image_names[idx].split('/')[:-1])
        text_gt = os.path.join(text_gt_path, image_name + '.txt')
        with open(text_gt, 'r') as f:
            text_gt_lines = f.readlines()[0]
        transcript_pred = get_transcript(image_name, data, retinanet, retinanet.module.score_threshold,
                                         float(parser.nms_threshold), dataset_val, alphabet)
        cers.append(float(editdistance.eval(transcript_pred, text_gt_lines)) / len(text_gt_lines))
        print("GT", text_gt_lines)
        print("PREDS SAMPLE:", transcript_pred)
        print("VALID CER:", np.mean(cers), "best CER", best_cer)

    # Final summary (repeats the values for the last processed sample)
    print("GT", text_gt_lines)
    print("PREDS SAMPLE:", transcript_pred)
    print("VALID CER:", np.mean(cers), "best CER", best_cer)
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple visualization script for a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        # Only --csv_val is defined above; the original read the undefined parser.csv_train.
        dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                 transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)

    retinanet = torch.load(parser.model)

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()
    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        b = np.array(box).astype(int)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    for idx, data in enumerate(dataloader_val):
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())
            print('Elapsed time: {}'.format(time.time() - st))
            idxs = np.where(scores > 0.5)

            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
            img[img < 0] = 0
            img[img > 255] = 255
            img = np.transpose(img, (1, 2, 0))
            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_val.labels[int(classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)
                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
                print(label_name)

            cv2.imshow('img', img)
            cv2.waitKey(0)
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a CTracker network.')
    parser.add_argument('--dataset', default='csv', type=str, help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--model_dir', default='./ctracker/', type=str, help='Path to save the model.')
    parser.add_argument('--root_path', default='/Dataset/Tracking/MOT17/', type=str,
                        help='Path of the directory containing both label and images')
    parser.add_argument('--csv_train', default='train_annots.csv', type=str,
                        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', default='train_labels.csv', type=str,
                        help='Path to file containing class list (see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--print_freq', help='Print frequency', type=int, default=100)
    parser.add_argument('--save_every', help='Save a checkpoint of model at given interval of epochs',
                        type=int, default=5)
    parser = parser.parse_args(args)
    print(parser)
    print(parser.model_dir)

    if not os.path.exists(parser.model_dir):
        os.makedirs(parser.model_dir)

    # Create the data loaders
    if parser.dataset == 'csv':
        if (parser.csv_train is None) or (parser.csv_train == ''):
            raise ValueError('Must provide --csv_train when training on CSV,')
        if (parser.csv_classes is None) or (parser.csv_classes == ''):
            raise ValueError('Must provide --csv_classes when training on CSV,')
        dataset_train = CSVDataset(parser.root_path,
                                   train_file=os.path.join(parser.root_path, parser.csv_train),
                                   class_list=os.path.join(parser.root_path, parser.csv_classes),
                                   transform=transforms.Compose([RandomSampleCrop(), PhotometricDistort(),
                                                                 Augmenter(), Normalizer()]))
        # transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    # sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    sampler = AspectRatioBasedSampler(dataset_train, batch_size=8, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=32, collate_fn=collater, batch_sampler=sampler)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True
    # optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    optimizer = optim.Adam(retinanet.parameters(), lr=5e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    total_iter = 0
    for epoch_num in range(parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                total_iter = total_iter + 1
                optimizer.zero_grad()
                # CTracker consumes adjacent frame pairs with their annotations
                (classification_loss, regression_loss), reid_loss = retinanet([
                    data['img'].cuda().float(), data['annot'],
                    data['img_next'].cuda().float(), data['annot_next']
                ])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                reid_loss = reid_loss.mean()
                # loss = classification_loss + regression_loss + track_classification_losses
                loss = classification_loss + regression_loss + reid_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                # print frequency default=100 or e.g. --print_freq 500
                if total_iter % parser.print_freq == 0:
                    print('Epoch: {} | Iter: {} | Cls loss: {:1.5f} | Reid loss: {:1.5f} | Reg loss: {:1.5f} | Running loss: {:1.5f}'.format(
                        epoch_num, iter_num, float(classification_loss), float(reid_loss),
                        float(regression_loss), np.mean(loss_hist)))
            except Exception as e:
                print(e)
                continue

        scheduler.step(np.mean(epoch_loss))

        # Save a checkpoint of model at given interval of epochs e.g. --save_every 10
        if epoch_num % parser.save_every == 0:
            torch.save(retinanet, os.path.join(parser.model_dir, 'weights_epoch_' + str(epoch_num) + '.pt'))

    retinanet.eval()
    torch.save(retinanet, os.path.join(parser.model_dir, 'model_final.pt'))
    run_from_train(parser.model_dir, parser.root_path)
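# Example invocation (the script filename is assumed; the paths mirror the
# argument defaults defined above):
#   python train_ctracker.py --root_path /Dataset/Tracking/MOT17/ \
#       --csv_train train_annots.csv --csv_classes train_labels.csv \
#       --depth 50 --epochs 100 --save_every 10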
def main(args=None):
    """
    In the current implementation, if a test csv is provided, we use it as the
    validation set and combine the val and train csv's for training. If the
    train_all_labeled_data flag is used, we combine all 3 (if test is provided)
    for training and use a prespecified learning-rate step schedule.
    """
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)',
                        default=None)
    parser.add_argument('--csv_test',
                        help='Path to file containing test annotations (optional, if provided, train & val will be combined for training and test will be used for evaluation)',
                        default=None)
    parser.add_argument('--lr', type=float, default=2e-5)
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=101)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=25)
    parser.add_argument('--model_output_dir', type=str, default='models')
    parser.add_argument('--train_all_labeled_data',
                        help='Combine train, val, and test into 1 training set. Will use prespecified learning rate scheduler steps',
                        action='store_true')
    parser.add_argument('--resnet-backbone-normalization', choices=['batch_norm', 'group_norm'],
                        type=str, default='batch_norm')
    parser = parser.parse_args(args)

    print('Learning Rate: {}'.format(parser.lr))
    print('Normalization: ', parser.resnet_backbone_normalization)

    # Create folder - will raise error if folder exists
    assert not os.path.exists(parser.model_output_dir)
    os.mkdir(parser.model_output_dir)

    if parser.csv_train is None:
        raise ValueError('Must provide --csv_train when training,')
    if parser.csv_classes is None:
        raise ValueError('Must provide --csv_classes when training,')
    if not parser.csv_val and parser.csv_test:
        raise ValueError('Cannot specify test set without specifying validation set')

    if parser.train_all_labeled_data:
        csv_paths = [parser.csv_train, parser.csv_val, parser.csv_test]
        train_csv = []
        for path in csv_paths:
            if isinstance(path, str):
                train_csv.append(path)
        val_csv = None
    else:
        if parser.csv_train and parser.csv_val and parser.csv_test:
            train_csv = [parser.csv_train, parser.csv_val]  # Combine train and val sets for training
            val_csv = parser.csv_test
        else:
            train_csv = parser.csv_train
            val_csv = parser.csv_val

    print('loading train data')
    print(train_csv)
    dataset_train = CSVDataset(train_file=train_csv, class_list=parser.csv_classes,
                               transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
    print(dataset_train.__len__())

    if val_csv is None:
        dataset_val = None
        print('No validation annotations provided.')
    else:
        dataset_val = CSVDataset(train_file=val_csv, class_list=parser.csv_classes,
                                 transform=transforms.Compose([Normalizer(), Resizer()]))

    print('putting data into loader')
    sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    print('creating model')
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True,
                                   normalization=parser.resnet_backbone_normalization)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True,
                                   normalization=parser.resnet_backbone_normalization)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True,
                                   normalization=parser.resnet_backbone_normalization)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True,
                                    normalization=parser.resnet_backbone_normalization)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True,
                                    normalization=parser.resnet_backbone_normalization)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True
    optimizer = optim.Adam(retinanet.parameters(), lr=parser.lr)
    lr_factor = 0.3
    if not parser.train_all_labeled_data:
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, factor=lr_factor, verbose=True)
    else:
        # these milestones are for when using the lung masks - not for unmasked lung data
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[12, 16, 20, 24], gamma=lr_factor)
        # masked training
        # scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[14, 18, 22, 26], gamma=lr_factor)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    # initialize tensorboard
    writer = SummaryWriter(comment=parser.model_output_dir)

    # Augmentation
    seq = iaa.Sequential([
        iaa.Fliplr(0.5),
        iaa.Flipud(0.5),
        iaa.Affine(scale={"x": (1.0, 1.2), "y": (1.0, 1.2)}, rotate=(-20, 20), shear=(-4, 4))
    ], random_order=True)

    def augment(data, seq):
        for n, img in enumerate(data['img']):
            # imgaug needs dims in format (H, W, C)
            image = data['img'][n].permute(1, 2, 0).numpy()
            bbs_array = []
            for ann in data['annot'][n]:
                x1, y1, x2, y2, _ = ann
                bbs_array.append(BoundingBox(x1=x1, y1=y1, x2=x2, y2=y2))
            bbs = BoundingBoxesOnImage(bbs_array, shape=image.shape)
            image_aug, bbs_aug = seq(image=image, bounding_boxes=bbs)
            # save augmented image and change dims back to (C, H, W)
            data['img'][n] = torch.tensor(image_aug.copy()).permute(2, 0, 1)
            # save augmented annotations
            for i, bbox in enumerate(bbs_aug.bounding_boxes):
                x1, y1, x2, y2 = bbox.x1, bbox.y1, bbox.x2, bbox.y2
                obj_class = data['annot'][n][i][-1]
                data['annot'][n][i] = torch.tensor([x1, y1, x2, y2, obj_class])
        return data

    print('Num training images: {}'.format(len(dataset_train)))

    dir_training_images = os.path.join(os.getcwd(), writer.log_dir, 'training_images')
    os.mkdir(dir_training_images)

    best_validation_loss = None
    best_validation_map = None

    for epoch_num in range(parser.epochs):
        writer.add_scalar('Train/LR', optimizer.param_groups[0]['lr'], epoch_num)
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                data = augment(data, seq)

                # save a few training images to see what augmentation looks like
                if iter_num % 100 == 0 and epoch_num == 0:
                    x1, y1, x2, y2, _ = data['annot'][0][0]
                    fig, ax = plt.subplots(1)
                    ax.imshow(data['img'][0][1])
                    rect = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=1,
                                             edgecolor='r', facecolor='none', alpha=1)
                    ax.add_patch(rect)
                    fig.savefig(os.path.join(dir_training_images, '{}.png'.format(iter_num)))
                    plt.close()

                classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                if parser.resnet_backbone_normalization == 'batch_norm':
                    torch.nn.utils.clip_grad_norm_(parameters=retinanet.parameters(), max_norm=0.1)
                else:
                    # Decrease norm to reduce risk of exploding gradients
                    torch.nn.utils.clip_grad_norm_(parameters=retinanet.parameters(), max_norm=0.01)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(
                    epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        writer.add_scalar('Train/Loss', np.mean(epoch_loss), epoch_num)

        if not parser.train_all_labeled_data:
            print('Evaluating Validation Loss...')
            with torch.no_grad():
                retinanet.train()
                val_losses, val_class_losses, val_reg_losses = [], [], []
                for val_iter_num, val_data in enumerate(dataloader_val):
                    try:
                        val_classification_loss, val_regression_loss = retinanet(
                            [val_data['img'].cuda().float(), val_data['annot']])
                        val_losses.append(float(val_classification_loss) + float(val_regression_loss))
                        val_class_losses.append(float(val_classification_loss))
                        val_reg_losses.append(float(val_regression_loss))
                        del val_classification_loss, val_regression_loss
                    except Exception as e:
                        print(e)
                        continue

                print('VALIDATION Epoch: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Total loss: {:1.5f}'.format(
                    epoch_num, np.mean(val_class_losses), np.mean(val_reg_losses), np.mean(val_losses)))

                # Save model with best validation loss
                if best_validation_loss is None:
                    best_validation_loss = np.mean(val_losses)
                if best_validation_loss >= np.mean(val_losses):
                    best_validation_loss = np.mean(val_losses)
                    torch.save(retinanet.module, parser.model_output_dir + '/best_result_valloss.pt')
                writer.add_scalar('Validation/Loss', np.mean(val_losses), epoch_num)

                # Calculate Validation mAP
                print('Evaluating validation mAP')
                mAP = csv_eval.evaluate(dataset_val, retinanet)
                print('Validation mAP: ' + str(mAP[0][0]))
                if best_validation_map is None:
                    best_validation_map = mAP[0][0]
                elif best_validation_map < mAP[0][0]:
                    best_validation_map = mAP[0][0]
                    torch.save(retinanet.module, parser.model_output_dir + '/best_result_valmAP.pt')
                writer.add_scalar('Validation/mAP', mAP[0][0], epoch_num)

        if not parser.train_all_labeled_data:
            scheduler.step(np.mean(val_losses))
        else:
            scheduler.step()

        torch.save(retinanet.module, parser.model_output_dir + '/retinanet_{}.pt'.format(epoch_num))

    retinanet.eval()
    torch.save(retinanet, parser.model_output_dir + '/model_final.pt')
def bbox_extraction(file_list='./data/images2.csv'):
    weights_path = './models/csv_retinanet_25.pt'
    csv_classes = './classes.csv'

    dataset_val = CSVDataset(train_file=file_list, class_list=csv_classes,
                             transform=transforms.Compose([Normalizer(), Resizer()]))
    # dataset_val = CSVDataset(train_file=file_list, class_list=csv_classes, transform=transforms.Compose([Normalizer()]))
    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)

    retinanet = model.resnet50(num_classes=dataset_val.num_classes(), pretrained=False)
    retinanet.load_state_dict(torch.load(weights_path))

    use_gpu = True
    if torch.cuda.is_available():
        device = torch.device('cuda')
    if use_gpu:
        retinanet = retinanet.to(device)

    retinanet.eval()
    unnormalize = UnNormalizer()

    for idx, data in enumerate(dataloader_val):
        with torch.no_grad():
            scores, classification, transformed_anchors = retinanet(data['img'].to(device).float())

            def get_bbox(classification, transformed_anchors, label=0):
                # pick the first detection of the given class
                bbox = {}
                idx = np.where(classification == label)[0][0]
                co_ord = transformed_anchors[idx, :]
                bbox['x1'] = int(co_ord[0])
                bbox['y1'] = int(co_ord[1])
                bbox['x2'] = int(co_ord[2])
                bbox['y2'] = int(co_ord[3])
                return bbox

            scores = scores.cpu().numpy()
            classification = classification.cpu().numpy()
            transformed_anchors = transformed_anchors.cpu().numpy()
            # print('scores:', scores)
            # print('classification:', classification)
            # print('transformed_anchors', transformed_anchors)

            bbox = {}
            bbox['neck'] = get_bbox(classification, transformed_anchors, label=0)
            bbox['stomach'] = get_bbox(classification, transformed_anchors, label=1)
            # print('neck', bbox['neck'])
            # print('stomach', bbox['stomach'])

            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
            img[img < 0] = 0
            img[img > 255] = 255
            img = np.transpose(img, (1, 2, 0))
            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)
            cv2.rectangle(img, (bbox['neck']['x1'], bbox['neck']['y1']),
                          (bbox['neck']['x2'], bbox['neck']['y2']), color=(0, 0, 255), thickness=2)
            cv2.rectangle(img, (bbox['stomach']['x1'], bbox['stomach']['y1']),
                          (bbox['stomach']['x2'], bbox['stomach']['y2']), color=(0, 0, 255), thickness=2)
            # cv2.imshow('img', img)
            # cv2.imwrite('./sample_11.jpg', img)
            # cv2.waitKey(0)

            # returns after the first image in the list
            return bbox

# bbox_extraction()
# if __name__ == '__main__':
#     main()
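# Shape of the dict returned by bbox_extraction (a sketch; values illustrative,
# coordinates are in the resized image space produced by Resizer):
#   {'neck':    {'x1': 102, 'y1': 40,  'x2': 210, 'y2': 95},
#    'stomach': {'x1': 88,  'y1': 160, 'x2': 240, 'y2': 290}}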
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple evaluation script for a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                 transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)

    # retinanet = torch.load(parser.model)
    retinanet = model.resnet50(num_classes=80, pretrained=True)
    retinanet.load_state_dict(torch.load(parser.model))

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()

    if not os.path.isdir('./detection_files'):
        os.makedirs('./detection_files')

    for idx, data in enumerate(dataloader_val):
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())
            print('Elapsed time: {}'.format(time.time() - st))
            idxs = np.where(scores > 0.35)
            img_name = data['img_name'].split('.')[0]
            with open('./detection_files/' + img_name + '.txt', 'w') as f:
                for j in range(idxs[0].shape[0]):
                    bbox = transformed_anchors[idxs[0][j], :]
                    x1 = int(bbox[0])
                    y1 = int(bbox[1])
                    x2 = int(bbox[2])
                    y2 = int(bbox[3])
                    label_name = dataset_val.labels[int(classification[idxs[0][j]])]
                    # write the actual label (the original left 'label_name' as a literal)
                    f.write('{},{},{},{},{}'.format(x1, y1, x2, y2, label_name))
                    if j < idxs[0].shape[0] - 1:
                        f.write('\n')
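# Each ./detection_files/<image>.txt then holds one detection per line as
# x1,y1,x2,y2,label, e.g. (values illustrative):
#   448,153,643,297,person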
def main(args=None): parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.') parser.add_argument('--dataset',default="csv", help='Dataset type, must be one of csv or coco.') parser.add_argument('--coco_path',default="/home/mayank-s/PycharmProjects/Datasets/coco",help='Path to COCO directory') parser.add_argument('--csv_train',default="berkely_ready_to_train_for_retinanet_pytorch.csv", help='Path to file containing training annotations (see readme)') parser.add_argument('--csv_classes',default="berkely_class.csv", help='Path to file containing class list (see readme)') parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)') parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50) parser.add_argument('--epochs', help='Number of epochs', type=int, default=200) # parser.add_argument('--resume', default=0, help='resume from checkpoint') parser = parser.parse_args(args) # print(args.resume) # Create the data loaders if parser.dataset == 'coco': if parser.coco_path is None: raise ValueError('Must provide --coco_path when training on COCO,') dataset_train = CocoDataset(parser.coco_path, set_name='train2014', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()])) dataset_val = CocoDataset(parser.coco_path, set_name='val2014', transform=transforms.Compose([Normalizer(), Resizer()])) elif parser.dataset == 'csv': if parser.csv_train is None: raise ValueError('Must provide --csv_train when training on COCO,') if parser.csv_classes is None: raise ValueError('Must provide --csv_classes when training on COCO,') dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()])) if parser.csv_val is None: dataset_val = None print('No validation annotations provided.') else: dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()])) else: raise ValueError('Dataset type not understood (must be csv or coco), exiting.') sampler = AspectRatioBasedSampler(dataset_train, batch_size=4, drop_last=False) dataloader_train = DataLoader(dataset_train, num_workers=0, collate_fn=collater, batch_sampler=sampler) if dataset_val is not None: sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val) # Create the model if parser.depth == 18: retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 34: retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 50: retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 101: retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 152: retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True) else: raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152') use_gpu = True # if use_gpu: if torch.cuda.is_available(): retinanet = retinanet.cuda() retinanet = torch.nn.DataParallel(retinanet).cuda() retinanet.training = True ###################################################################################3 # # args.resume=0 # Resume_model = False # start_epoch=0 # if 
Resume_model: # print('==> Resuming from checkpoint..') # checkpoint = torch.load('./checkpoint/saved_with_epochs/retina_fpn_1') # retinanet.load_state_dict(checkpoint['net']) # best_loss = checkpoint['loss'] # start_epoch = checkpoint['epoch'] # print('Resuming from epoch:{ep} loss:{lp}'.format(ep=start_epoch, lp=best_loss)) ##################################################################################### optimizer = optim.Adam(retinanet.parameters(), lr=1e-5) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) loss_hist = collections.deque(maxlen=500) retinanet.train() retinanet.module.freeze_bn() print('Num training images: {}'.format(len(dataset_train))) retinanet = torch.load("./checkpoint/retina_fpn_1") # epoch_num=start_epoch for epoch_num in range(parser.epochs): # retinanet.train() # retinanet.module.freeze_bn() epoch_loss = [] for iter_num, data in enumerate(dataloader_train): try: optimizer.zero_grad() if torch.cuda.is_available(): classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']]) else: classification_loss, regression_loss = retinanet([data['img'].float(), data['annot']]) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() loss = classification_loss + regression_loss if bool(loss == 0): continue loss.backward() torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1) optimizer.step() loss_hist.append(float(loss)) epoch_loss.append(float(loss)) print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist))) del classification_loss del regression_loss except Exception as e: print(e) continue # print("Saving model...") # name = "./checkpoint/retina_fpn_" + str(epoch_num) # torch.save(retinanet, name) # ################################################################### print('Saving..') state = { 'net': retinanet.module.state_dict(), 'loss': loss_hist, 'epoch': epoch_num, } if not os.path.isdir('checkpoint/saved_with_epochs'): os.makedirs('checkpoint/saved_with_epochs') # checkpoint_path="./checkpoint/Ckpt_"+ name = "./checkpoint/saved_with_epochs/retina_fpn_" + str(epoch_num) torch.save(state, name) # torch.save(state, './checkpoint/retinanet.pth') #####################################################################
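# A hedged sketch of resuming from the checkpoint dict saved above
# ({'net': ..., 'loss': ..., 'epoch': ...}). It assumes the model was rebuilt
# and wrapped in DataParallel exactly as in the training script, since 'net'
# holds the state dict of retinanet.module.
import torch

def resume_from_checkpoint(retinanet, path):
    """Load saved weights; return the epoch to resume training from."""
    checkpoint = torch.load(path, map_location='cpu')
    retinanet.module.load_state_dict(checkpoint['net'])
    start_epoch = checkpoint['epoch'] + 1
    print('Resuming from epoch {}'.format(start_epoch))
    return start_epoch

# start_epoch = resume_from_checkpoint(retinanet, './checkpoint/saved_with_epochs/retina_fpn_0')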
def main(args=None): parser = argparse.ArgumentParser( description='Simple training script for training a RetinaNet network.') parser.add_argument('--dataset', default="csv", help='Dataset type, must be one of csv or coco.') parser.add_argument('--coco_path', help='Path to COCO directory') parser.add_argument( '--csv_train', default="./data/train_only.csv", help='Path to file containing training annotations (see readme)') parser.add_argument('--csv_classes', default="./data/classes.csv", help='Path to file containing class list (see readme)') parser.add_argument( '--csv_val', default="./data/train_only.csv", help= 'Path to file containing validation annotations (optional, see readme)' ) parser.add_argument('--voc_train', default="./data/voc_train/", help='Path to directory containing images and annotations') parser.add_argument('--voc_val', default="./data/voc_train/", help='Path to directory containing images and annotations') parser.add_argument( '--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=101) parser.add_argument('--epochs', help='Number of epochs', type=int, default=40) parser = parser.parse_args(args) # Create the data loaders if parser.dataset == 'coco': if parser.coco_path is None: raise ValueError('Must provide --coco_path when training on COCO.') dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose( [Normalizer(), Resizer()])) elif parser.dataset == 'csv': if parser.csv_train is None: raise ValueError('Must provide --csv_train when training on CSV.') if parser.csv_classes is None: raise ValueError( 'Must provide --csv_classes when training on CSV.') dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) if parser.csv_val is None: dataset_val = None print('No validation annotations provided.') else: dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Resizer()])) elif parser.dataset == 'voc': if parser.voc_train is None: raise ValueError( 'Must provide --voc_train when training on PASCAL VOC.') dataset_train = XML_VOCDataset( img_path=parser.voc_train + 'JPEGImages/', xml_path=parser.voc_train + 'Annotations/', class_list=class_list, transform=transforms.Compose( [Normalizer(), Augmenter(), ResizerMultiScale()])) if parser.voc_val is None: dataset_val = None print('No validation annotations provided.') else: dataset_val = XML_VOCDataset( img_path=parser.voc_val + 'JPEGImages/', xml_path=parser.voc_val + 'Annotations/', class_list=class_list, transform=transforms.Compose([Normalizer(), Resizer()])) else: raise ValueError( 'Dataset type not understood (must be csv, coco or voc), exiting.') sampler = AspectRatioBasedSampler(dataset_train, batch_size=1, drop_last=False) dataloader_train = DataLoader(dataset_train, num_workers=2, collate_fn=collater, batch_sampler=sampler) if dataset_val is not None: sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=2, collate_fn=collater, batch_sampler=sampler_val) # Create the model if parser.depth == 18: retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 34: retinanet = model.resnet34(num_classes=dataset_train.num_classes(), 
pretrained=True) elif parser.depth == 50: retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 101: retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 152: retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True) else: raise ValueError( 'Unsupported model depth, must be one of 18, 34, 50, 101, 152') use_gpu = True if use_gpu: retinanet = retinanet.cuda() retinanet = torch.nn.DataParallel(retinanet).cuda() retinanet.training = True optimizer = optim.Adam(retinanet.parameters(), lr=1e-4) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=15, verbose=True, mode="max") #scheduler = optim.lr_scheduler.StepLR(optimizer,8) loss_hist = collections.deque(maxlen=1024) retinanet.train() retinanet.module.freeze_bn() if not os.path.exists("./logs"): os.mkdir("./logs") log_file = open("./logs/log.txt", "w") print('Num training images: {}'.format(len(dataset_train))) best_map = 0 print("Training models...") for epoch_num in range(parser.epochs): #scheduler.step(epoch_num) retinanet.train() retinanet.module.freeze_bn() epoch_loss = [] for iter_num, data in enumerate(dataloader_train): #print('iter num is: ', iter_num) try: #print(csv_eval.evaluate(dataset_val[:20], retinanet)[0]) #print(type(csv_eval.evaluate(dataset_val, retinanet))) #print('iter num is: ', iter_num % 10 == 0) optimizer.zero_grad() classification_loss, regression_loss = retinanet( [data['img'].cuda().float(), data['annot']]) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() loss = classification_loss + regression_loss #print(loss) if bool(loss == 0): continue loss.backward() torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1) optimizer.step() loss_hist.append(float(loss)) epoch_loss.append(float(loss)) if iter_num % 50 == 0: print( 'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}' .format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist))) log_file.write( 'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f} \n' .format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist))) del classification_loss del regression_loss except Exception as e: print(e) continue if parser.dataset == 'coco': print('Evaluating dataset') coco_eval.evaluate_coco(dataset_val, retinanet) elif parser.dataset == 'csv' and parser.csv_val is not None: print('Evaluating dataset') mAP = csv_eval.evaluate(dataset_val, retinanet) elif parser.dataset == 'voc' and parser.voc_val is not None: print('Evaluating dataset') mAP = voc_eval.evaluate(dataset_val, retinanet) is_best_map = False try: is_best_map = mAP[0][0] > best_map best_map = max(mAP[0][0], best_map) except Exception: pass if is_best_map: print("New best mAP: ", best_map) torch.save(retinanet.module, './logs/{}_scale15_{}.pt'.format(epoch_num, best_map)) shutil.copyfile( './logs/{}_scale15_{}.pt'.format(epoch_num, best_map), "./best_models/model.pt") else: print("Best mAP so far: ", best_map) scheduler.step(best_map) retinanet.eval() torch.save(retinanet, './logs/model_final.pt')
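# The loop above saves the whole module (torch.save(retinanet.module, ...)) and
# copies the best epoch to ./best_models/model.pt, so inference can reload it in
# one call. A minimal sketch; the defining model classes must be importable when
# unpickling:
import torch

best_model = torch.load('./best_models/model.pt')  # full pickled module, not a state_dict
best_model = best_model.cuda().eval()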
def visualize(csv_val, csv_classes, model): dataset = "csv" if dataset == 'csv': dataset_val = CSVDataset(train_file=csv_val, class_list=csv_classes, transform=transforms.Compose( [Normalizer(), Resizer()])) else: raise ValueError( 'Dataset type not understood (must be csv or coco), exiting.') sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val) retinanet = torch.load(model) use_gpu = True if use_gpu: retinanet = retinanet.cuda() retinanet.eval() unnormalize = UnNormalizer() def draw_caption(image, box, caption): b = np.array(box).astype(int) cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2) cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 180, 0), 1) def draw_caption_original(image, box, caption): b = np.array(box).astype(int) #print("b", b) cv2.putText(image, caption, (b[0], b[3] + 20), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2) cv2.putText(image, caption, (b[0], b[3] + 20), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 180), 1) kaggle_output = [] for idx, data in enumerate(dataloader_val): print(idx) kaggle_row = [] with torch.no_grad(): st = time.time() #print("data shape:", data['img'].shape) scores, classification, transformed_anchors = retinanet( data['img'].cuda().float()) #print('Elapsed time: {}'.format(time.time()-st)) idxs = np.where(scores > 0.5) img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy() print('Scores', scores) #print("name", data['name']) img[img < 0] = 0 img[img > 255] = 255 img = np.transpose(img, (1, 2, 0)) img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB) kaggle_row.append(get_filename(data['name'][0])) row = '' for j in range(idxs[0].shape[0]): bbox = transformed_anchors[idxs[0][j], :] x1 = int(bbox[0]) y1 = int(bbox[1]) x2 = int(bbox[2]) y2 = int(bbox[3]) label_name = dataset_val.labels[int( classification[idxs[0][j]])] draw_caption(img, (x1, y1, x2, y2), "Predicted opacity") cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 255, 0), thickness=2) #print(x1, y1, x2, y2) if (j == 0): row = row + str(round( scores[idxs[0][j]].item(), 2)) + " " + str(x1) + ' ' + str( y1) + ' ' + str(x2 - x1) + ' ' + str(y2 - y1) else: row = row + " " + str(round( scores[idxs[0][j]].item(), 2)) + " " + str(x1) + ' ' + str( y1) + ' ' + str(x2 - x1) + ' ' + str(y2 - y1) for ann in data['annot']: for annotation in ann: #print("Original annot:", ann) if annotation[0] != -1: draw_caption_original(img, (annotation[0], annotation[1], annotation[2], annotation[3]), "Real opacity") cv2.rectangle(img, (annotation[0], annotation[1]), (annotation[2], annotation[3]), color=(0, 0, 255), thickness=2) cv2.imshow('img', img) kaggle_row.append(row) #print(kaggle_row) #print(idxs) kaggle_output.append(kaggle_row) cv2.waitKey(0) import pandas as pd pd.DataFrame(kaggle_output, columns=[ 'patientId', 'PredictionString' ]).to_csv("/home/jdmaestre/PycharmProjects/test_kaggle.csv")
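# The row-building above can be factored into a helper producing the same Kaggle
# 'PredictionString' layout (space-separated 'score x y width height' groups).
# A sketch under the same conventions; the function name is illustrative only.
def to_prediction_string(scores, boxes):
    """scores: iterable of floats; boxes: iterable of (x1, y1, x2, y2) in pixels."""
    parts = []
    for score, (x1, y1, x2, y2) in zip(scores, boxes):
        parts.append('{} {} {} {} {}'.format(round(score, 2), x1, y1, x2 - x1, y2 - y1))
    return ' '.join(parts)

# to_prediction_string([0.87], [(10, 20, 110, 220)]) -> '0.87 10 20 100 200'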
def main(args=None): parser = argparse.ArgumentParser( description='Simple testing script for RetinaNet network.') parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.', default="csv") parser.add_argument('--coco_path', help='Path to COCO directory') parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)', default="binary_class.csv") parser.add_argument( '--csv_val', help= 'Path to file containing validation annotations (optional, see readme)' ) parser.add_argument( '--csv_box_annot', help='Path to file containing predicted box annotations ') parser.add_argument( '--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=18) parser.add_argument('--epochs', help='Number of epochs', type=int, default=500) parser.add_argument('--model', help='Path of .pt file with trained model', default='esposallescsv_retinanet_0.pt') parser.add_argument('--model_out', help='Path of .pt file with trained model to save', default='trained') parser.add_argument('--score_threshold', help='Score above which boxes are kept', default=0.15) parser.add_argument('--nms_threshold', help='IoU threshold for non-maximum suppression', default=0.2) parser.add_argument('--max_epochs_no_improvement', help='Max epochs without improvement', default=100) parser.add_argument('--max_boxes', help='Max boxes to be fed to recognition', default=50) parser.add_argument('--seg_level', help='Line or word, to choose anchor aspect ratio', default='line') parser.add_argument( '--htr_gt_box', help='Train recognition branch with box gt (for debugging)', default=False) parser.add_argument( '--binary_classifier', help= 'Whether to use the classification branch as binary instead of multiclass.', default='False') parser = parser.parse_args(args) # Create the data loaders if parser.dataset == 'csv': if parser.csv_classes is None: raise ValueError( 'Must provide --csv_classes when testing on CSV.') if parser.csv_val is None: dataset_val = None print('No validation annotations provided.') else: dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Resizer()])) if parser.csv_box_annot is not None: box_annot_data = CSVDataset(train_file=parser.csv_box_annot, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Resizer()])) else: box_annot_data = None else: raise ValueError( 'Dataset type not understood (must be csv or coco), exiting.') if dataset_val is not None: sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater, batch_sampler=sampler_val) if box_annot_data is not None: sampler_val = AspectRatioBasedSampler(box_annot_data, batch_size=1, drop_last=False) dataloader_box_annot = DataLoader(box_annot_data, num_workers=0, collate_fn=collater, batch_sampler=sampler_val) else: dataloader_box_annot = dataloader_val if not os.path.exists('trained_models'): os.mkdir('trained_models') # Create the model alphabet = dataset_val.alphabet if os.path.exists(parser.model): retinanet = torch.load(parser.model) else: print("Choose an existing saved model path.") sys.exit() use_gpu = True if use_gpu: retinanet = retinanet.cuda() retinanet = torch.nn.DataParallel(retinanet).cuda() #retinanet = torch.load('../Documents/TRAINED_MODELS/pytorch-retinanet/esposallescsv_retinanet_99.pt') #print "LOADED pretrained MODEL\n\n" optimizer = optim.Adam(retinanet.parameters(), lr=1e-4) scheduler = 
optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=4, verbose=True) loss_hist = collections.deque(maxlen=500) ctc = CTCLoss() retinanet.module.freeze_bn() best_cer = 1000 epochs_no_improvement = 0 cers = [] retinanet.eval() retinanet.module.epochs_only_det = 0 #retinanet.module.htr_gt_box = False retinanet.training = False if parser.score_threshold is not None: retinanet.module.score_threshold = float(parser.score_threshold) '''if parser.dataset == 'csv' and parser.csv_val is not None: print('Evaluating dataset') ''' mAP, binary_mAP, cer = csv_eval.evaluate( dataset_val, retinanet, score_threshold=retinanet.module.score_threshold)
from torch.utils.data import Dataset, DataLoader from dataloader import CocoDataset, CSVDataset, collater, Resizer, AspectRatioBasedSampler, Augmenter, UnNormalizer, Normalizer, TXTDataset from torchvision import datasets, models, transforms import argparse import csv_eval import os import torch ap = argparse.ArgumentParser(description='Just for eval.') ap.add_argument('-B', '--ImageNamePath') ap.add_argument('-A', '--AnnotationPath') ap.add_argument('-I', '--ImagePath') args = vars(ap.parse_args()) dataset_val = TXTDataset(ImgNamePth=args['ImageNamePath'], AnnoPth=args['AnnotationPath'], ImgPth=args['ImagePath'], transform=transforms.Compose( [Normalizer(), Resizer()])) sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val) modelpath = os.path.join(os.path.dirname(__file__), 'weights/model.pt') modelretina = torch.load(modelpath) modelretina.cuda() modelretina.eval() AP = csv_eval.evaluate(dataset_val, modelretina) mean_ap = (AP[0][0] + AP[1][0]) / 2 print("mAP: " + str(mean_ap))
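# Averaging AP[0][0] and AP[1][0] hard-codes exactly two classes. If, as in the
# common pytorch-retinanet csv_eval, evaluate() returns a dict mapping each label
# to an (AP, num_annotations) pair (an assumption here), the mean generalises:
def mean_average_precision(average_precisions):
    aps = [ap for ap, num_annotations in average_precisions.values() if num_annotations > 0]
    return sum(aps) / len(aps) if aps else 0.0

# print("mAP: " + str(mean_average_precision(AP)))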
def main(): data_type = 'coco' data_root_dir = '/data/data_coco/' # model_depth = 50 epoch_max = 100 batch_size = 8 if data_type == 'coco': dataset_train = CocoDataset(data_root_dir, set_name='train2017', transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) dataset_val = CocoDataset(data_root_dir, set_name='val2017', transform=transforms.Compose( [Normalizer(), Resizer()])) else: print('Not supported yet.') sampler = AspectRatioBasedSampler(dataset_train, batch_size=batch_size, drop_last=True) loader_train = DataLoader(dataset_train, num_workers=8, collate_fn=collater, batch_sampler=sampler) sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=batch_size, drop_last=True) loader_val = DataLoader(dataset_val, num_workers=8, collate_fn=collater, batch_sampler=sampler_val) retinanet = model.retinanet_50(dataset_train.num_classes(), pretrained=True) retinanet = retinanet.cuda() optimizer = torch.optim.Adam(retinanet.parameters(), lr=1e-4) # optimizer = torch.optim.SGD(retinanet.parameters(), lr=1e-4, momentum=0.9, weight_decay=5e-4) scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True, factor=0.5) model_pretrain_dir = './model/model_final.pt' if os.path.exists(model_pretrain_dir): print('Pretrained model exists, loading it.') retinanet = torch.load(model_pretrain_dir) print('train images num: {}'.format(len(loader_train) * batch_size)) for epoch_num in range(epoch_max): retinanet.train() epoch_loss = [] for iter_num, data in enumerate(loader_train): optimizer.zero_grad() input_tensor = [data['img'].cuda().float(), data['annot']] classification_loss, regression_loss = retinanet(input_tensor) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() loss = classification_loss + regression_loss epoch_loss.append(float(loss)) if loss.item() == 0: continue loss.backward() optimizer.step() print( 'Epoch:{}/{} | Iters:{}/{} | C loss:{:.4f} | R loss:{:.4f} | Current loss:{:.4f} | Current LR:{:.7f}' .format(epoch_num + 1, epoch_max, iter_num + 1, len(loader_train), float(classification_loss), float(regression_loss), np.mean(epoch_loss), optimizer.param_groups[0]['lr'])) del classification_loss del regression_loss # run validation once per epoch eval.eval_coco(dataset_val, retinanet) scheduler.step(np.mean(epoch_loss)) torch.save( retinanet, './model/{}_retinanet_{}.pt'.format(data_type, epoch_num + 1)) retinanet.eval() torch.save(retinanet, './model/model_final.pt')
def main(args=None): parser = argparse.ArgumentParser(description='Training script for training an EfficientDet network.') parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.') parser.add_argument('--coco_path', help='Path to COCO directory') parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)') parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)') parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)') parser.add_argument('--phi', help='EfficientNet scaling coefficient.', type=int, default=0) parser.add_argument('--batch-size', help='Batch size', type=int, default=8) parser.add_argument('--epochs', help='Number of epochs', type=int, default=100) parser = parser.parse_args(args) # Create the data loaders if parser.dataset == 'coco': if parser.coco_path is None: raise ValueError('Must provide --coco_path when training on COCO.') dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer(img_size=512)])) dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose([Normalizer(), Resizer(img_size=512)])) elif parser.dataset == 'csv': if parser.csv_train is None: raise ValueError('Must provide --csv_train when training on CSV') if parser.csv_classes is None: raise ValueError('Must provide --csv_classes when training on CSV') dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()])) if parser.csv_val is None: dataset_val = None print('No validation annotations provided.') else: dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()])) else: raise ValueError('Dataset type not understood (must be csv or coco), exiting.') sampler = AspectRatioBasedSampler(dataset_train, batch_size=parser.batch_size, drop_last=False) dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler) if dataset_val is not None: sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val) # Create the model efficientdet = model.efficientdet(num_classes=dataset_train.num_classes(), pretrained=True, phi=parser.phi) use_gpu = True if use_gpu: efficientdet = efficientdet.cuda() efficientdet = torch.nn.DataParallel(efficientdet).cuda() efficientdet.training = True optimizer = optim.Adam(efficientdet.parameters(), lr=1e-5) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) loss_hist = collections.deque(maxlen=500) efficientdet.train() efficientdet.module.freeze_bn() print('Num training images: {}'.format(len(dataset_train))) for epoch_num in range(parser.epochs): efficientdet.train() efficientdet.module.freeze_bn() epoch_loss = [] print(('\n' + '%10s' * 5) % ('Epoch', 'gpu_mem', 'Loss', 'cls', 'rls')) pbar = tqdm(enumerate(dataloader_train), total=len(dataloader_train)) for iter_num, data in pbar: try: optimizer.zero_grad() classification_loss, regression_loss = efficientdet([data['img'].cuda().float(), data['annot']]) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() loss = classification_loss + regression_loss if bool(loss == 0): continue 
loss.backward() torch.nn.utils.clip_grad_norm_(efficientdet.parameters(), 0.1) optimizer.step() loss_hist.append(float(loss)) epoch_loss.append(float(loss)) mem = torch.cuda.memory_cached() / 1E9 if torch.cuda.is_available() else 0 # (GB) s = ('%10s' * 2 + '%10.3g' * 3) % ( '%g/%g' % (epoch_num, parser.epochs - 1), '%.3gG' % mem, np.mean(loss_hist), float(regression_loss), float(classification_loss)) pbar.set_description(s) del classification_loss del regression_loss except Exception as e: raise e if parser.dataset == 'coco': print('Evaluating dataset') coco_eval.evaluate_coco(dataset_val, efficientdet) elif parser.dataset == 'csv' and parser.csv_val is not None: print('Evaluating dataset') mAP = csv_eval.evaluate(dataset_val, efficientdet) scheduler.step(np.mean(epoch_loss)) torch.save(efficientdet.module, '{}_retinanet_{}.pt'.format(parser.dataset, epoch_num)) efficientdet.eval() torch.save(efficientdet, 'model_final.pt')
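# The progress bar above reports np.mean(loss_hist) over a bounded deque. If a
# true running mean over the whole epoch is wanted without storing values, the
# standard incremental form works; a self-contained sketch with stand-in losses:
mean_loss = 0.0
for n, x in enumerate([0.9, 0.7, 0.8], start=1):  # stand-in loss values
    mean_loss += (x - mean_loss) / n  # mean of the first n values
print(mean_loss)  # 0.8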
def main(args=None): parser = argparse.ArgumentParser( description='Simple training script for training a RetinaNet network.') parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.', default="csv") parser.add_argument('--coco_path', help='Path to COCO directory') parser.add_argument( '--csv_train', help='Path to file containing training annotations (see readme)') parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)', default="binary_class.csv") parser.add_argument( '--csv_val', help= 'Path to file containing validation annotations (optional, see readme)' ) parser.add_argument( '--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=18) parser.add_argument('--epochs', help='Number of epochs', type=int, default=500) parser.add_argument('--epochs_only_det', help='Number of epochs to train detection part', type=int, default=1) parser.add_argument('--max_epochs_no_improvement', help='Max epochs without improvement', type=int, default=100) parser.add_argument('--pretrained_model', help='Path of .pt file with pretrained model', default='esposallescsv_retinanet_0.pt') parser.add_argument('--model_out', help='Path of .pt file with trained model to save', default='trained') parser.add_argument('--score_threshold', help='Score above which boxes are kept', type=float, default=0.5) parser.add_argument('--nms_threshold', help='IoU threshold for non-maximum suppression', type=float, default=0.2) parser.add_argument('--max_boxes', help='Max boxes to be fed to recognition', default=95) parser.add_argument('--seg_level', help='[line, word], to choose anchor aspect ratio', default='word') parser.add_argument( '--early_stop_crit', help='Early stop criterion, detection (map) or transcription (cer)', default='cer') parser.add_argument('--max_iters_epoch', help='Max steps per epoch (for debugging)', default=1000000) parser.add_argument('--train_htr', help='Train recognition or not', default='True') parser.add_argument('--train_det', help='Train detection or not', default='True') parser.add_argument( '--binary_classifier', help= 'Whether to use the classification branch as binary instead of multiclass.', default='False') parser.add_argument( '--htr_gt_box', help='Train recognition branch with box gt (for debugging)', default='False') parser.add_argument( '--ner_branch', help='Train named entity recognition with separate branch', default='False') parser = parser.parse_args(args) if parser.dataset == 'csv': if parser.csv_train is None: raise ValueError('Must provide --csv_train') dataset_name = parser.csv_train.split("/")[-2] dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) if parser.csv_val is None: dataset_val = None print('No validation annotations provided.') else: dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Resizer()])) else: raise ValueError( 'Dataset type not understood (must be csv or coco), exiting.') # Files for training log if not os.path.exists('trained_models'): os.mkdir('trained_models') experiment_id = str(time.time()).split('.')[0] valid_cer_f = open('trained_models/' + parser.model_out + 'log.txt', 'w') for arg in vars(parser): if getattr(parser, arg) is not None: valid_cer_f.write( str(arg) + ' ' + str(getattr(parser, arg)) + '\n') current_commit = subprocess.check_output(['git', 'rev-parse', 'HEAD']) valid_cer_f.write(str(current_commit)) valid_cer_f.write( "epoch_num cer best cer mAP 
best mAP time\n") valid_cer_f.close() sampler = AspectRatioBasedSampler(dataset_train, batch_size=1, drop_last=False) dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler) if dataset_val is not None: sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater, batch_sampler=sampler_val) if not os.path.exists('trained_models'): os.mkdir('trained_models') # Create the model train_htr = parser.train_htr == 'True' htr_gt_box = parser.htr_gt_box == 'True' ner_branch = parser.ner_branch == 'True' binary_classifier = parser.binary_classifier == 'True' torch.backends.cudnn.benchmark = False alphabet = dataset_train.alphabet if os.path.exists(parser.pretrained_model): retinanet = torch.load(parser.pretrained_model) retinanet.classificationModel = ClassificationModel( num_features_in=256, num_anchors=retinanet.anchors.num_anchors, num_classes=dataset_train.num_classes()) if ner_branch: retinanet.nerModel = NERModel( feature_size=256, pool_h=retinanet.pool_h, n_classes=dataset_train.num_classes(), pool_w=retinanet.pool_w) else: if parser.depth == 18: retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True, max_boxes=int(parser.max_boxes), score_threshold=float( parser.score_threshold), seg_level=parser.seg_level, alphabet=alphabet, train_htr=train_htr, htr_gt_box=htr_gt_box, ner_branch=ner_branch, binary_classifier=binary_classifier) elif parser.depth == 34: retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True, max_boxes=int(parser.max_boxes), score_threshold=float( parser.score_threshold), seg_level=parser.seg_level, alphabet=alphabet, train_htr=train_htr, htr_gt_box=htr_gt_box) elif parser.depth == 50: retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 101: retinanet = model.resnet101( num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 152: retinanet = model.resnet152( num_classes=dataset_train.num_classes(), pretrained=True) else: raise ValueError( 'Unsupported model depth, must be one of 18, 34, 50, 101, 152') use_gpu = True train_htr = parser.train_htr == 'True' train_det = parser.train_det == 'True' retinanet.htr_gt_box = parser.htr_gt_box == 'True' retinanet.train_htr = train_htr retinanet.epochs_only_det = parser.epochs_only_det if use_gpu: retinanet = retinanet.cuda() retinanet = torch.nn.DataParallel(retinanet).cuda() retinanet.training = True optimizer = optim.Adam(retinanet.parameters(), lr=1e-4) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=50, verbose=True) loss_hist = collections.deque(maxlen=500) ctc = CTCLoss() retinanet.train() retinanet.module.freeze_bn() best_cer = 1000 best_map = 0 epochs_no_improvement = 0 verbose_each = 20 optimize_each = 1 objective = 100 best_objective = 10000 print(('Num training images: {}'.format(len(dataset_train)))) for epoch_num in range(parser.epochs): cers = [] retinanet.training = True retinanet.train() retinanet.module.freeze_bn() epoch_loss = [] for iter_num, data in enumerate(dataloader_train): if iter_num > int(parser.max_iters_epoch): break try: if iter_num % optimize_each == 0: optimizer.zero_grad() (classification_loss, regression_loss, ctc_loss, ner_loss) = retinanet([ data['img'].cuda().float(), data['annot'], ctc, epoch_num ]) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() if train_det: if 
train_htr: loss = ctc_loss + classification_loss + regression_loss + ner_loss else: loss = classification_loss + regression_loss + ner_loss elif train_htr: loss = ctc_loss else: continue if bool(loss == 0): continue loss.backward() torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1) if iter_num % verbose_each == 0: print(( 'Epoch: {} | Step: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | CTC loss: {:1.5f} | NER loss: {:1.5f} | Running loss: {:1.5f} | Total loss: {:1.5f}\r' .format(epoch_num, iter_num, float(classification_loss), float(regression_loss), float(ctc_loss), float(ner_loss), np.mean(loss_hist), float(loss)))) optimizer.step() loss_hist.append(float(loss)) epoch_loss.append(float(loss)) torch.cuda.empty_cache() except Exception as e: print(e) continue if parser.dataset == 'csv' and parser.csv_val is not None and train_det: print('Evaluating dataset') mAP, text_mAP, current_cer = csv_eval.evaluate( dataset_val, retinanet, score_threshold=parser.score_threshold) #text_mAP,_ = csv_eval_binary_map.evaluate(dataset_val, retinanet,score_threshold=parser.score_threshold) objective = current_cer * (1 - mAP) retinanet.eval() retinanet.training = False retinanet.score_threshold = float(parser.score_threshold) '''for idx,data in enumerate(dataloader_val): if idx>int(parser.max_iters_epoch): break print("Eval CER on validation set:",idx,"/",len(dataset_val),"\r") image_name = dataset_val.image_names[idx].split('/')[-1].split('.')[-2] #generate_pagexml(image_name,data,retinanet,parser.score_threshold,parser.nms_threshold,dataset_val) text_gt =".".join(dataset_val.image_names[idx].split('.')[:-1])+'.txt' f =open(text_gt,'r') text_gt_lines=f.readlines()[0] transcript_pred = get_transcript(image_name,data,retinanet,float(parser.score_threshold),float(parser.nms_threshold),dataset_val,alphabet) cers.append(float(editdistance.eval(transcript_pred,text_gt_lines))/len(text_gt_lines))''' t = str(time.time()).split('.')[0] valid_cer_f.close() #print("GT",text_gt_lines) #print("PREDS SAMPLE:",transcript_pred) if parser.early_stop_crit == 'cer': if float(objective) < float( best_objective): #float(current_cer)<float(best_cer): best_cer = current_cer best_objective = objective epochs_no_improvement = 0 torch.save( retinanet.module, 'trained_models/' + parser.model_out + '{}_retinanet.pt'.format(parser.dataset)) else: epochs_no_improvement += 1 if mAP > best_map: best_map = mAP elif parser.early_stop_crit == 'map': if mAP > best_map: best_map = mAP epochs_no_improvement = 0 torch.save( retinanet.module, 'trained_models/' + parser.model_out + '{}_retinanet.pt'.format(parser.dataset)) else: epochs_no_improvement += 1 if float(current_cer) < float(best_cer): best_cer = current_cer if train_det: print(epoch_num, "mAP: ", mAP, " best mAP", best_map) if train_htr: print("VALID CER:", current_cer, "best CER", best_cer) print("Epochs no improvement:", epochs_no_improvement) valid_cer_f = open('trained_models/' + parser.model_out + 'log.txt', 'a') valid_cer_f.write( str(epoch_num) + " " + str(current_cer) + " " + str(best_cer) + ' ' + str(mAP) + ' ' + str(best_map) + ' ' + str(text_mAP) + '\n') if epochs_no_improvement > 3: for param_group in optimizer.param_groups: if param_group['lr'] > 10e-5: param_group['lr'] *= 0.1 if epochs_no_improvement >= parser.max_epochs_no_improvement: print("TRAINING FINISHED AT EPOCH", epoch_num, ".") sys.exit() scheduler.step(np.mean(epoch_loss)) torch.cuda.empty_cache() retinanet.eval()
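# The early-stopping bookkeeping above (objective = cer * (1 - mAP), a patience
# counter, save on improvement) can be isolated in a small helper. A sketch under
# the same conventions; the class name is illustrative, not from the original.
class EarlyStopper:
    def __init__(self, patience):
        self.best = float('inf')
        self.patience = patience
        self.epochs_no_improvement = 0

    def step(self, cer, mAP):
        """Return (improved, should_stop); a lower objective is better on both axes."""
        objective = cer * (1.0 - mAP)
        improved = objective < self.best
        if improved:
            self.best = objective
            self.epochs_no_improvement = 0
        else:
            self.epochs_no_improvement += 1
        return improved, self.epochs_no_improvement >= self.patience

# improved, should_stop = stopper.step(current_cer, mAP)  # save the model when improved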
def main(args=None): parser = argparse.ArgumentParser( description='Simple training script for training a RetinaNet network.') parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.') parser.add_argument('--coco_path', help='Path to COCO directory') parser.add_argument( '--csv_train', help='Path to file containing training annotations (see readme)') parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)') parser.add_argument( '--csv_val', help= 'Path to file containing validation annotations (optional, see readme)' ) parser.add_argument( '--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50) parser.add_argument('--epochs', help='Number of epochs', type=int, default=100) parser.add_argument('--optimizer', help='[SGD | Adam]', type=str, default='SGD') parser.add_argument('--model', help='Path to model (.pt) file.') parser = parser.parse_args(args) # Create the data loaders print("\n[Phase 1]: Creating DataLoader for {} dataset".format( parser.dataset)) if parser.dataset == 'coco': if parser.coco_path is None: raise ValueError('Must provide --coco_path when training on COCO.') dataset_train = CocoDataset(parser.coco_path, set_name='train2014', transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) dataset_val = CocoDataset(parser.coco_path, set_name='val2014', transform=transforms.Compose( [Normalizer(), Resizer()])) elif parser.dataset == 'csv': if parser.csv_train is None: raise ValueError('Must provide --csv_train when training on CSV.') if parser.csv_classes is None: raise ValueError( 'Must provide --csv_classes when training on CSV.') dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) if parser.csv_val is None: dataset_val = None print('No validation annotations provided.') else: dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Resizer()])) else: raise ValueError( 'Dataset type not understood (must be csv or coco), exiting.') sampler = AspectRatioBasedSampler(dataset_train, batch_size=8, drop_last=False) dataloader_train = DataLoader(dataset_train, num_workers=8, collate_fn=collater, batch_sampler=sampler) if dataset_val is not None: sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=16, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=8, collate_fn=collater, batch_sampler=sampler_val) # Create the model if parser.depth == 18: retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 34: retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 50: retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 101: retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 152: retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True) else: raise ValueError( 'Unsupported model depth, must be one of 18, 34, 50, 101, 152') print('| Num training images: {}'.format(len(dataset_train))) print('| Num test images : {}'.format(len(dataset_val))) print("\n[Phase 2]: Preparing RetinaNet Detection Model...") use_gpu = torch.cuda.is_available() device = torch.device('cuda' if use_gpu else 'cpu') retinanet = retinanet.to(device) if use_gpu: retinanet = 
torch.nn.DataParallel(retinanet, device_ids=range( torch.cuda.device_count())) print("| Using %d GPUs for Train/Validation!" % torch.cuda.device_count()) retinanet.training = True if parser.optimizer == 'Adam': optimizer = optim.Adam(retinanet.parameters(), lr=1e-5) # not mentioned print("| Adam Optimizer with Learning Rate = {}".format(1e-5)) elif parser.optimizer == 'SGD': optimizer = optim.SGD(retinanet.parameters(), lr=1e-2, momentum=0.9, weight_decay=1e-4) print("| SGD Optimizer with Learning Rate = {}".format(1e-2)) else: raise ValueError('Unsupported Optimizer, must be one of [SGD | Adam]') scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) loss_hist = collections.deque(maxlen=500) retinanet.train() retinanet.module.freeze_bn( ) # Freeze the BN parameters to ImageNet configuration # Check if there is a 'checkpoints' path if not osp.exists('./checkpoints/'): os.makedirs('./checkpoints/') print("\n[Phase 3]: Training Model on {} dataset...".format( parser.dataset)) for epoch_num in range(parser.epochs): epoch_loss = [] for iter_num, data in enumerate(dataloader_train): try: optimizer.zero_grad() classification_loss, regression_loss = retinanet( [data['img'].to(device), data['annot']]) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() loss = classification_loss + regression_loss if bool(loss == 0): continue loss.backward() torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.001) optimizer.step() loss_hist.append(float(loss)) epoch_loss.append(float(loss)) sys.stdout.write('\r') sys.stdout.write( '| Epoch: {} | Iteration: {}/{} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}' .format(epoch_num + 1, iter_num + 1, len(dataloader_train), float(classification_loss), float(regression_loss), np.mean(loss_hist))) sys.stdout.flush() del classification_loss del regression_loss except Exception as e: print(e) continue print("\n| Saving current best model at epoch {}...".format(epoch_num + 1)) torch.save( retinanet.state_dict(), './checkpoints/{}_retinanet_{}.pt'.format(parser.dataset, epoch_num + 1)) if parser.dataset == 'coco': #print('Evaluating dataset') coco_eval.evaluate_coco(dataset_val, retinanet, device) elif parser.dataset == 'csv' and parser.csv_val is not None: #print('Evaluating dataset') mAP = csv_eval.evaluate(dataset_val, retinanet, device) scheduler.step(np.mean(epoch_loss)) retinanet.eval() torch.save(retinanet.state_dict(), './checkpoints/model_final.pt')
def train(img_dir,classes_csv,model_fname=None,resnet_depth=50,epochs=1000,steps=100,train_split=0.8,out_dir ='',out_prefix=''): if not os.path.exists(out_dir): os.makedirs(out_dir) # Create the data loaders # Get all image fnames in folder img_list = [] if not isinstance(img_dir, list): img_dir = [img_dir] for dir in img_dir: for file in os.listdir(dir): if file.endswith(".png"): img_list.append(dir + file) randomised_list = random.sample(img_list, len(img_list)) num_train = int(train_split * len(img_list)) train_imgs, val_imgs = randomised_list[:num_train], randomised_list[num_train:] dataset_train = CustomDataset(img_list=train_imgs, class_list=classes_csv, transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()])) dataset_val = CustomDataset(img_list=val_imgs, class_list=classes_csv,transform=transforms.Compose([Normalizer(), Resizer()])) sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False) dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler) if dataset_val is not None: sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val) # Create the model if resnet_depth == 18: retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True) elif resnet_depth == 34: retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True) elif resnet_depth == 50: retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True) elif resnet_depth == 101: retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True) elif resnet_depth == 152: retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True) else: raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152') # retinanet = torch.load(model_fname) use_gpu = torch.cuda.is_available() if use_gpu: retinanet = retinanet.cuda() retinanet = torch.nn.DataParallel(retinanet).cuda() if model_fname is not None: retinanet.load_state_dict(torch.load(model_fname)) retinanet.training = True optimizer = optim.Adam(retinanet.parameters(), lr=1e-5) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) loss_hist = collections.deque(maxlen=500) retinanet.train() retinanet.module.freeze_bn() start_time = time.perf_counter() print('Num training images: {}'.format(len(dataset_train))) for epoch_num in range(epochs): retinanet.train() retinanet.module.freeze_bn() epoch_loss = [] for iter_num, data in enumerate(dataloader_train): try: optimizer.zero_grad() classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']]) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() loss = classification_loss + regression_loss if bool(loss == 0): continue loss.backward() torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1) optimizer.step() loss_hist.append(float(loss)) epoch_loss.append(float(loss)) # print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist))) del classification_loss del regression_loss except Exception as e: print(e) continue print('Epoch: {} | Running loss: {:1.5f} | Elapsed Time: {}'.format(epoch_num, np.mean(loss_hist),(time.perf_counter() - start_time)/60)) mAP = csv_eval.evaluate(dataset_val, retinanet) 
scheduler.step(np.mean(epoch_loss)) if (epoch_num) % steps == 0: torch.save(retinanet.module, '{}{}_model_{}.pt'.format(out_dir, out_prefix, epoch_num)) torch.save(retinanet.state_dict(), '{}{}_state_{}.pt'.format(out_dir, out_prefix, epoch_num)) torch.save(retinanet, out_dir + '{}model_final.pt'.format(out_prefix)) torch.save(retinanet.state_dict(), out_dir + '{}state_final_.pt'.format(out_prefix))
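# train() leaves two kinds of artifacts behind: full pickled modules
# ('..._model_*.pt', 'model_final.pt') and bare state dicts ('..._state_*.pt').
# They load differently; a minimal sketch assuming the default out_dir/out_prefix:
import torch

net = torch.load('model_final.pt')  # full module; its defining classes must be importable

# The state dicts were saved from the DataParallel wrapper, so keys carry a
# 'module.' prefix that must be stripped before loading into a bare model:
# state = torch.load('state_final_.pt')
# state = {k.replace('module.', '', 1): v for k, v in state.items()}
# bare_model.load_state_dict(state)  # bare_model rebuilt with the matching depth/classes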
if args.csv_val is not None: dataset_val = CSVDataset( train_file=args.csv_val, class_list=args.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]) ) else: dataset_val = None print('No validation annotations provided.') dataloader_train = DataLoader( dataset_train, num_workers=4, collate_fn=collater, batch_sampler=AspectRatioBasedSampler(dataset_train, batch_size=args.batch_size, drop_last=False), ) if dataset_val is not None: dataloader_val = DataLoader( dataset_val, num_workers=4, collate_fn=collater, batch_sampler=AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False), ) # -- # Define model device_ids = [int(device_id) for device_id in args.device_ids.split(',')]
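# A minimal sketch of consuming the parsed device_ids: pin the model to the
# first listed GPU and let DataParallel replicate across the rest. The toy
# module and the example id list are illustrative only.
import torch
import torch.nn as nn

device_ids = [0, 1]  # e.g. parsed from '--device_ids 0,1' as above
net = nn.Linear(4, 2)
if torch.cuda.is_available():
    net = nn.DataParallel(net.cuda(device_ids[0]), device_ids=device_ids)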
def main(args=None): parser = argparse.ArgumentParser( description='Simple training script for training a RetinaNet network.') parser.add_argument( '--csv_train', help='Path to file containing training annotations (see readme)') parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)') parser.add_argument( '--csv_val', help= 'Path to file containing validation annotations (optional, see readme)' ) parser.add_argument( '--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50) parser.add_argument('--epochs', help='Number of epochs', type=int, default=50) parser.add_argument('--model_name', help='name of the model to save') parser.add_argument('--pretrained', help='pretrained model name') parser = parser.parse_args(args) # Create the data loaders dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose( [Resizer(), Augmenter(), Normalizer()])) if parser.csv_val is None: dataset_val = None print('No validation annotations provided.') else: dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose( [Resizer(), Normalizer()])) sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False) dataloader_train = DataLoader(dataset_train, num_workers=16, collate_fn=collater, batch_sampler=sampler) #dataloader_train = DataLoader(dataset_train, num_workers=16, collate_fn=collater, batch_size=8, shuffle=True) if dataset_val is not None: sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=2, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=16, collate_fn=collater, batch_sampler=sampler_val) #dataloader_val = DataLoader(dataset_train, num_workers=16, collate_fn=collater, batch_size=8, shuffle=True) # Create the model_pose_level_attention if parser.depth == 18: retinanet = model.resnet18(num_classes=dataset_train.num_classes()) elif parser.depth == 34: retinanet = model.resnet34(num_classes=dataset_train.num_classes()) elif parser.depth == 50: retinanet = model.resnet50(num_classes=dataset_train.num_classes()) elif parser.depth == 101: retinanet = model.resnet101(num_classes=dataset_train.num_classes()) elif parser.depth == 152: retinanet = model.resnet152(num_classes=dataset_train.num_classes()) else: raise ValueError( 'Unsupported model depth, must be one of 18, 34, 50, 101, 152') ckpt = False # resume flag; referenced below but never defined in the original if ckpt: retinanet = torch.load('') print('load ckpt') else: retinanet_dict = retinanet.state_dict() pretrained_dict = torch.load('./weight/' + parser.pretrained) pretrained_dict = { k: v for k, v in pretrained_dict.items() if k in retinanet_dict } retinanet_dict.update(pretrained_dict) retinanet.load_state_dict(retinanet_dict) print('load pretrained backbone') print(retinanet) retinanet = torch.nn.DataParallel(retinanet, device_ids=[0]) retinanet.cuda() retinanet.training = True optimizer = optim.Adam(retinanet.parameters(), lr=1e-5) #optimizer = optim.SGD(retinanet.parameters(), lr=1e-3, momentum=0.9, weight_decay=1e-4) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) #scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1) loss_hist = collections.deque(maxlen=500) retinanet.train() retinanet.module.freeze_bn() print('Num training images: {}'.format(len(dataset_train))) f_map = open('./mAP_txt/' + parser.model_name + '.txt', 'a') writer = SummaryWriter(log_dir='./summary') iters = 0 for epoch_num in range(0, parser.epochs): retinanet.train() 
retinanet.module.freeze_bn() epoch_loss = [] #scheduler.step() for iter_num, data in enumerate(dataloader_train): iters += 1 optimizer.zero_grad() classification_loss_f, regression_loss_f, classification_loss_v, regression_loss_v = retinanet( [ data['img'].cuda().float(), data['annot'], data['vbox'], data['ignore'] ]) classification_loss_f = classification_loss_f.mean() regression_loss_f = regression_loss_f.mean() classification_loss_v = classification_loss_v.mean() regression_loss_v = regression_loss_v.mean() loss = classification_loss_f + regression_loss_f + classification_loss_v + regression_loss_v if bool(loss == 0): continue loss.backward() torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1) optimizer.step() loss_hist.append(float(loss)) epoch_loss.append(float(loss)) print( 'Epoch: {} | Iteration: {} | Classification loss_f: {:1.5f} | Regression loss_f: {:1.5f} | Classification loss_v {:1.5f} | Regression loss_v {:1.5f} | Running loss: {:1.5f}' .format(epoch_num, iter_num, float(classification_loss_f), float(regression_loss_f), float(classification_loss_v), float(regression_loss_v), np.mean(loss_hist))) writer.add_scalar('classification_loss_f', classification_loss_f, iters) writer.add_scalar('regression_loss_f', regression_loss_f, iters) writer.add_scalar('classification_loss_v', classification_loss_v, iters) writer.add_scalar('regression_loss_v', regression_loss_v, iters) writer.add_scalar('loss', loss, iters) if parser.csv_val is not None: print('Evaluating dataset') mAP = csv_eval.evaluate(dataset_val, retinanet) f_map.write('mAP:{}, epoch:{}'.format(mAP[0][0], epoch_num)) f_map.write('\n') scheduler.step(np.mean(epoch_loss)) torch.save(retinanet.module, './ckpt/' + parser.model_name + '_{}.pt'.format(epoch_num)) retinanet.eval() writer.export_scalars_to_json('./summary/' + parser.pretrained + 'all_scalars.json') f_map.close() writer.close()
def main(args=None): parser = argparse.ArgumentParser( description='Simple training script for training a RetinaNet network.') parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.') parser.add_argument('--data_path', help='Path to COCO directory') parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)') parser.add_argument( '--csv_val', help= 'Path to file containing validation annotations (optional, see readme)' ) parser.add_argument('--net', help='Network to use', default='fasterrcnn') parser.add_argument('--model', help='Path to model (.pt) file.') parser.add_argument( '--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50) parser = parser.parse_args(args) if parser.dataset == 'coco': raise NotImplementedError() dataset_val = CocoDataset(parser.data_path, set_name='val2017', transform=Compose([Normalizer(), Resizer()])) elif parser.dataset == 'openimages': dataset_val = OidDataset(parser.data_path, subset='validation', transform=Compose([ToTensor()])) elif parser.dataset == 'csv': raise NotImplementedError() dataset_val = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=Compose([Normalizer(), Resizer()])) else: raise ValueError( 'Dataset type not understood (must be csv, coco or openimages), exiting.') sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collate_fn, batch_sampler=sampler_val) # Create the model model = create_detection_model(dataset_val.num_classes(), parser) checkpoint = torch.load(parser.model, map_location=lambda storage, loc: storage) weights = checkpoint['model'] weights = {k.replace('module.', ''): v for k, v in weights.items()} model.load_state_dict(weights) use_gpu = torch.cuda.is_available() thres = 0.5 # detection score threshold (value assumed; not defined in the original) if use_gpu: model = model.cuda() model.eval() def draw_caption(image, box, caption): b = np.array(box).astype(int) cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2) cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1) for idx, data in enumerate(dataloader_val): with torch.no_grad(): st = time.time() images, targets = data # targets = [{k: v.cuda() for k, v in t.items()} for t in targets] if use_gpu: input_images = list(image.cuda().float() for image in images) else: input_images = list(image.float() for image in images) # TODO: adapt retinanet output to the one by torchvision 0.3 # scores, classification, transformed_anchors = model(data_img.float()) outputs = model(input_images) outputs = [{k: v.cpu() for k, v in t.items()} for t in outputs] output = outputs[0] # take the only batch scores = output['scores'] classification = output['labels'] transformed_anchors = output['boxes'] # from here, interface to the code already written in the original repo print('Elapsed time: {}'.format(time.time() - st)) idxs = np.where(scores > thres) img = np.array(255 * images[0]).copy() img[img < 0] = 0 img[img > 255] = 255 img = np.transpose(img, (1, 2, 0)) img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB) ''' # Visualize ground truth bounding boxes for bbox, label in zip(targets[0]['boxes'], targets[0]['labels']): # bbox = transformed_anchors[idxs[0][j], :] x1 = int(bbox[0]) y1 = int(bbox[1]) x2 = int(bbox[2]) y2 = int(bbox[3]) label_name = dataset_val.labels[int(label)] draw_caption(img, (x1, y1, x2, y2), label_name) cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 255, 0), thickness=1) print('GT: '+label_name) ''' for j in 
range(idxs[0].shape[0]): bbox = transformed_anchors[idxs[0][j], :] x1 = int(bbox[0]) y1 = int(bbox[1]) x2 = int(bbox[2]) y2 = int(bbox[3]) label_name = dataset_val.labels[int( classification[idxs[0][j]])] draw_caption(img, (x1, y1, x2, y2), label_name) cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2) print('Detection: ' + label_name) cv2.imshow('img', img) cv2.waitKey(0)