def get_data(dataset, dataroot, augment, resize=608, split=0.15, split_idx=0,
             multinode=False, target_lb=-1):
    """Build the detection train/test datasets with their transform pipelines.

    Args:
        dataset: Dataset identifier. Only ``'coco'`` is handled.
        dataroot: Root directory passed to ``CocoDataset``.
        augment: Mapping indexed with ``'num_policy'`` and ``'num_op'`` and
            handed to ``policy_decoder``; only consulted when the global
            config's ``aug`` attribute is a list.
        resize: Value forwarded to ``Resizer(min_side=...)``.
        split, split_idx, multinode, target_lb: Accepted for signature
            compatibility; this function does not read them.

    Returns:
        Tuple ``(total_trainset, testset)`` of ``CocoDataset`` objects built
        from the ``'train'`` and ``'val'`` sets.

    Raises:
        ValueError: If ``dataset`` is not ``'coco'``.
    """
    # Fail fast with a clear message. Previously an unsupported name fell
    # through to the return statement and surfaced as an UnboundLocalError.
    if dataset != 'coco':
        raise ValueError(
            "Unsupported dataset {!r}; only 'coco' is available.".format(dataset))

    transform_train = transforms.Compose(
        [Normalizer(), Augmenter(), Resizer(min_side=resize)])
    transform_test = transforms.Compose(
        [Normalizer(), Resizer(min_side=resize)])

    if isinstance(C.get().aug, list):
        logger.debug('augmentation provided.')
        policies = policy_decoder(augment, augment['num_policy'], augment['num_op'])
        # The decoded policies are inserted at the front of the pipeline.
        transform_train.transforms.insert(
            0, Augmentation(policies, detection=True))

    total_trainset = CocoDataset(dataroot, set_name='train', transform=transform_train)
    testset = CocoDataset(dataroot, set_name='val', transform=transform_test)
    return total_trainset, testset
def demo(image_lists):
    """Run the saved detector over images, draw the boxes and log them.

    For every path in ``image_lists`` the image is read, preprocessed, passed
    through the model loaded from ``./best_models/model.pt``, and every box
    whose score exceeds the threshold is drawn on the image and appended to
    the results file. The annotated image is written to ``./outputs/``.

    NOTE: this function reads two names it does not define: ``threshold``
    (score cut-off) and ``results_file`` (object with a ``write`` method).
    Both must exist at module level before calling.

    Args:
        image_lists: Iterable of image file paths.
    """
    model_path = "./best_models/model.pt"
    retinanet = torch.load(model_path)
    retinanet = retinanet.cuda()
    retinanet.eval()

    # Detection-time preprocessing.
    preprocess = T.Compose([
        Normalizer(),
        Resizer()
    ])

    for filename in image_lists:
        base_name = filename.split("/")[-1]
        image = skimage.io.imread(filename)
        # The transforms expect an annotation array; a zero-filled placeholder
        # keeps the run deterministic (np.empty returned uninitialised memory).
        sample = {"img": image.astype(np.float32) / 255.0,
                  "annot": np.zeros(shape=(5, 5))}
        image_tf = preprocess(sample)
        scale = image_tf["scale"]

        # torch.no_grad() replaces the deprecated Variable(..., volatile=True).
        with torch.no_grad():
            x = image_tf['img'].unsqueeze(0).transpose(1, 3)
            scores, _, bboxes = retinanet(x.cuda().float())
            bboxes /= scale

        scores = scores.cpu().numpy()
        bboxes = bboxes.cpu().numpy()

        # Keep only detections above the score threshold.
        idxs = np.where(scores > threshold)[0]
        bboxes = bboxes[idxs]

        for box in bboxes:
            # NOTE(review): coordinates are read as (box[1], box[0], box[3],
            # box[2]); presumably this compensates for transpose(1, 3) above
            # swapping the two spatial axes — confirm.
            x1, y1, x2, y2 = int(box[1]), int(box[0]), int(box[3]), int(box[2])
            cv2.rectangle(image, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
            results_file.write(base_name + "," + str(x1) + " " + str(y1) + " "
                               + str(x2) + " " + str(y2) + "\n")

        print("Predicting image: %s "%filename)
        cv2.imwrite("./outputs/%s"%base_name, image)
def fan_detect(model, img_name, threshold=0.9, max_detections=100, is_cuda=True):
    """Detect boxes in one image and return the highest-scoring ones.

    Args:
        model: Detector called on a single-image batch; must return
            ``(scores, labels, boxes)``.
        img_name: Passed to ``_load_image`` to obtain the image.
        threshold: Only detections scoring strictly above this are kept.
        max_detections: Upper bound on the number of returned boxes.
        is_cuda: Move the input batch to the GPU before inference.

    Returns:
        Array of the kept boxes, ordered by descending score and divided by
        the ``'scale'`` value reported by the transform pipeline.
    """
    sample = {
        'img': _load_image(img_name),
        'annot': np.zeros((0, 5)),
        'scale': 1,
    }
    preprocess = transforms.Compose([Resizer(), Normalizer()])
    transformed = preprocess(sample)

    model.eval()
    with torch.no_grad():
        # assumes the transformed image is channels-last (H, W, C) — TODO confirm
        batch = transformed['img'].permute(2, 0, 1).float()
        batch = batch.unsqueeze(dim=0)
        if is_cuda:
            batch = batch.cuda()
        scores, _labels, boxes = model(batch)

        scores = scores.cpu().numpy()
        # Undo the resize so boxes refer to the original image.
        rescaled_boxes = boxes.cpu().numpy() / transformed['scale']

    above = np.where(scores > threshold)[0]
    # Sort the surviving scores from high to low and cap the count.
    ranking = np.argsort(-scores[above])[:max_detections]
    return rescaled_boxes[above[ranking], :]
def detect_single_image(checkpoint, image_path, visualize=False):
    """Run a checkpointed RetinaNet on one image and write detections to disk.

    Detections scoring above 0.5 are written, one per line, as
    ``label score x1 y1 x2 y2 `` to a ``.txt`` file placed next to the image.

    Args:
        checkpoint: Mapping read with the keys ``'model_specs'`` (containing
            ``'training_configs'``), ``'hp_values'``, ``'labels'`` and
            ``'model'`` (a state dict).
        image_path: Path of the image to process.
        visualize: Accepted for interface compatibility; it currently has no
            effect on the output.

    Returns:
        Path of the written detections file.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    configs = deepcopy(checkpoint['model_specs']['training_configs'])
    # dict.update() mutates in place and returns None, so its result must not
    # be assigned back (doing so made configs None and broke the lookups below).
    configs.update(checkpoint['hp_values'])

    labels = checkpoint['labels']
    # TODO: make depth an input parameter
    retinanet = ret50(num_classes=len(labels),
                      scales=configs['anchor_scales'],
                      ratios=configs['anchor_ratios'])
    retinanet.load_state_dict(checkpoint['model'])
    retinanet = retinanet.to(device=device)
    retinanet.eval()

    img = skimage.io.imread(image_path)
    if len(img.shape) == 2:
        # Grayscale input: expand to three channels.
        img = skimage.color.gray2rgb(img)
    img = img.astype(np.float32) / 255.0

    # TODO: make the resize target dynamic
    transform = transforms.Compose([Normalizer(), Resizer(min_side=608)])
    data = transform({'img': img, 'annot': np.zeros((0, 5))})
    scale = data['scale']
    batch = data['img'].unsqueeze(0).permute(0, 3, 1, 2)

    detections_list = []
    with torch.no_grad():
        scores, classification, transformed_anchors = retinanet(
            batch.to(device=device).float())
        idxs = np.where(scores.cpu() > 0.5)[0]

        for det_idx in idxs:
            label_name = labels[int(classification[det_idx])]
            score = scores[det_idx].item()
            # Undo the resize so coordinates match the original image.
            # NOTE: int() truncates. The earlier bbox.round() call discarded
            # its result, so truncation has always been the effective behavior.
            bbox = transformed_anchors[det_idx, :] / scale
            x1, y1, x2, y2 = (int(bbox[k]) for k in range(4))
            detections_list.append(
                [label_name, str(score), str(x1), str(y1), str(x2), str(y2)])

    img_name = image_path.split('/')[-1].split('.')[0]
    filepathname = os.path.join(os.path.dirname(image_path), img_name + '.txt')
    with open(filepathname, 'w', encoding='utf8') as f:
        for single_det_list in detections_list:
            # Each field is followed by a single space, then a newline.
            f.write(''.join(str(x) + ' ' for x in single_det_list))
            f.write('\n')
    return filepathname
def __init__(self):
    """Create the preprocessing helpers and set the default thresholds."""
    # No model is attached and no boxes are stored at construction time.
    self.model = None
    self.bboxes = None

    # Preprocessing pipeline and its inverse normalization helper.
    self.transform = transforms.Compose([Normalizer(), Resizer()])
    self.unnormalize = UnNormalizer()

    # Default thresholds.
    self.overlap_threshold = 0.6
    self.score_threshold = 0.5
    self.distance_threshold = 1.
def init_data(parser, verb_orders):
    """Create the training and (optional) validation datasets and loaders.

    Args:
        parser: Object exposing ``train_file``, ``val_file``,
            ``classes_file`` and ``batch_size``.
        verb_orders: Forwarded to ``CSVDataset`` as ``verb_info``.

    Returns:
        Tuple ``(dataloader_train, dataset_train, dataloader_val,
        dataset_val)``. The two validation entries are ``None`` when
        ``parser.val_file`` is ``None``.
    """
    dataset_train = CSVDataset(
        train_file=parser.train_file, class_list=parser.classes_file,
        verb_info=verb_orders, is_training=True,
        transform=transforms.Compose([Normalizer(), Augmenter(), Resizer(True)]))
    sampler = AspectRatioBasedSampler(
        dataset_train, batch_size=parser.batch_size, drop_last=True)
    dataloader_train = DataLoader(
        dataset_train, num_workers=64, collate_fn=collater, batch_sampler=sampler)

    # Default both validation outputs to None so the return statement is
    # always valid (dataloader_val used to be unbound without a val file).
    dataset_val = None
    dataloader_val = None
    if parser.val_file is None:
        print('No validation annotations provided.')
    else:
        dataset_val = CSVDataset(
            train_file=parser.val_file, class_list=parser.classes_file,
            verb_info=verb_orders, is_training=False,
            transform=transforms.Compose([Normalizer(), Resizer(False)]))
        sampler_val = AspectRatioBasedSampler(
            dataset_val, batch_size=parser.batch_size, drop_last=True)
        dataloader_val = DataLoader(
            dataset_val, num_workers=64, collate_fn=collater,
            batch_sampler=sampler_val)

    return dataloader_train, dataset_train, dataloader_val, dataset_val
def main():
    """Predict one box per image and export the results.

    Reads ``*jpg`` (or, if none are found, ``*jpeg``) files from the input
    directory, calls ``predict`` on them, and writes to the output directory:

    * ``<image stem>.txt`` holding ``top,bottom,left,right`` margins, computed
      from the rounded box and the ``(w, h)`` pair ``predict`` returns for
      that image;
    * ``xpd.res`` with one ``name,x1,y1,x2,y2`` line per image;
    * optionally (``-s``) a copy of the image with the box drawn.

    ``-t`` runs ``test`` on the output directory afterwards; ``-ot`` runs only
    ``test`` and exits.
    """
    parser = argparse.ArgumentParser(description='测试模型效果.')
    parser.add_argument('-m', dest='model', help='Path to model (.pt) file.', default='/home/hao.wyh/code/git/pytorch-retinanet/output_models/true_data_v2_mix/coco_retinanet_3.pt')
    parser.add_argument('-o', dest='output_path', help='Path to save output imgs.', default='./tmp_out/')
    parser.add_argument('-i', dest='input_path', help='Path to save output imgs.', default='/home/hao.wyh/jupyter/黑边/评估任务/3k_imgs')
    parser.add_argument('-s', dest='show_out_im', action="store_true", help='是否测试模型准召率')
    parser.add_argument('-t', dest='test', action="store_true", help='是否测试模型准召率')
    parser.add_argument('-ot', dest='only_test', action="store_true", help='是否测试模型准召率')
    opts = parser.parse_args()

    if opts.only_test:
        test(opts.output_path)
        # Same effect as exit(), without relying on the `site` builtin.
        raise SystemExit

    image_paths = glob(opts.input_path + '/*jpg')
    if not image_paths:
        image_paths = glob(opts.input_path + '/*jpeg')
    # makedirs also copes with nested output paths.
    os.makedirs(opts.output_path, exist_ok=True)

    res_list, w_h_list = predict(image_paths, opts.model)
    res = []
    for idx, raw_box in enumerate(tqdm(res_list)):
        img_path = image_paths[idx]
        name = img_path.split('/')[-1]
        x1, y1, x2, y2 = [int(np.round(num)) for num in raw_box][:4]
        res.append(','.join([name, str(x1), str(y1), str(x2), str(y2)]))

        if opts.show_out_im:
            im = cv2.imread(img_path)
            im = cv2.rectangle(im, (x1, y1), (x2, y2), (0, 0, 255), 3)
            cv2.imwrite(os.path.join(opts.output_path, os.path.basename(img_path)), im)

        w, h = w_h_list[idx]
        # Margins between the box and each image border: top, bottom, left, right.
        margins = [y1, h - y2, x1, w - x2]
        # Derive the .txt name from the stem. The old replace('.jpeg', '.txt')
        # left '.jpg' names unchanged, so the text overwrote the image above.
        txt_name = os.path.splitext(name)[0] + '.txt'
        with open(os.path.join(opts.output_path, txt_name), 'w') as f:
            f.write(','.join(str(m) for m in margins))

    with open(os.path.join(opts.output_path, 'xpd.res'), 'w') as f:
        f.write('\n'.join(res))

    if opts.test:
        test(opts.output_path)
def get_dataset(self, set_name, sub_dir=None):
    """Return a ``CocoDataset`` for ``set_name`` with its transforms attached.

    The ``'val'`` set gets normalization plus resizing; every other set gets
    normalization, augmentation and ``RandomCropOrScale`` bounded by the
    minimum size reported by ``self.get_min_size``.

    Args:
        set_name: Name of the dataset split.
        sub_dir: Forwarded to ``CocoDataset``.

    Returns:
        The configured ``CocoDataset``.
    """
    # Silence anything the dataset constructor prints.
    with redirect_stdout(None):
        dataset = CocoDataset(root_dir=self.root_dir,
                              set_name=set_name,
                              transform=None,
                              sub_dir=sub_dir)

    min_w, min_h = self.get_min_size(dataset)

    steps = [Normalizer()]
    if set_name == 'val':
        steps.append(Resizer())
    else:
        steps.append(Augmenter())
        steps.append(RandomCropOrScale(min_w=min_w, min_h=min_h))

    dataset.transform = transforms.Compose(steps)
    return dataset
def main(args=None):
    """Evaluate a saved RetinaNet model on the COCO ``val2017`` set.

    Args:
        args: Optional argument list for ``argparse``; ``None`` means the
            process command line is used.

    Raises:
        ValueError: If ``--dataset`` is anything other than ``'coco'``.
    """
    arg_parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    arg_parser.add_argument('--dataset',
                            default='coco',
                            help='Dataset type, must be one of csv or coco.')
    arg_parser.add_argument(
        '--coco_path',
        default='/home/hao.wyh/jupyter/黑边/smart_reverse_label/coco/',
        help='Path to COCO directory')
    arg_parser.add_argument('--csv_classes',
                            help='Path to file containing class list (see readme)')
    arg_parser.add_argument(
        '--csv_val',
        help='Path to file containing validation annotations (optional, see readme)')
    arg_parser.add_argument('--model', help='Path to model (.pt) file.')
    opts = arg_parser.parse_args(args)

    # Only the COCO path is implemented here.
    if opts.dataset != 'coco':
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    val_transform = transforms.Compose([Normalizer(), Resizer()])
    dataset_val = CocoDataset(opts.coco_path,
                              set_name='val2017',
                              transform=val_transform)

    # The loader is built as in the original script although the evaluation
    # call below receives the dataset directly.
    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=1,
                                          drop_last=False)
    dataloader_val = DataLoader(dataset_val,
                                num_workers=1,
                                collate_fn=collater,
                                batch_sampler=sampler_val)

    retinanet = torch.load(opts.model)
    retinanet = retinanet.cuda()
    retinanet.eval()

    coco_eval.evaluate_coco(dataset_val, retinanet)
def get_training_dataloader(self, set_name='train'):
    """Create ``self.training_dataset`` and ``self.training_dataloader``.

    This can be used for entire sets. Statistics for the new loader are
    reported through ``self.print_data_statistics``.

    Args:
        set_name: Name of the dataset split to load.
    """
    # Silence anything the dataset constructor prints.
    with redirect_stdout(None):
        self.training_dataset = CocoDataset(root_dir=self.root_dir,
                                            set_name=set_name,
                                            transform=None)

    # The minimum size is still queried, but the active pipeline below uses
    # Resizer and does not consume it.
    _min_w, _min_h = self.get_min_size(self.training_dataset)

    train_steps = [Normalizer(), Augmenter(), Resizer()]
    self.training_dataset.transform = transforms.Compose(train_steps)

    batch_sampler = AspectRatioBasedSampler(self.training_dataset,
                                            batch_size=self.batch_size,
                                            shuffle=True)
    self.training_dataloader = DataLoader(dataset=self.training_dataset,
                                          num_workers=self.workers,
                                          collate_fn=collater,
                                          batch_sampler=batch_sampler,
                                          pin_memory=True)

    self.print_data_statistics(data_loader=self.training_dataloader,
                               set_type='Training')
def get_validation_dataloader(self, sub_dir=None, sort=False, set_name='val'):
    """Create ``self.validation_dataset`` and ``self.validation_dataloader``.

    The loader yields one sample per batch without shuffling. Statistics for
    it are reported through ``self.print_data_statistics``.

    Args:
        sub_dir: Forwarded to ``CocoDataset``.
        sort: Forwarded to ``CocoDataset``.
        set_name: Name of the dataset split to load.
    """
    val_transform = transforms.Compose([Normalizer(), Resizer()])

    # Silence anything the dataset constructor prints.
    with redirect_stdout(None):
        self.validation_dataset = CocoDataset(root_dir=self.root_dir,
                                              set_name=set_name,
                                              sub_dir=sub_dir,
                                              transform=val_transform,
                                              categories=self.categories,
                                              sort=sort)

    batch_sampler = AspectRatioBasedSampler(self.validation_dataset,
                                            batch_size=1,
                                            shuffle=False)
    self.validation_dataloader = DataLoader(self.validation_dataset,
                                            num_workers=self.workers,
                                            collate_fn=collater,
                                            batch_sampler=batch_sampler,
                                            pin_memory=True)

    self.print_data_statistics(data_loader=self.validation_dataloader,
                               set_type='Validation')
def main():
    """Train RetinaFace, logging losses to TensorBoard and saving checkpoints.

    Settings come from ``get_args()``; this function reads ``save_path``,
    ``data_path``, ``batch``, ``epochs``, ``verbose``, ``eval_step`` and
    ``save_step``. Every ``verbose`` iterations the losses are printed and
    logged; every ``eval_step`` epochs ``eval_widerface.evaluate`` is run on
    the validation loader; every ``save_step`` epochs the state dict is saved
    under ``save_path``.
    """
    args = get_args()

    # makedirs creates save_path and its 'log' child in one call and also
    # works when save_path is nested inside directories that do not exist yet.
    log_path = os.path.join(args.save_path, 'log')
    os.makedirs(log_path, exist_ok=True)
    writer = SummaryWriter(log_dir=log_path)

    data_path = args.data_path
    train_path = os.path.join(data_path, 'train/label.txt')
    val_path = os.path.join(data_path, 'val/label.txt')

    dataset_train = TrainDataset(train_path,
                                 transform=transforms.Compose(
                                     [Resizer(), PadToSquare()]))
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=8,
                                  batch_size=args.batch,
                                  collate_fn=collater,
                                  shuffle=True)
    dataset_val = ValDataset(val_path,
                             transform=transforms.Compose(
                                 [Resizer(), PadToSquare()]))
    dataloader_val = DataLoader(dataset_val,
                                num_workers=8,
                                batch_size=args.batch,
                                collate_fn=collater)

    total_batch = len(dataloader_train)

    # Create the torchvision-based model.
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    retinaface = torchvision_model.create_retinaface(return_layers)
    retinaface = retinaface.cuda()
    retinaface = torch.nn.DataParallel(retinaface).cuda()
    retinaface.training = True

    optimizer = optim.Adam(retinaface.parameters(), lr=1e-3)

    print('Start to train.')

    iteration = 0  # counts logging events, not optimisation steps

    # Close the writer even if training aborts, so buffered events are flushed.
    try:
        for epoch in range(args.epochs):
            retinaface.train()

            # Training
            for iter_num, data in enumerate(dataloader_train):
                optimizer.zero_grad()
                classification_loss, bbox_regression_loss, ldm_regression_loss = retinaface(
                    [data['img'].cuda().float(), data['annot']])
                # Reduce each loss to a scalar with mean() before summing.
                classification_loss = classification_loss.mean()
                bbox_regression_loss = bbox_regression_loss.mean()
                ldm_regression_loss = ldm_regression_loss.mean()

                loss = classification_loss + bbox_regression_loss + ldm_regression_loss

                loss.backward()
                optimizer.step()

                if iter_num % args.verbose == 0:
                    log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (
                        epoch, args.epochs, iter_num, total_batch)
                    table_data = [
                        ['loss name', 'value'],
                        ['total_loss', str(loss.item())],
                        ['classification', str(classification_loss.item())],
                        ['bbox', str(bbox_regression_loss.item())],
                        ['landmarks', str(ldm_regression_loss.item())],
                    ]
                    log_str += AsciiTable(table_data).table
                    print(log_str)

                    # write the log to tensorboard
                    writer.add_scalars(
                        'losses:', {
                            'total_loss': loss.item(),
                            'cls_loss': classification_loss.item(),
                            'bbox_loss': bbox_regression_loss.item(),
                            'ldm_loss': ldm_regression_loss.item()
                        }, iteration * args.verbose)
                    iteration += 1

            # Eval
            if epoch % args.eval_step == 0:
                print('-------- RetinaFace Pytorch --------')
                print('Evaluating epoch {}'.format(epoch))
                recall, precision = eval_widerface.evaluate(
                    dataloader_val, retinaface)
                print('Recall:', recall)
                print('Precision:', precision)

            # Save model
            if (epoch + 1) % args.save_step == 0:
                torch.save(
                    retinaface.state_dict(),
                    os.path.join(args.save_path,
                                 'model_epoch_{}.pt'.format(epoch + 1)))
    finally:
        writer.close()
def main(args=None):
    """Evaluate detection mAP and transcription CER on a CSV validation set.

    The model is loaded from ``--model`` when that file exists, otherwise it
    is built for the requested ResNet depth. mAP is computed with
    ``csv_eval.evaluate``; afterwards each sample of the box-annotation
    loader (or the validation loader when ``--csv_box_annot`` is not given)
    is transcribed with ``get_transcript`` and compared to the first line of
    the ``.txt`` file found next to the validation image.

    Args:
        args: Optional argument list for ``argparse``; ``None`` means the
            process command line is used.

    Raises:
        ValueError: For an unsupported ``--dataset`` or ``--depth``, or when
            ``--csv_classes`` / ``--csv_val`` is missing.
    """
    parser = argparse.ArgumentParser(description='Simple testing script for RetinaNet network.')

    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.', default="csv")
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)', default="binary_class.csv")
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--csv_box_annot', help='Path to file containing predicted box annotations ')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=18)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=500)
    parser.add_argument('--model', help='Path of .pt file with trained model', default='esposallescsv_retinanet_0.pt')
    parser.add_argument('--model_out', help='Path of .pt file with trained model to save', default='trained')
    parser.add_argument('--score_threshold', help='Score above which boxes are kept', default=0.15)
    parser.add_argument('--nms_threshold', help='Score above which boxes are kept', default=0.2)
    parser.add_argument('--max_epochs_no_improvement', help='Max epochs without improvement', default=100)
    parser.add_argument('--max_boxes', help='Max boxes to be fed to recognition', default=50)
    parser.add_argument('--seg_level', help='Line or word, to choose anchor aspect ratio', default='line')
    parser.add_argument('--htr_gt_box', help='Train recognition branch with box gt (for debugging)', default=False)
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset != 'csv':
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')
    if parser.csv_classes is None:
        raise ValueError('Must provide --csv_classes when using a CSV dataset.')
    # Everything below (alphabet, mAP, ground-truth lookup) needs the
    # validation set, so fail early instead of crashing later on None.
    if parser.csv_val is None:
        raise ValueError('Must provide --csv_val to evaluate a CSV dataset.')

    dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                             transform=transforms.Compose([Normalizer(), Resizer()]))
    if parser.csv_box_annot is not None:
        box_annot_data = CSVDataset(train_file=parser.csv_box_annot, class_list=parser.csv_classes,
                                    transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        box_annot_data = None

    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater,
                                batch_sampler=sampler_val)

    if box_annot_data is not None:
        sampler_box = AspectRatioBasedSampler(box_annot_data, batch_size=1, drop_last=False)
        dataloader_box_annot = DataLoader(box_annot_data, num_workers=0, collate_fn=collater,
                                          batch_sampler=sampler_box)
    else:
        dataloader_box_annot = dataloader_val

    os.makedirs('trained_models', exist_ok=True)

    # Create the model
    alphabet = dataset_val.alphabet
    if os.path.exists(parser.model):
        retinanet = torch.load(parser.model)
    else:
        # Only a validation set exists in this script, so the class count is
        # taken from it (depths 34-152 used to reference an undefined
        # dataset_train).
        num_classes = dataset_val.num_classes()
        if parser.depth == 18:
            retinanet = model.resnet18(num_classes=num_classes, pretrained=True,
                                       max_boxes=int(parser.max_boxes),
                                       score_threshold=float(parser.score_threshold),
                                       seg_level=parser.seg_level, alphabet=alphabet)
        elif parser.depth == 34:
            retinanet = model.resnet34(num_classes=num_classes, pretrained=True)
        elif parser.depth == 50:
            retinanet = model.resnet50(num_classes=num_classes, pretrained=True)
        elif parser.depth == 101:
            retinanet = model.resnet101(num_classes=num_classes, pretrained=True)
        elif parser.depth == 152:
            retinanet = model.resnet152(num_classes=num_classes, pretrained=True)
        else:
            raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.module.freeze_bn()
    best_cer = 1000
    cers = []
    retinanet.eval()
    retinanet.module.epochs_only_det = 0

    if parser.score_threshold is not None:
        retinanet.module.score_threshold = float(parser.score_threshold)

    mAP = csv_eval.evaluate(dataset_val, retinanet, score_threshold=retinanet.module.score_threshold)
    # Each mAP value is indexed at [0] to obtain the per-class figure.
    aps = [v[0] for v in mAP.values()]
    print ("VALID mAP:",np.mean(aps))

    print("score th",retinanet.module.score_threshold)
    text_gt_lines = None
    transcript_pred = None
    for idx, data in enumerate(dataloader_box_annot):
        print("Eval CER on validation set:",idx,"/",len(dataloader_box_annot),"\r")
        # NOTE(review): image_names is indexed with the loader position; this
        # is only right if the sampler yields samples in dataset order —
        # confirm against AspectRatioBasedSampler.
        if box_annot_data:
            image_name = box_annot_data.image_names[idx].split('/')[-1].split('.')[-2]
        else:
            image_name = dataset_val.image_names[idx].split('/')[-1].split('.')[-2]

        # The ground-truth transcript sits next to the validation image.
        text_gt_path = "/".join(dataset_val.image_names[idx].split('/')[:-1])
        text_gt = os.path.join(text_gt_path, image_name + '.txt')
        with open(text_gt, 'r') as f:
            text_gt_lines = f.readlines()[0]

        transcript_pred = get_transcript(image_name, data, retinanet,
                                         retinanet.module.score_threshold,
                                         float(parser.nms_threshold),
                                         dataset_val, alphabet)
        # Character error rate: edit distance normalised by reference length.
        cers.append(float(editdistance.eval(transcript_pred, text_gt_lines)) / len(text_gt_lines))

        print("GT",text_gt_lines)
        print("PREDS SAMPLE:",transcript_pred)
        print("VALID CER:",np.mean(cers),"best CER",best_cer)

    # Final summary; skipped when the loader produced no samples.
    if cers:
        print("GT",text_gt_lines)
        print("PREDS SAMPLE:",transcript_pred)
        print("VALID CER:",np.mean(cers),"best CER",best_cer)
def main():
    """Train RetinaNet-50 on COCO, evaluating and checkpointing every epoch.

    If ``./model/model_final.pt`` exists, training resumes from that saved
    model; otherwise a new ``model.retinanet_50`` is created. After every
    epoch the model is evaluated with ``eval.eval_coco``, the scheduler is
    stepped with the mean epoch loss and the model is saved under
    ``./model/``. The final model is saved to ``./model/model_final.pt``.

    Raises:
        ValueError: If ``data_type`` is not ``'coco'``.
    """
    data_type = 'coco'
    data_root_dir = '/data/data_coco/'
    epoch_max = 100
    batch_size = 8
    model_dir = './model'
    model_pretrain_dir = './model/model_final.pt'

    if data_type != 'coco':
        # Raise instead of only printing; continuing would hit undefined
        # dataset names further down.
        raise ValueError('暂不支持')

    dataset_train = CocoDataset(data_root_dir,
                                set_name='train2017',
                                transform=transforms.Compose(
                                    [Normalizer(), Augmenter(), Resizer()]))
    dataset_val = CocoDataset(data_root_dir,
                              set_name='val2017',
                              transform=transforms.Compose(
                                  [Normalizer(), Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=batch_size,
                                      drop_last=True)
    loader_train = DataLoader(dataset_train,
                              num_workers=8,
                              collate_fn=collater,
                              batch_sampler=sampler)

    # Pick the model BEFORE building the optimizer. Previously the optimizer
    # was bound to a fresh network which was then replaced by the loaded
    # checkpoint, so the resumed weights were never updated.
    if os.path.exists(model_pretrain_dir):
        print('pretrain model exist!')
        retinanet = torch.load(model_pretrain_dir)
    else:
        retinanet = model.retinanet_50(dataset_train.num_classes(),
                                       pretrained=True)
    retinanet = retinanet.cuda()

    optimizer = torch.optim.Adam(retinanet.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True,
                                                           factor=0.5)

    # torch.save does not create missing directories.
    os.makedirs(model_dir, exist_ok=True)

    print('train images num: {}'.format(len(loader_train) * batch_size))
    for epoch_num in range(epoch_max):
        retinanet.train()
        epoch_loss = []
        for iter_num, data in enumerate(loader_train):
            optimizer.zero_grad()
            input_tensor = [data['img'].cuda().float(), data['annot']]
            classification_loss, regression_loss = retinanet(input_tensor)
            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()
            loss = classification_loss + regression_loss
            epoch_loss.append(float(loss))

            # Nothing to back-propagate when the loss is exactly zero.
            if loss.item() == 0:
                continue

            loss.backward()
            optimizer.step()

            print(
                'Epoch:{}/{} | Iters:{}/{} | C loss:{:.4f} | R loss:{:.4f} | Current loss:{:.4f} | Current LR:{:.7f}'
                .format(epoch_num + 1, epoch_max, iter_num + 1,
                        len(loader_train), float(classification_loss),
                        float(regression_loss), np.mean(epoch_loss),
                        optimizer.param_groups[0]['lr']))
            del classification_loss
            del regression_loss

        # Validate once per epoch.
        eval.eval_coco(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))
        torch.save(
            retinanet,
            './model/{}_retinanet_{}.pt'.format(data_type, epoch_num + 1))

    retinanet.eval()
    torch.save(retinanet, model_pretrain_dir)
# Load the first ensemble member and put it in inference mode.
# `model_wt_path1`, `device` and `my_models` are defined outside this fragment.
model1 = torch.load(model_wt_path1)
model1 = model1.to(device)
model1.eval()
my_models.append(model1)

# Load the second ensemble member the same way and add it to the list.
model_wt_path2 = './Baseline_Ensemble/csv_retinanet_17.pt'
model2 = torch.load(model_wt_path2)
model2 = model2.to(device)
model2.eval()
my_models.append(model2)

# In[ ]:

# Evaluation inputs: the annotation file comes from `args` (defined outside
# this fragment), the class list from a fixed CSV file.
test_file_path = args.test_anno_file
csv_classes_path = 'classname2id.csv'
epoch_num = 0
# epoch_num = 15
dataset_test = CSVDataset(train_file=test_file_path, class_list=csv_classes_path, transform=transforms.Compose( [Normalizer(), Resizer()]))
# The whole list of models is passed to the evaluator together with epoch_num.
mAP = csv_eval.evaluate(dataset_test, my_models, epoch_num)
print(mAP)
# NOTE(review): this takes the mean over mAP.values() directly; presumably
# each value is a single number — confirm against csv_eval.evaluate.
print('mAP over all classes', np.mean(list(mAP.values())))

# In[ ]:

# get_ipython().system(u'pwd')

# In[ ]:
def detect(checkpoint, pred_on_path, output_path, threshold=0.5, visualize=False, red_label='sick'):
    """Run a checkpointed detector over a directory and write detections.

    ``output_path`` is deleted if it already exists and then recreated. For
    every image one ``.txt`` file is written with a line
    ``label score x1 y1 x2 y2 `` per detection scoring above ``threshold``.
    With ``visualize`` the image is also saved with boxes and captions drawn.

    Args:
        checkpoint: Mapping read with the keys ``'labels'``,
            ``'training_configs'``, ``'hp_values'``, ``'depth'`` and
            ``'model'`` (a state dict).
        pred_on_path: Directory handed to ``PredDataset``.
        output_path: Directory for the results (wiped first).
        threshold: Minimum score (exclusive) for a detection to be kept.
        visualize: Also write annotated images to ``output_path``.
        red_label: Labels containing this text are drawn with color
            ``(0, 0, 255)``; all others with ``(0, 255, 0)``.

    Returns:
        ``output_path``.
    """
    if torch.cuda.is_available():
        device = torch.device(type='cuda')
    else:
        device = torch.device(type='cpu')

    # Start from an empty output directory.
    if os.path.exists(output_path):
        shutil.rmtree(output_path)
    os.makedirs(output_path)

    logger.info('inside ' + str(pred_on_path) + ': ' + str(os.listdir(pred_on_path)))
    # TODO make resize an input param
    pred_transform = transforms.Compose([Normalizer(), Resizer(min_side=608)])
    dataset_val = PredDataset(pred_on_path=pred_on_path, transform=pred_transform)
    logger.info('dataset prepared')
    dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater, batch_sampler=None)
    logger.info('data loader initialized')

    labels = checkpoint['labels']
    logger.info('labels are: ' + str(labels))
    num_classes = len(labels)

    configs = deepcopy(checkpoint['training_configs'])
    configs.update(checkpoint['hp_values'])

    logger.info('initializing object_detection model')
    # TODO: make depth an input parameter
    model = retinanet(depth=checkpoint['depth'], num_classes=num_classes,
                      scales=configs['anchor_scales'], ratios=configs['anchor_ratios'])
    logger.info('loading weights')
    model.load_state_dict(checkpoint['model'])
    model = model.to(device=device)
    logger.info('model to device: ' + str(device))
    model.eval()

    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        """Write ``caption`` above ``box``: a thick black pass, then a thin white one."""
        b = np.array(box).astype(int)
        origin = (b[0], b[1] - 10)
        cv2.putText(image, caption, origin, cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, origin, cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    inference_times = []
    for idx, data in enumerate(dataloader_val):
        scale = data['scale'][0]
        with torch.no_grad():
            started = time.time()
            scores, classification, transformed_anchors = model(
                data['img'].to(device=device).float())
            elapsed_time = time.time() - started
            print('Elapsed time: {}'.format(elapsed_time))
            inference_times.append(elapsed_time)

            idxs = np.where(scores.cpu() > threshold)[0]

            if visualize:
                # Rebuild a displayable uint8 image from the normalized tensor.
                img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()
                img[img < 0] = 0
                img[img > 255] = 255
                img = np.transpose(img, (1, 2, 0))
                img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            detections_list = []
            for det_idx in idxs:
                bbox = transformed_anchors[det_idx, :]
                label_name = labels[int(classification[det_idx])]
                score = scores[det_idx].item()

                if visualize:
                    # Drawing uses coordinates in the resized image.
                    x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
                    draw_caption(img, (x1, y1, x2, y2), label_name)
                    box_color = (0, 0, 255) if red_label in label_name else (0, 255, 0)
                    cv2.rectangle(img, (x1, y1), (x2, y2), color=box_color, thickness=2)
                    print(label_name)

                # un resize for eval against gt
                bbox /= scale
                # NOTE(review): the result of round() is discarded, so the
                # int() casts below truncate rather than round.
                bbox.round()
                x1, y1, x2, y2 = int(bbox[0]), int(bbox[1]), int(bbox[2]), int(bbox[3])
                detections_list.append(
                    [label_name, str(score), str(x1), str(y1), str(x2), str(y2)])

            img_name = dataset_val.image_names[idx].split('/')[-1]
            filepathname = os.path.join(output_path, img_name.split('.')[0] + '.txt')
            with open(filepathname, 'w', encoding='utf8') as f:
                for single_det_list in detections_list:
                    # Every field is followed by one space; the line ends with '\n'.
                    for field in single_det_list:
                        f.write(str(field) + ' ')
                    f.write('\n')

            if visualize:
                cv2.imwrite(os.path.join(output_path, img_name), img)
                cv2.waitKey(0)

    print('average inference time per image: ', np.mean(inference_times))
    return output_path
def main(args=None):
    """Train a RetinaNet on a COCO or CSV dataset.

    After every epoch the model is evaluated (COCO always; CSV only when
    ``--csv_val`` is given), the scheduler is stepped with the mean epoch
    loss and the unwrapped module is saved as
    ``<dataset>_retinanet_dilation_<epoch>.pt``. The final model is saved to
    ``model_final_dilation.pt``.

    Args:
        args: Optional argument list for ``argparse``; ``None`` means the
            process command line is used.

    Raises:
        ValueError: For an unknown ``--dataset``, a missing required path, or
            an unsupported ``--depth``.
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    opts = parser.parse_args(args)

    # Create the datasets
    if opts.dataset == 'coco':
        if opts.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')
        dataset_train = CocoDataset(opts.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(opts.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif opts.dataset == 'csv':
        # These messages used to say "COCO" although they guard the CSV path.
        if opts.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')
        if opts.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV,')
        dataset_train = CSVDataset(train_file=opts.csv_train,
                                   class_list=opts.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(), Augmenter(), Resizer()]))
        if opts.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=opts.csv_val,
                                     class_list=opts.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(), Resizer()]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=2,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)
    # No validation loader is built: both evaluators below are called with
    # the dataset itself.

    # Create the model
    if opts.depth not in (18, 34, 50, 101, 152):
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    build_model = getattr(model, 'resnet{}'.format(opts.depth))
    retinanet = build_model(num_classes=dataset_train.num_classes(),
                            pretrained=True)

    retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()
    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(opts.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            # Deliberately best-effort: a batch that fails is reported and
            # skipped so it cannot abort the whole run.
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if opts.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif opts.dataset == 'csv' and opts.csv_val is not None:
            print('Evaluating dataset')
            csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))

        torch.save(
            retinanet.module,
            '{}_retinanet_dilation_{}.pt'.format(opts.dataset, epoch_num))

    retinanet.eval()
    # The file name has no placeholder; the former .format(epoch_num) did
    # nothing and raised NameError when the epoch loop never ran.
    torch.save(retinanet, 'model_final_dilation.pt')
def main(args=None):
    """Run a saved RetinaNet model over a validation set and show its detections.

    For every sample of the validation loader the model is run under
    ``torch.no_grad()``, detections scoring above 0.5 are drawn (box plus
    class caption) on the un-normalized image, and the image is shown in an
    OpenCV window until a key is pressed.

    Args:
        args: Optional list of command-line tokens; ``None`` lets argparse
            read ``sys.argv``.

    Raises:
        ValueError: If ``--dataset`` is neither ``'coco'`` nor ``'csv'``.
    """
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--model', help='Path to model (.pt) file.')

    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        # This script only defines --csv_val; the previous code read the
        # non-existent parser.csv_train and failed with AttributeError.
        dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                 transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
    dataloader_val = DataLoader(dataset_val, num_workers=1, collate_fn=collater, batch_sampler=sampler_val)

    # NOTE(review): torch.load unpickles a whole model object; only use it on
    # checkpoints from a trusted source.
    retinanet = torch.load(parser.model)

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.eval()

    unnormalize = UnNormalizer()

    def draw_caption(image, box, caption):
        """Write ``caption`` just above the top-left corner of ``box``."""
        b = np.array(box).astype(int)
        # Thick black text first, thin white text on top, so the caption
        # stays readable on any background.
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

    for idx, data in enumerate(dataloader_val):

        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = retinanet(data['img'].cuda().float())
            print('Elapsed time: {}'.format(time.time() - st))
            # The scores live on the GPU; NumPy can only read host memory.
            idxs = np.where(scores.cpu() > 0.5)

            # Undo the input normalization and bring the image back to a
            # displayable uint8 (H, W, C) array.
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

            img[img < 0] = 0
            img[img > 255] = 255

            img = np.transpose(img, (1, 2, 0))

            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_val.labels[int(classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)

                cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
                print(label_name)

            cv2.imshow('img', img)
            cv2.waitKey(0)
def train(args):
    """Train an EfficientNet-backed RetinaNet on a CSV dataset.

    Reads ``train_csv``, ``test_csv``, ``labels_csv``, ``model_type``,
    ``epochs`` and ``batch_size`` from ``args``. After every epoch the model
    is evaluated on the test CSV, the LR scheduler is stepped on the mean
    training loss, and the unwrapped module is saved with the epoch number
    and evaluation score in the file name. After the last epoch the
    (DataParallel-wrapped) model is saved to ``model_final.pt``.

    Args:
        args: Namespace-like object carrying the attributes listed above.
    """
    train_csv = args.train_csv
    test_csv = args.test_csv
    labels_csv = args.labels_csv
    model_type = args.model_type
    epochs = int(args.epochs)
    batch_size = int(args.batch_size)

    dataset_train = CSVDataset(train_file=train_csv,
                               class_list=labels_csv,
                               transform=transforms.Compose(
                                   [Normalizer(), Augmenter(), Resizer()]))
    dataset_val = CSVDataset(train_file=test_csv,
                             class_list=labels_csv,
                             transform=transforms.Compose(
                                 [Normalizer(), Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=batch_size,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    retinanet = RetinaNet_efficientnet_b4(
        num_classes=dataset_train.num_classes(), model_type=model_type)

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    # Rolling window used only for the "Running loss" console output.
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(epochs):

        # Re-assert train mode and the BN freeze each epoch: evaluation at
        # the end of the previous epoch may have changed the mode.
        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])

                # DataParallel returns one loss per replica.
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                # Best effort: a failing batch is reported and skipped so a
                # single bad sample does not abort the whole run.
                print(e)
                continue

        mAP, MAP = evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))

        torch.save(
            retinanet.module,
            '{}_retinanet_{}_map{}.pt'.format("EfficientNet" + model_type,
                                              epoch_num, MAP))

    retinanet.eval()

    # The file name has no placeholder; the former ``.format(epoch_num)`` did
    # nothing except raise NameError when the epoch loop never ran.
    torch.save(retinanet, 'model_final.pt')
# Stand-alone evaluation script: loads a saved RetinaNet checkpoint and prints
# the result of csv_eval.evaluate on a fixed validation CSV.
import os  # required for os.environ below

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torchvision import datasets, models, transforms
import torchvision

import model
from anchors import Anchors
import losses
from dataloader import CocoDataset, CSVDataset, collater, Resizer, AspectRatioBasedSampler, Augmenter, UnNormalizer, Normalizer
from torch.utils.data import Dataset, DataLoader

import coco_eval
import csv_eval
import warnings

warnings.filterwarnings("ignore")

# Only runs when the second component of the torch version string is '4'.
assert torch.__version__.split('.')[1] == '4'

# Restrict CUDA to device index 1; set before the first CUDA call below.
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
print('CUDA available: {}'.format(torch.cuda.is_available()))

dataset_val = CSVDataset(train_file="val.csv", class_list="classes.csv",
                         transform=transforms.Compose([Normalizer(), Resizer()]))

# NOTE(review): torch.load unpickles a full model object; only load
# checkpoints from a trusted source.
retinanet = torch.load("./logs/csv_retinanet_139.pt").cuda()
retinanet.eval()

# Named mean_ap rather than ``map`` so the builtin is not shadowed.
mean_ap = csv_eval.evaluate(dataset_val, retinanet)
print(mean_ap)
return args if __name__ == "__main__": args = parse_args() json.dump(vars(args), open(args.config_path, 'w')) # Create the data loaders if args.dataset == 'coco': dataset_train = CocoDataset( root_dir=args.coco_path, set_name='train2017', transform=transforms.Compose([ Normalizer(), Augmenter(), Resizer() ]) ) dataset_val = CocoDataset( root_dir=args.coco_path, set_name='val2017', transform=transforms.Compose([ Normalizer(), Resizer() ]) ) elif args.dataset == 'csv': dataset_train = CSVDataset( train_file=args.csv_train,
def main(args=None):
    """Train a RetinaNet (ResNet backbone) on a COCO or CSV dataset.

    Phases: build the datasets/loaders, build the model and optimizer, then
    train for ``--epochs`` epochs. After each epoch the state dict is written
    to ``./checkpoints/`` and, when a validation set exists, the model is
    evaluated. The final state dict is written to
    ``./checkpoints/model_final.pt``.

    Args:
        args: Optional list of command-line tokens; ``None`` lets argparse
            read ``sys.argv``.

    Raises:
        ValueError: On a missing required path, an unknown dataset type, an
            unsupported ``--depth`` or an unsupported ``--optimizer``.
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument('--optimizer',
                        help='[SGD | Adam]',
                        type=str,
                        default='SGD')
    parser.add_argument('--model', help='Path to model (.pt) file.')

    parser = parser.parse_args(args)

    # Create the data loaders
    print("\n[Phase 1]: Creating DataLoader for {} dataset".format(
        parser.dataset))
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2014',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2014',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':
        # These messages used to say "COCO" although this is the CSV branch.
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on CSV,')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=8,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=8,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=16,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=8,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    print('| Num training images: {}'.format(len(dataset_train)))
    # dataset_val is None for CSV training without --csv_val; len(None)
    # would raise TypeError.
    if dataset_val is not None:
        print('| Num test images : {}'.format(len(dataset_val)))

    print("\n[Phase 2]: Preparing RetinaNet Detection Model...")
    use_gpu = torch.cuda.is_available()
    # Bind ``device`` on both paths: the training loop and the evaluation
    # calls use it unconditionally.
    device = torch.device('cuda' if use_gpu else 'cpu')
    retinanet = retinanet.to(device)
    if use_gpu:
        retinanet = torch.nn.DataParallel(retinanet,
                                          device_ids=range(
                                              torch.cuda.device_count()))
        print("| Using %d GPUs for Train/Validation!" %
              torch.cuda.device_count())

    # freeze_bn lives on the bare model, which is only wrapped on the GPU path.
    if isinstance(retinanet, torch.nn.DataParallel):
        bare_model = retinanet.module
    else:
        bare_model = retinanet

    retinanet.training = True

    if parser.optimizer == 'Adam':
        optimizer = optim.Adam(retinanet.parameters(),
                               lr=1e-5)  # not mentioned
        print("| Adam Optimizer with Learning Rate = {}".format(1e-5))
    elif parser.optimizer == 'SGD':
        optimizer = optim.SGD(retinanet.parameters(),
                              lr=1e-2,
                              momentum=0.9,
                              weight_decay=1e-4)
        print("| SGD Optimizer with Learning Rate = {}".format(1e-2))
    else:
        raise ValueError('Unsupported Optimizer, must be one of [SGD | Adam]')

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    # Rolling window used only for the "Running loss" console output.
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    bare_model.freeze_bn(
    )  # Freeze the BN parameters to ImageNet configuration

    # Check if there is a 'checkpoints' path
    if not osp.exists('./checkpoints/'):
        os.makedirs('./checkpoints/')

    print("\n[Phase 3]: Training Model on {} dataset...".format(
        parser.dataset))
    for epoch_num in range(parser.epochs):
        # Re-assert train mode and the BN freeze at the start of every epoch,
        # as the other training loops in this file do, in case the evaluation
        # at the end of the previous epoch changed the mode.
        retinanet.train()
        bare_model.freeze_bn()

        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet(
                    [data['img'].to(device), data['annot']])
                # DataParallel returns one loss per replica.
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.001)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                # '\r' rewrites the same console line on every iteration.
                sys.stdout.write('\r')
                sys.stdout.write(
                    '| Epoch: {} | Iteration: {}/{} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num + 1, iter_num + 1, len(dataloader_train),
                            float(classification_loss), float(regression_loss),
                            np.mean(loss_hist)))
                sys.stdout.flush()

                del classification_loss
                del regression_loss

            except Exception as e:
                # Best effort: report the failing batch and keep training.
                print(e)
                continue

        print("\n| Saving current best model at epoch {}...".format(epoch_num +
                                                                   1))
        torch.save(
            retinanet.state_dict(),
            './checkpoints/{}_retinanet_{}.pt'.format(parser.dataset,
                                                      epoch_num + 1))

        if parser.dataset == 'coco':
            #print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet, device)

        elif parser.dataset == 'csv' and parser.csv_val is not None:
            #print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet, device)

        scheduler.step(np.mean(epoch_loss))

    retinanet.eval()
    torch.save(retinanet.state_dict(), './checkpoints/model_final.pt')
def main(args=None):
    """Two-stage visualizer: detect an ROI, then detect QR codes inside it.

    For each validation sample the ROI model is run; detections scoring
    above 0.5 are drawn on the network-sized image. When exactly one
    detection passes the threshold, its box is mapped back onto the original
    image file, written to ``output_file``, cropped, pre-processed and fed to
    the QRCode model, whose detections are mapped back and drawn on the
    original image. Otherwise the file name is written to
    ``not_processed_file``.

    Relies on names defined outside this function: ``ROI_mean``, ``ROI_std``,
    ``QRCode_mean``, ``QRCode_std``, ``output_file``, ``not_processed_file``,
    ``debug`` and ``convert_predict_to_origin_bbox``.

    NOTE(review): the source formatting was collapsed; the nesting below is
    reconstructed from the control-flow keywords -- confirm against the
    original file.

    Args:
        args: Optional list of command-line tokens; ``None`` lets argparse
            read ``sys.argv``.

    Raises:
        ValueError: If ``--dataset`` is neither ``'coco'`` nor ``'csv'``.
    """
    parser = argparse.ArgumentParser(
        description=
        'Simple visualizing script for visualize a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument('--ROI_model', help='Path to ROI model (.pt) file.')
    parser.add_argument('--QRCode_model',
                        help="path to QRcode model(.pt) file")

    parser = parser.parse_args(args)

    if parser.dataset == 'coco':
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        dataset_val = CSVDataset(train_file=parser.csv_val,
                                 class_list=parser.csv_classes,
                                 transform=transforms.Compose([
                                     Normalizer(ROI_mean, ROI_std),
                                     Resizer()
                                 ]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    # No sampler and no batch_sampler: the loader's default ordering is used,
    # which the idx -> file-name lookup below depends on.
    dataloader_val = DataLoader(dataset_val,
                                num_workers=1,
                                collate_fn=collater,
                                batch_sampler=None,
                                sampler=None)

    # NOTE(review): torch.load unpickles whole model objects; only use on
    # trusted checkpoints.
    ROI_net = torch.load(parser.ROI_model)
    QRCode_net = torch.load(parser.QRCode_model)

    use_gpu = True

    if use_gpu:
        ROI_net = ROI_net.cuda()
        QRCode_net = QRCode_net.cuda(0)

    ROI_net.eval()
    QRCode_net.eval()

    unnormalize = UnNormalizer(ROI_mean, ROI_std)

    def draw_caption(image, box, caption):
        """Write ``caption`` just above the top-left corner of ``box``."""
        b = np.array(box).astype(int)
        # Thick black text first, thin white text on top, for legibility.
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (0, 0, 0), 2)
        cv2.putText(image, caption, (b[0], b[1] - 10), cv2.FONT_HERSHEY_PLAIN,
                    1, (255, 255, 255), 1)

    for idx, data in enumerate(dataloader_val):
        with torch.no_grad():
            st = time.time()
            scores, classification, transformed_anchors = ROI_net(
                data['img'].cuda().float())
            print('Elapsed time: {}'.format(time.time() - st))
            # With batch size 1 and neither sampler nor batch_sampler given,
            # samples come in sequential index order, so looking the file
            # name up by the loop index is valid. With any other loader
            # configuration this lookup would return the wrong file.
            fn = dataset_val.get_image_name(idx)
            print('fn of image:', fn)
            idxs = np.where(scores.cpu() > 0.5)
            # Undo the input normalization and convert to a displayable
            # uint8 (H, W, C) image.
            img = np.array(255 * unnormalize(data['img'][0, :, :, :])).copy()

            img[img < 0] = 0
            img[img > 255] = 255

            img = np.transpose(img, (1, 2, 0))

            img = cv2.cvtColor(img.astype(np.uint8), cv2.COLOR_BGR2RGB)

            print("image shape when drawcaption:", img.shape)
            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]
                x1 = int(bbox[0])
                y1 = int(bbox[1])
                x2 = int(bbox[2])
                y2 = int(bbox[3])
                label_name = dataset_val.labels[int(
                    classification[idxs[0][j]])]
                draw_caption(img, (x1, y1, x2, y2), label_name)

                cv2.rectangle(img, (x1, y1), (x2, y2),
                              color=(0, 0, 255),
                              thickness=2)
            # Second stage only runs when exactly one ROI was found; x1..y2
            # then still hold that single detection from the loop above.
            if idxs[0].shape[0] == 1:
                origin_img = cv2.imread(fn)
                ph, pw, _ = img.shape
                # Map the box from network-input coordinates back to the
                # original image (helper defined elsewhere in the file).
                ret = convert_predict_to_origin_bbox(origin_img, pw, ph, x1,
                                                     y1, x2, y2)
                if ret is None:
                    print("ERROR: convert predicted origin bbox error")
                    continue

                x1p, y1p, x2p, y2p = ret
                print("ROI predicted:", x1p, y1p, x2p, y2p)
                output_file.write(fn + ',' + str(x1p) + ',' + str(y1p) + ',' +
                                  str(x2p) + ',' + str(y2p) + ',ROI\n')
                print("!!!! FN {} saved!!!".format(fn))
                # Crop before drawing so the rectangle is not part of the crop.
                ROI = origin_img[y1p:y2p, x1p:x2p]
                cv2.rectangle(origin_img, (x1p, y1p), (x2p, y2p),
                              color=(0, 0, 255),
                              thickness=8)
                #import pdb
                #pdb.set_trace()
                ROI = ROI.astype(np.float32) / 255.0
                # normalize it
                ROI_normalized = (ROI - QRCode_mean) / QRCode_std
                #resize it
                rows, cols, cns = ROI_normalized.shape
                smallest_side = min(rows, cols)
                #rescale the image so the smallest side is min_side
                min_side = 600.0
                max_side = 900.0
                scale = min_side / smallest_side
                #check if the largest side is now greater than max_side, which can happen
                # when images have a large aspect ratio
                largest_side = max(rows, cols)
                # The literal 900 equals max_side defined above.
                if largest_side * scale > 900:
                    scale = max_side / largest_side

                # resize the image with the computed scale
                ROI_scale = skimage.transform.resize(
                    ROI_normalized,
                    (int(round(rows * scale)), int(round((cols * scale)))))
                rows, cols, cns = ROI_scale.shape

                # Zero-pad both dimensions up to the next multiple of 32.
                # Note: pad_w is derived from rows and pad_h from cols, and a
                # dimension already divisible by 32 still gets 32 extra.
                pad_w = 32 - rows % 32
                pad_h = 32 - cols % 32

                ROI_padded = np.zeros(
                    (rows + pad_w, cols + pad_h, cns)).astype(np.float32)
                ROI_padded[:rows, :cols, :] = ROI_scale.astype(np.float32)
                x = torch.from_numpy(ROI_padded)
                print('x.shape:', x.shape)
                x = torch.unsqueeze(x, dim=0)
                print('x.shape after unsqueeze:', x.shape)
                # (1, H, W, C) -> (1, C, H, W)
                x = x.permute(0, 3, 1, 2)
                print('x.shape after permute:', x.shape)

                scores, classification, transformed_anchors = QRCode_net(
                    x.cuda().float())
                print('scores:', scores)
                print('classification;', classification)
                print('transformed_anchors:', transformed_anchors)
                idxs = np.where(scores.cpu() > 0.5)
                predict_height, predict_width, _ = ROI_padded.shape

                for j in range(idxs[0].shape[0]):
                    bbox = transformed_anchors[idxs[0][j], :]
                    x1 = int(bbox[0])
                    y1 = int(bbox[1])
                    x2 = int(bbox[2])
                    y2 = int(bbox[3])
                    print("!!QRCode predicted bbox inside ROI:", x1, y1, x2,
                          y2)

                    ret = convert_predict_to_origin_bbox(
                        ROI, predict_width, predict_height, x1, y1, x2, y2)
                    if ret is None:
                        continue

                    qrcode_x1, qrcode_y1, qrcode_x2, qrcode_y2 = ret
                    print('qrcode(bbox):', qrcode_x1, qrcode_y1, qrcode_x2,
                          qrcode_y2)

                    # Offset by the ROI's top-left corner to get coordinates
                    # in the original image.
                    qrcode_img_x1 = x1p + qrcode_x1
                    qrcode_img_y1 = y1p + qrcode_y1
                    qrcode_img_x2 = x1p + qrcode_x2
                    qrcode_img_y2 = y1p + qrcode_y2
                    print('!!!QRCode in image:', qrcode_img_x1, qrcode_img_y1,
                          qrcode_img_x2, qrcode_img_y2)
                    cv2.rectangle(origin_img, (qrcode_img_x1, qrcode_img_y1),
                                  (qrcode_img_x2, qrcode_img_y2),
                                  color=(255, 0, 0),
                                  thickness=8)

                # The same output file name is reused for every image.
                cv2.imwrite('origin_img_qrcode.png', origin_img)
                resized = cv2.resize(origin_img, (800, 600))
                cv2.imshow('result', resized)
            else:
                # Zero or several ROI candidates: record the file as skipped.
                not_processed_file.write(fn + ",,,,,\n")

            if debug:
                cv2.imshow('img', img)
                cv2.setWindowTitle('img', fn)
                key = cv2.waitKey(0)
                # Pressing 'q' terminates the process immediately.
                if 'q' == chr(key & 255):
                    exit(0)

    output_file.close()
    not_processed_file.close()
def main(args=None):
    """Train a RetinaNet on CSV data with imgaug augmentation and tensorboard logging.

    Dataset selection: if a test CSV is provided, the train and val CSVs are
    combined for training and the test CSV is used for validation. With
    ``--train_all_labeled_data`` every provided CSV is used for training,
    there is no validation, and a fixed MultiStepLR schedule is used.

    Per epoch: train, then (when a validation set exists) compute validation
    loss and mAP, keep the best checkpoints, step the LR scheduler and save
    the epoch checkpoint into ``--model_output_dir``.

    Args:
        args: Optional list of command-line tokens; ``None`` lets argparse
            read ``sys.argv``.

    Raises:
        ValueError: On missing ``--csv_train``/``--csv_classes``, on a test
            set given without a validation set, or on an unsupported depth.
        OSError: From ``os.mkdir`` when ``--model_output_dir`` already exists.
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)',
        default=None)
    parser.add_argument(
        '--csv_test',
        help=
        'Path to file containing test annotations (optional, if provided, train & val will be combined for training and test will be used for evaluation)',
        default=None)
    parser.add_argument('--lr', type=float, default=2e-5)
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=101)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=25)
    parser.add_argument('--model_output_dir', type=str, default='models')
    parser.add_argument(
        '--train_all_labeled_data',
        help=
        'Combine train, val, and test into 1 training set. Will use prespecified learning rate scheduler steps',
        action='store_true')
    parser.add_argument('--resnet-backbone-normalization',
                        choices=['batch_norm', 'group_norm'],
                        type=str,
                        default='batch_norm')

    parser = parser.parse_args(args)

    print('Learning Rate: {}'.format(parser.lr))
    print("Normalization: ", parser.resnet_backbone_normalization)

    # Validate the arguments BEFORE touching the filesystem: previously a
    # failed validation left an empty output directory behind, which then
    # blocked the next run.
    if parser.csv_train is None:
        raise ValueError('Must provide --csv_train when training,')

    if parser.csv_classes is None:
        raise ValueError('Must provide --csv_classes when training,')

    if not parser.csv_val and parser.csv_test:
        raise ValueError(
            "Cannot specify test set without specifying validation set")

    # Create folder - os.mkdir itself raises if the folder already exists, so
    # earlier results are never overwritten (also under ``python -O``, where
    # the former assert was stripped).
    os.mkdir(parser.model_output_dir)

    if parser.train_all_labeled_data:
        csv_paths = [parser.csv_train, parser.csv_val, parser.csv_test]
        train_csv = []
        for path in csv_paths:
            if isinstance(path, str):
                train_csv.append(path)
        val_csv = None
    else:
        if parser.csv_train and parser.csv_val and parser.csv_test:
            train_csv = [parser.csv_train, parser.csv_val
                         ]  # Combine train and val sets for training
            val_csv = parser.csv_test
        else:
            train_csv = parser.csv_train
            val_csv = parser.csv_val

    print('loading train data')
    print(train_csv)
    dataset_train = CSVDataset(train_file=train_csv,
                               class_list=parser.csv_classes,
                               transform=transforms.Compose(
                                   [Normalizer(),
                                    Augmenter(),
                                    Resizer()]))
    print(dataset_train.__len__())

    if val_csv is None:
        dataset_val = None
        print('No validation annotations provided.')
    else:
        dataset_val = CSVDataset(train_file=val_csv,
                                 class_list=parser.csv_classes,
                                 transform=transforms.Compose(
                                     [Normalizer(), Resizer()]))

    print('putting data into loader')
    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=2,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    print('creating model')
    if parser.depth == 18:
        retinanet = model.resnet18(
            num_classes=dataset_train.num_classes(),
            pretrained=True,
            normalization=parser.resnet_backbone_normalization)
    elif parser.depth == 34:
        retinanet = model.resnet34(
            num_classes=dataset_train.num_classes(),
            pretrained=True,
            normalization=parser.resnet_backbone_normalization)
    elif parser.depth == 50:
        retinanet = model.resnet50(
            num_classes=dataset_train.num_classes(),
            pretrained=True,
            normalization=parser.resnet_backbone_normalization)
    elif parser.depth == 101:
        retinanet = model.resnet101(
            num_classes=dataset_train.num_classes(),
            pretrained=True,
            normalization=parser.resnet_backbone_normalization)
    elif parser.depth == 152:
        retinanet = model.resnet152(
            num_classes=dataset_train.num_classes(),
            pretrained=True,
            normalization=parser.resnet_backbone_normalization)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=parser.lr)

    lr_factor = 0.3
    if not parser.train_all_labeled_data:
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                         patience=3,
                                                         factor=lr_factor,
                                                         verbose=True)
    else:
        # these milestones are for when using the lung masks - not for unmasked lung data
        scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=[12, 16, 20, 24],
            gamma=lr_factor)  # masked training
        #scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[14, 18, 22, 26], gamma=lr_factor)

    # Rolling window used only for the "Running loss" console output.
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    #initialize tensorboard
    writer = SummaryWriter(comment=parser.model_output_dir)

    # Augmentation
    seq = iaa.Sequential([
        iaa.Fliplr(0.5),
        iaa.Flipud(0.5),
        iaa.Affine(scale={
            "x": (1.0, 1.2),
            "y": (1.0, 1.2)
        },
                   rotate=(-20, 20),
                   shear=(-4, 4))
    ],
                         random_order=True)

    def augment(data, seq):
        """Apply ``seq`` to every image and its boxes in ``data``, in place."""
        for n, img in enumerate(data['img']):
            # imgaug needs dim in format (H, W, C)
            image = data['img'][n].permute(1, 2, 0).numpy()

            bbs_array = []
            for ann in data['annot'][n]:
                x1, y1, x2, y2, _ = ann
                bbs_array.append(BoundingBox(x1=x1, y1=y1, x2=x2, y2=y2))

            bbs = BoundingBoxesOnImage(bbs_array, shape=image.shape)
            image_aug, bbs_aug = seq(image=image, bounding_boxes=bbs)

            # save augmented image and change dims to (C, H, W)
            data['img'][n] = torch.tensor(image_aug.copy()).permute(2, 0, 1)

            # save augmented annotations, keeping each box's class id
            for i, bbox in enumerate(bbs_aug.bounding_boxes):
                x1, y1, x2, y2 = bbox.x1, bbox.y1, bbox.x2, bbox.y2
                obj_class = data['annot'][n][i][-1]
                data['annot'][n][i] = torch.tensor([x1, y1, x2, y2, obj_class])

        return data

    print('Num training images: {}'.format(len(dataset_train)))
    dir_training_images = os.path.join(os.getcwd(), writer.log_dir,
                                       'training_images')
    os.mkdir(dir_training_images)

    best_validation_loss = None
    best_validation_map = None

    for epoch_num in range(parser.epochs):

        writer.add_scalar('Train/LR', optimizer.param_groups[0]['lr'],
                          epoch_num)

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                data = augment(data, seq)

                # save a few training images to see what augmentation looks like
                if iter_num % 100 == 0 and epoch_num == 0:
                    x1, y1, x2, y2, _ = data['annot'][0][0]

                    fig, ax = plt.subplots(1)
                    ax.imshow(data['img'][0][1])
                    rect = patches.Rectangle((x1, y1),
                                             x2 - x1,
                                             y2 - y1,
                                             linewidth=1,
                                             edgecolor='r',
                                             facecolor='none',
                                             alpha=1)
                    ax.add_patch(rect)
                    fig.savefig(
                        os.path.join(dir_training_images,
                                     '{}.png'.format(iter_num)))
                    plt.close()

                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])

                # DataParallel returns one loss per replica.
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                if parser.resnet_backbone_normalization == 'batch_norm':
                    torch.nn.utils.clip_grad_norm_(
                        parameters=retinanet.parameters(), max_norm=0.1)
                else:
                    torch.nn.utils.clip_grad_norm_(
                        parameters=retinanet.parameters(), max_norm=0.01
                    )  # Decrease norm to reduce risk of exploding gradients

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss

            except Exception as e:
                # Best effort: report the failing batch and keep training.
                print(e)
                continue

        writer.add_scalar('Train/Loss', np.mean(epoch_loss), epoch_num)

        # A validation set only exists when not training on all labeled data
        # AND a validation CSV was given; checking the dataset itself avoids
        # a NameError on dataloader_val when --csv_val is omitted.
        if dataset_val is not None:
            print('Evaluating Validation Loss...')
            with torch.no_grad():
                # The model only returns losses in train mode. train() also
                # puts the BN layers back into training mode, so freeze them
                # again; otherwise this pass would update their running
                # statistics with validation data.
                retinanet.train()
                retinanet.module.freeze_bn()
                val_losses, val_class_losses, val_reg_losses = [], [], []
                for val_iter_num, val_data in enumerate(dataloader_val):
                    try:
                        val_classification_loss, val_regression_loss = retinanet(
                            [
                                val_data['img'].cuda().float(),
                                val_data['annot']
                            ])
                        # Reduce per-replica losses exactly like the training
                        # loop does before converting to float.
                        val_classification_loss = float(
                            val_classification_loss.mean())
                        val_regression_loss = float(
                            val_regression_loss.mean())
                        val_losses.append(val_classification_loss +
                                          val_regression_loss)
                        val_class_losses.append(val_classification_loss)
                        val_reg_losses.append(val_regression_loss)
                        del val_classification_loss, val_regression_loss
                    except Exception as e:
                        print(e)
                        continue
                print(
                    'VALIDATION Epoch: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Total loss: {:1.5f}'
                    .format(epoch_num, np.mean(val_class_losses),
                            np.mean(val_reg_losses), np.mean(val_losses)))

                # Save model with best validation loss
                if best_validation_loss is None:
                    best_validation_loss = np.mean(val_losses)
                if best_validation_loss >= np.mean(val_losses):
                    best_validation_loss = np.mean(val_losses)
                    torch.save(
                        retinanet.module,
                        parser.model_output_dir + '/best_result_valloss.pt')

                writer.add_scalar('Validation/Loss', np.mean(val_losses),
                                  epoch_num)

                # Calculate Validation mAP
                print('Evaluating validation mAP')
                mAP = csv_eval.evaluate(dataset_val, retinanet)
                print("Validation mAP: " + str(mAP[0][0]))
                # Save on the first epoch too; previously the checkpoint was
                # never written when the first epoch had the best mAP.
                if best_validation_map is None or best_validation_map < mAP[0][
                        0]:
                    best_validation_map = mAP[0][0]
                    torch.save(
                        retinanet.module,
                        parser.model_output_dir + '/best_result_valmAP.pt')

                writer.add_scalar('Validation/mAP', mAP[0][0], epoch_num)

        if parser.train_all_labeled_data:
            scheduler.step()
        elif dataset_val is not None:
            scheduler.step(np.mean(val_losses))
        else:
            # No validation data: drive the plateau scheduler with the
            # training loss, as the other training scripts in this file do.
            scheduler.step(np.mean(epoch_loss))

        torch.save(
            retinanet.module,
            parser.model_output_dir + '/retinanet_{}.pt'.format(epoch_num))

    retinanet.eval()

    torch.save(retinanet, parser.model_output_dir + '/model_final.pt')
def main(args=None):
    """Train the RetinaNet detection + text-recognition (HTR) model on a CSV dataset.

    Parses the command line, builds the training/validation datasets, loads a
    pretrained model from ``--pretrained_model`` (or builds a fresh ResNet
    backbone of ``--depth``), then runs the epoch loop.  After every epoch in
    which evaluation is possible the model is scored with
    ``csv_eval.evaluate``; the best model according to ``--early_stop_crit``
    is saved under ``trained_models/`` and one line is appended to the log
    file ``trained_models/<model_out>log.txt``.

    Args:
        args: Optional list of command-line tokens.  ``None`` lets argparse
            read ``sys.argv``.

    Raises:
        ValueError: If ``--dataset`` is not ``csv``, if ``--csv_train`` is
            missing, or if no pretrained model file exists and ``--depth`` is
            not one of 18, 34, 50, 101, 152.
        SystemExit: When ``--max_epochs_no_improvement`` epochs pass without
            improvement (early stopping).
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.',
                        default="csv")
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)',
                        default="binary_class.csv")
    parser.add_argument(
        '--csv_val',
        help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=18)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=500)
    parser.add_argument('--epochs_only_det',
                        help='Number of epochs to train detection part',
                        type=int,
                        default=1)
    parser.add_argument('--max_epochs_no_improvement',
                        help='Max epochs without improvement',
                        type=int,
                        default=100)
    parser.add_argument('--pretrained_model',
                        help='Path of .pt file with pretrained model',
                        default='esposallescsv_retinanet_0.pt')
    parser.add_argument('--model_out',
                        help='Path of .pt file with trained model to save',
                        default='trained')
    parser.add_argument('--score_threshold',
                        help='Score above which boxes are kept',
                        type=float,
                        default=0.5)
    parser.add_argument('--nms_threshold',
                        help='Score above which boxes are kept',
                        type=float,
                        default=0.2)
    parser.add_argument('--max_boxes',
                        help='Max boxes to be fed to recognition',
                        default=95)
    parser.add_argument('--seg_level',
                        help='[line, word], to choose anchor aspect ratio',
                        default='word')
    parser.add_argument(
        '--early_stop_crit',
        help='Early stop criterion, detection (map) or transcription (cer)',
        default='cer')
    parser.add_argument('--max_iters_epoch',
                        help='Max steps per epoch (for debugging)',
                        default=1000000)
    parser.add_argument('--train_htr',
                        help='Train recognition or not',
                        default='True')
    parser.add_argument('--train_det',
                        help='Train detection or not',
                        default='True')
    parser.add_argument(
        '--binary_classifier',
        help='Wether to use classification branch as binary or not, multiclass instead.',
        default='False')
    parser.add_argument(
        '--htr_gt_box',
        help='Train recognition branch with box gt (for debugging)',
        default='False')
    parser.add_argument(
        '--ner_branch',
        help='Train named entity recognition with separate branch',
        default='False')

    parser = parser.parse_args(args)

    # ------------------------------------------------------------------
    # Datasets
    # ------------------------------------------------------------------
    if parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train')
        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))
    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    # ------------------------------------------------------------------
    # Training log: the output directory must exist before the log is opened.
    # ------------------------------------------------------------------
    os.makedirs('trained_models', exist_ok=True)
    log_path = 'trained_models/' + parser.model_out + 'log.txt'
    with open(log_path, 'w') as log_file:
        for arg in vars(parser):
            if getattr(parser, arg) is not None:
                log_file.write(
                    str(arg) + ' ' + str(getattr(parser, arg)) + '\n')
        # Recording the commit is best effort; a missing git binary or a
        # non-repository working directory must not abort training.
        try:
            current_commit = subprocess.check_output(
                ['git', 'rev-parse', 'HEAD'])
        except (subprocess.CalledProcessError, OSError):
            current_commit = b'unknown'
        log_file.write(str(current_commit))
        log_file.write("epoch_num cer best cer mAP best mAP time\n")

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=1,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    # ------------------------------------------------------------------
    # Model
    # ------------------------------------------------------------------
    # Boolean options arrive as the strings 'True' / 'False'.
    train_htr = parser.train_htr == 'True'
    train_det = parser.train_det == 'True'
    htr_gt_box = parser.htr_gt_box == 'True'
    ner_branch = parser.ner_branch == 'True'
    binary_classifier = parser.binary_classifier == 'True'

    torch.backends.cudnn.benchmark = False
    alphabet = dataset_train.alphabet

    if os.path.exists(parser.pretrained_model):
        # Reuse the pretrained network but replace the heads whose size
        # depends on the number of classes of the current dataset.
        retinanet = torch.load(parser.pretrained_model)
        retinanet.classificationModel = ClassificationModel(
            num_features_in=256,
            num_anchors=retinanet.anchors.num_anchors,
            num_classes=dataset_train.num_classes())
        if ner_branch:
            retinanet.nerModel = NERModel(
                feature_size=256,
                pool_h=retinanet.pool_h,
                n_classes=dataset_train.num_classes(),
                pool_w=retinanet.pool_w)
    else:
        if parser.depth == 18:
            retinanet = model.resnet18(
                num_classes=dataset_train.num_classes(),
                pretrained=True,
                max_boxes=int(parser.max_boxes),
                score_threshold=float(parser.score_threshold),
                seg_level=parser.seg_level,
                alphabet=alphabet,
                train_htr=train_htr,
                htr_gt_box=htr_gt_box,
                ner_branch=ner_branch,
                binary_classifier=binary_classifier)
        elif parser.depth == 34:
            retinanet = model.resnet34(
                num_classes=dataset_train.num_classes(),
                pretrained=True,
                max_boxes=int(parser.max_boxes),
                score_threshold=float(parser.score_threshold),
                seg_level=parser.seg_level,
                alphabet=alphabet,
                train_htr=train_htr,
                htr_gt_box=htr_gt_box)
        elif parser.depth == 50:
            retinanet = model.resnet50(
                num_classes=dataset_train.num_classes(), pretrained=True)
        elif parser.depth == 101:
            retinanet = model.resnet101(
                num_classes=dataset_train.num_classes(), pretrained=True)
        elif parser.depth == 152:
            retinanet = model.resnet152(
                num_classes=dataset_train.num_classes(), pretrained=True)
        else:
            raise ValueError(
                'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    # These attributes are set on the bare model, before it is wrapped.
    retinanet.htr_gt_box = htr_gt_box
    retinanet.train_htr = train_htr
    retinanet.epochs_only_det = parser.epochs_only_det

    retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()
    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=50,
                                                     verbose=True)
    loss_hist = collections.deque(maxlen=500)
    ctc = CTCLoss()

    retinanet.train()
    retinanet.module.freeze_bn()

    best_cer = 1000
    best_map = 0
    epochs_no_improvement = 0
    verbose_each = 20
    optimize_each = 1
    best_objective = 10000

    print(('Num training images: {}'.format(len(dataset_train))))

    # ------------------------------------------------------------------
    # Epoch loop
    # ------------------------------------------------------------------
    for epoch_num in range(parser.epochs):
        retinanet.training = True
        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            if iter_num > int(parser.max_iters_epoch):
                break
            try:
                if iter_num % optimize_each == 0:
                    optimizer.zero_grad()
                (classification_loss, regression_loss, ctc_loss,
                 ner_loss) = retinanet([
                     data['img'].cuda().float(), data['annot'], ctc, epoch_num
                 ])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                # Select the loss terms according to which parts are trained.
                if train_det:
                    if train_htr:
                        loss = ctc_loss + classification_loss + regression_loss + ner_loss
                    else:
                        loss = classification_loss + regression_loss + ner_loss
                elif train_htr:
                    loss = ctc_loss
                else:
                    continue

                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                if iter_num % verbose_each == 0:
                    print((
                        'Epoch: {} | Step: {} |Classification loss: {:1.5f} | Regression loss: {:1.5f} | CTC loss: {:1.5f} | NER loss: {:1.5f} | Running loss: {:1.5f} | Total loss: {:1.5f}\r'
                        .format(epoch_num, iter_num,
                                float(classification_loss),
                                float(regression_loss), float(ctc_loss),
                                float(ner_loss), np.mean(loss_hist),
                                float(loss))))

                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                torch.cuda.empty_cache()
            except Exception as e:
                # Best effort: a failing batch is reported and skipped.
                print(e)
                continue

        # Evaluation only happens with a validation set and detection training.
        evaluated = (parser.dataset == 'csv' and parser.csv_val is not None
                     and train_det)
        if evaluated:
            print('Evaluating dataset')
            mAP, text_mAP, current_cer = csv_eval.evaluate(
                dataset_val, retinanet, score_threshold=parser.score_threshold)
            # Lower is better: low CER combined with high mAP.
            objective = current_cer * (1 - mAP)

        retinanet.eval()
        retinanet.training = False
        # NOTE(review): this sets the attribute on the DataParallel wrapper,
        # not on retinanet.module -- confirm which object reads it.
        retinanet.score_threshold = float(parser.score_threshold)

        # Checkpointing, logging and early stopping all need the metrics
        # produced above, so they are skipped when no evaluation ran.
        if evaluated:
            best_model_path = ('trained_models/' + parser.model_out +
                               '{}_retinanet.pt'.format(parser.dataset))
            if parser.early_stop_crit == 'cer':
                if float(objective) < float(best_objective):
                    best_cer = current_cer
                    best_objective = objective
                    epochs_no_improvement = 0
                    torch.save(retinanet.module, best_model_path)
                else:
                    epochs_no_improvement += 1
                if mAP > best_map:
                    best_map = mAP
            elif parser.early_stop_crit == 'map':
                if mAP > best_map:
                    best_map = mAP
                    epochs_no_improvement = 0
                    torch.save(retinanet.module, best_model_path)
                else:
                    epochs_no_improvement += 1
                if float(current_cer) < float(best_cer):
                    best_cer = current_cer

            if train_det:
                print(epoch_num, "mAP: ", mAP, " best mAP", best_map)
            if train_htr:
                print("VALID CER:", current_cer, "best CER", best_cer)
            print("Epochs no improvement:", epochs_no_improvement)

            with open(log_path, 'a') as log_file:
                log_file.write(
                    str(epoch_num) + " " + str(current_cer) + " " +
                    str(best_cer) + ' ' + str(mAP) + ' ' + str(best_map) +
                    ' ' + str(text_mAP) + '\n')

            if epochs_no_improvement > 3:
                for param_group in optimizer.param_groups:
                    # NOTE(review): 10e-5 equals 1e-4, the initial learning
                    # rate, so this strict comparison never fires with the
                    # optimizer configured above -- confirm the threshold.
                    if param_group['lr'] > 10e-5:
                        param_group['lr'] *= 0.1

            if epochs_no_improvement >= parser.max_epochs_no_improvement:
                print("TRAINING FINISHED AT EPOCH", epoch_num, ".")
                sys.exit()

        scheduler.step(np.mean(epoch_loss))
        torch.cuda.empty_cache()

    retinanet.eval()
def main(args=None):
    """Train a RetinaNet detector on COCO or CSV data, optionally resuming.

    When ``--resume`` names an existing file, the pickled model stored there
    replaces the freshly built network *before* the optimizer is created, so
    the optimizer updates the parameters that are actually trained.  The
    model is evaluated once before training and again after every epoch, and
    a checkpoint is written after every epoch.

    Args:
        args: Optional list of command-line tokens.  ``None`` lets argparse
            read ``sys.argv``.

    Raises:
        ValueError: If ``--dataset`` is not ``coco``/``csv``, a required path
            for the chosen dataset is missing, or ``--depth`` is unsupported.
    """
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--start-epoch', default=0, type=int, help='manual epoch number (useful on restarts)')
    parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)')

    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')
        dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                    transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV,')
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=4, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

    # Create the model
    if parser.depth not in (18, 34, 50, 101, 152):
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    retinanet = None
    if parser.resume:
        if os.path.isfile(parser.resume):
            print("=>loading checkpoint '{}'".format(parser.resume))
            # NOTE: torch.load unpickles arbitrary objects; only resume from
            # checkpoint files you trust.
            checkpoint = torch.load(parser.resume)
            # The checkpoint is a whole pickled model; unwrap it if it was
            # saved while wrapped in DataParallel.
            if isinstance(checkpoint, torch.nn.DataParallel):
                checkpoint = checkpoint.module
            retinanet = checkpoint
            print("=> loaded checkpoint '{}' (epoch {})".format(parser.resume, parser.start_epoch))
        else:
            print("=> no checkpoint found at '{}'".format(parser.resume))

    if retinanet is None:
        build_backbone = getattr(model, 'resnet{}'.format(parser.depth))
        retinanet = build_backbone(num_classes=dataset_train.num_classes(), pretrained=True)

    retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()
    retinanet.training = True

    # Built after any checkpoint was loaded so it tracks the live parameters.
    optimizer = optim.SGD(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

    loss_hist = collections.deque(maxlen=500)

    def evaluate_current_model():
        # Score the current model on the validation data, if there is any.
        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            csv_eval.evaluate(dataset_val, retinanet)

    print('Num training images: {}'.format(len(dataset_train)))

    # Baseline score of the starting (possibly resumed) model.
    evaluate_current_model()

    for epoch_num in range(parser.start_epoch, parser.epochs):
        # Re-enter training mode every epoch, after any evaluation.
        # freeze_bn lives on the wrapped module, not on DataParallel.
        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot'].cuda()])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                # Best effort: a failing batch is reported and skipped.
                print(e)
                continue

        evaluate_current_model()

        scheduler.step(np.mean(epoch_loss))

        # Save the bare module so the file can be resumed from directly.
        torch.save(retinanet.module, '{}_retinanet_dilation_experiment1_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()

    torch.save(retinanet.module, 'model_final_dilation_experiment1.pt')
def main():
    """Train a RetinaFace model and periodically evaluate and checkpoint it.

    Options come from ``get_args()``.  Losses are printed and written to
    TensorBoard every ``args.verbose`` iterations; every ``args.eval_step``
    epochs the model is evaluated with ``eval_widerface.evaluate`` and saved
    when precision improves; every ``args.save_step`` epochs a regular
    checkpoint is written to ``args.save_path``.
    """
    precision_global = 0
    args = get_args()

    # Creating the log directory also creates args.save_path, including any
    # missing parents, and tolerates directories that already exist.
    log_path = os.path.join(args.save_path, 'log')
    os.makedirs(log_path, exist_ok=True)

    writer = SummaryWriter(log_dir=log_path)

    data_path = args.data_path
    train_path = os.path.join(data_path, 'retina-train-splitTrain.txt')
    val_path = os.path.join(data_path, "retina-train-splitTest.txt")

    dataset_train = TrainDataset(train_path,
                                 transform=transforms.Compose(
                                     [Resizer(), PadToSquare()]))
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=6,
                                  batch_size=args.batch,
                                  collate_fn=collater,
                                  shuffle=True)
    dataset_val = ValDataset(val_path,
                             transform=transforms.Compose(
                                 [Resizer(), PadToSquare()]))
    dataloader_val = DataLoader(dataset_val,
                                num_workers=8,
                                batch_size=args.batch,
                                collate_fn=collater)

    total_batch = len(dataloader_train)

    # Create torchvision model
    return_layers = {'layer2': 1, 'layer3': 2, 'layer4': 3}
    retinaface = torchvision_model.create_retinaface(return_layers)

    # Load trained model
    if args.model_path is not None:
        retina_dict = retinaface.state_dict()
        pre_state_dict = torch.load(args.model_path)
        prefix = 'module.'
        pretrained_dict = {}
        for key, value in pre_state_dict.items():
            # Checkpoints written below come from a DataParallel wrapper, so
            # their keys start with 'module.'; strip it only when present so
            # un-prefixed checkpoints load as well.
            name = key[len(prefix):] if key.startswith(prefix) else key
            if name in retina_dict:
                pretrained_dict[name] = value
        retinaface.load_state_dict(pretrained_dict)

    retinaface = retinaface.cuda()
    retinaface = torch.nn.DataParallel(retinaface).cuda()
    retinaface.training = True

    optimizer = optim.Adam(retinaface.parameters(), lr=1e-3)

    print('Start to train.')

    # Counts logging events; multiplied by args.verbose to form the
    # TensorBoard step of the loss curves.
    iteration = 0

    for epoch in range(args.epochs):
        retinaface.train()

        # Training
        for iter_num, data in enumerate(dataloader_train):
            optimizer.zero_grad()
            classification_loss, bbox_regression_loss, ldm_regression_loss = retinaface(
                [data['img'].cuda().float(), data['annot']])
            classification_loss = classification_loss.mean()
            bbox_regression_loss = bbox_regression_loss.mean()
            ldm_regression_loss = ldm_regression_loss.mean()

            loss = classification_loss + bbox_regression_loss + ldm_regression_loss

            loss.backward()
            optimizer.step()

            if iter_num % args.verbose == 0:
                log_str = "\n---- [Epoch %d/%d, Batch %d/%d] ----\n" % (
                    epoch, args.epochs, iter_num, total_batch)
                table_data = [
                    ['loss name', 'value'],
                    ['total_loss', str(loss.item())],
                    ['classification', str(classification_loss.item())],
                    ['bbox', str(bbox_regression_loss.item())],
                    ['landmarks', str(ldm_regression_loss.item())],
                ]
                table = AsciiTable(table_data)
                log_str += table.table
                print(log_str)

                # write the log to tensorboard
                step = iteration * args.verbose
                writer.add_scalar('losses:', loss.item(), step)
                writer.add_scalar('class losses:', classification_loss.item(), step)
                writer.add_scalar('box losses:', bbox_regression_loss.item(), step)
                writer.add_scalar('landmark losses:', ldm_regression_loss.item(), step)
                iteration += 1

        # Eval
        if epoch % args.eval_step == 0:
            print('-------- RetinaFace Pytorch --------')
            print('Evaluating epoch {}'.format(epoch))
            recall, precision = eval_widerface.evaluate(
                dataloader_val, retinaface)
            if precision_global < precision:
                # New best precision: keep a dedicated checkpoint.
                precision_global = precision
                torch.save(
                    retinaface.state_dict(), args.save_path +
                    '/model_Best_epoch_{}.pt'.format(epoch + 1))
            print('Recall:', recall)
            print('Precision:', precision, "best Precision:",
                  precision_global)

            writer.add_scalar('Recall:', recall, epoch * args.eval_step)
            writer.add_scalar('Precision:', precision, epoch * args.eval_step)

        # Save model
        if (epoch + 1) % args.save_step == 0:
            torch.save(retinaface.state_dict(),
                       args.save_path + '/model_epoch_{}.pt'.format(epoch + 1))

    writer.close()
def main(args=None):
    """Evaluate a saved RetinaNet detection + recognition model on a CSV set.

    Loads the pickled model given by ``--model``, puts it in evaluation mode
    and runs ``csv_eval.evaluate`` on the ``--csv_val`` dataset using
    ``--score_threshold``.  Several options (for example ``--csv_box_annot``,
    ``--epochs`` and ``--nms_threshold``) are accepted on the command line
    but are not used by this function.

    Args:
        args: Optional list of command-line tokens.  ``None`` lets argparse
            read ``sys.argv``.

    Raises:
        ValueError: If ``--dataset`` is not ``csv`` or a required CSV path is
            missing.
        SystemExit: With status 1 when the ``--model`` file does not exist.
    """
    parser = argparse.ArgumentParser(
        description='Simple testing script for RetinaNet network.')
    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.',
                        default="csv")
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)',
                        default="binary_class.csv")
    parser.add_argument(
        '--csv_val',
        help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument(
        '--csv_box_annot',
        help='Path to file containing predicted box annotations ')
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=18)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=500)
    parser.add_argument('--model',
                        help='Path of .pt file with trained model',
                        default='esposallescsv_retinanet_0.pt')
    parser.add_argument('--model_out',
                        help='Path of .pt file with trained model to save',
                        default='trained')
    parser.add_argument('--score_threshold',
                        help='Score above which boxes are kept',
                        default=0.15)
    parser.add_argument('--nms_threshold',
                        help='Score above which boxes are kept',
                        default=0.2)
    parser.add_argument('--max_epochs_no_improvement',
                        help='Max epochs without improvement',
                        default=100)
    parser.add_argument('--max_boxes',
                        help='Max boxes to be fed to recognition',
                        default=50)
    parser.add_argument('--seg_level',
                        help='Line or word, to choose anchor aspect ratio',
                        default='line')
    parser.add_argument(
        '--htr_gt_box',
        help='Train recognition branch with box gt (for debugging)',
        default=False)
    parser.add_argument(
        '--binary_classifier',
        help='Wether to use classification branch as binary or not, multiclass instead.',
        default='False')

    parser = parser.parse_args(args)

    # Create the dataset to evaluate on
    if parser.dataset != 'csv':
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')
    if parser.csv_classes is None:
        raise ValueError('Must provide --csv_classes when evaluating on CSV,')
    if parser.csv_val is None:
        # Evaluation is impossible without annotations; fail with a clear
        # message instead of an unbound-variable error further down.
        raise ValueError('Must provide --csv_val when evaluating on CSV,')

    dataset_val = CSVDataset(train_file=parser.csv_val,
                             class_list=parser.csv_classes,
                             transform=transforms.Compose(
                                 [Normalizer(), Resizer()]))

    os.makedirs('trained_models', exist_ok=True)

    # Load the model
    if not os.path.exists(parser.model):
        print("Choose an existing saved model path.")
        sys.exit(1)
    # NOTE: torch.load unpickles arbitrary objects; only load trusted files.
    retinanet = torch.load(parser.model)

    retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.module.freeze_bn()
    retinanet.eval()
    retinanet.module.epochs_only_det = 0
    retinanet.training = False
    if parser.score_threshold is not None:
        retinanet.module.score_threshold = float(parser.score_threshold)

    csv_eval.evaluate(dataset_val,
                      retinanet,
                      score_threshold=retinanet.module.score_threshold)
def main(args=None):
    """Train a RetinaNet detector on COCO across the GPUs in ``--gpu_ids``.

    Builds the COCO train/val datasets, creates a ResNet-backed RetinaNet of
    ``--depth`` (optionally initialised from ``--checkpoint``), trains for
    ``--epochs`` epochs while logging losses to TensorBoard under ``./log``,
    evaluates on the validation set after every epoch and writes checkpoints
    every 1000 iterations, after every epoch and at the end.

    Args:
        args: Optional list of command-line tokens.  ``None`` lets argparse
            read ``sys.argv``.

    Raises:
        ValueError: If ``--depth`` is not one of 18, 34, 50, 101, 152.
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--coco_path',
                        help='Path to COCO directory',
                        type=str,
                        default='./data/coco')
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--checkpoint',
                        help='The path to the checkpoint.',
                        type=str,
                        default=None)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument('--batch_size',
                        help='Number of batch',
                        type=int,
                        default=16)
    parser.add_argument('--gpu_ids',
                        help='Gpu parallel',
                        type=str,
                        default='1, 2')

    parser = parser.parse_args(args)

    # Validate the depth first so a bad value fails before the datasets
    # are loaded.
    if parser.depth not in (18, 34, 50, 101, 152):
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    # Create the data loaders
    dataset_train = CocoDataset(parser.coco_path,
                                set_name='train2017',
                                transform=transforms.Compose(
                                    [Normalizer(),
                                     Augmenter(),
                                     Resizer()]))
    dataset_val = CocoDataset(parser.coco_path,
                              set_name='val2017',
                              transform=transforms.Compose(
                                  [Normalizer(), Resizer()]))

    # NOTE(review): --batch_size is parsed but the sampler uses a fixed batch
    # size of 4; confirm which value is intended before wiring the flag in.
    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=4,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=16,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    # Create the model
    build_backbone = getattr(model, 'resnet{}'.format(parser.depth))
    retinanet = build_backbone(num_classes=dataset_train.num_classes(),
                               pretrained=True)

    retinanet = retinanet.cuda()

    # The first listed GPU is the primary device for DataParallel.
    gpu_ids = parser.gpu_ids.split(',')
    device = torch.device("cuda:" + gpu_ids[0])
    torch.cuda.set_device(device)
    gpu_ids = list(map(int, gpu_ids))
    retinanet = torch.nn.DataParallel(retinanet,
                                      device_ids=gpu_ids).to(device)

    if parser.checkpoint:
        # The checkpoint is a pickled model; only its weights are reused.
        pretrained = torch.load(parser.checkpoint).state_dict()
        retinanet.module.load_state_dict(pretrained)

    # add tensorboard to record train log
    writer = SummaryWriter('./log')

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    # Step counter for TensorBoard that keeps increasing across epochs, so
    # later epochs extend the curves instead of overwriting earlier points.
    global_step = 0

    for epoch_num in range(parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                # NOTE(review): the annotation key here is 'ann'; confirm it
                # matches what `collater` produces, because a KeyError would
                # be swallowed by the handler below on every batch.
                classification_loss, regression_loss = retinanet(
                    [data['img'].to(device), data['ann'].to(device)])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()

                loss_hist.append(float(loss))

                writer.add_scalar('Loss/train', loss, global_step)
                writer.add_scalar('Loss/reg_loss', regression_loss,
                                  global_step)
                writer.add_scalar('Loss/cls_loss', classification_loss,
                                  global_step)
                global_step += 1

                epoch_loss.append(float(loss))

                if (iter_num + 1) % 1000 == 0:
                    print('Save model')
                    torch.save(
                        retinanet.module,
                        'COCO_retinanet_epoch{}_iter{}.pt'.format(
                            epoch_num, iter_num))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                # Best effort: a failing batch is reported and skipped.
                print(e)
                continue

        print('Evaluating dataset')
        coco_eval.evaluate_coco(dataset_val, retinanet, writer)

        scheduler.step(np.mean(epoch_loss))

        torch.save(retinanet.module, 'COCO_retinanet_{}.pt'.format(epoch_num))

    retinanet.eval()

    torch.save(retinanet, 'model_final.pt')

    # Flush pending TensorBoard events to disk.
    writer.close()
def _build_retinanet(depth, num_classes):
    """Build the detector through ``model.resnet<depth>``.

    Args:
        depth: Backbone depth; must be one of 18, 34, 50, 101, 152.
        num_classes: Number of classes passed to the constructor.

    Returns:
        The object returned by ``model.resnet<depth>(num_classes=...,
        pretrained=True)``.

    Raises:
        ValueError: If ``depth`` is not a supported value.
    """
    if depth not in (18, 34, 50, 101, 152):
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    # Resolve only the requested constructor so the other depths are never
    # touched (mirrors the lazy lookup of an if/elif chain).
    constructor = getattr(model, 'resnet{}'.format(int(depth)))
    return constructor(num_classes=num_classes, pretrained=True)


def _train_one_epoch(retinanet, dataloader, optimizer, num_samples,
                     batch_size):
    """Run one optimisation pass over ``dataloader``.

    Args:
        retinanet: ``DataParallel``-wrapped model. It is called with
            ``[img, annot, pair]`` and is expected to return
            ``(siamese_loss, classification_loss, regression_loss)``.
        dataloader: Iterable of batches with keys ``'img'``, ``'annot'`` and
            ``'pair'``.
        optimizer: Optimizer stepping ``retinanet``'s parameters.
        num_samples: Total used for the progress bar.
        batch_size: Nominal batch size; used as the progress increment and as
            the weight passed to the loss meter.

    Returns:
        Tuple ``(epoch_loss, train_losses)``: the list of per-batch float
        losses and the ``AverageMeter`` that accumulated them.
    """
    epoch_loss = []
    train_batch_time = AverageMeter()
    train_losses = AverageMeter()
    tic = time.time()

    with tqdm(total=num_samples) as pbar:
        for data in dataloader:
            optimizer.zero_grad()

            siamese_loss, classification_loss, regression_loss = retinanet([
                data['img'].cuda().float(),
                data['annot'],
                data['pair'].cuda().float(),
            ])
            classification_loss = classification_loss.mean()
            regression_loss = regression_loss.mean()
            loss = classification_loss + regression_loss + siamese_loss

            # A zero total loss is skipped: no backward pass, no optimizer
            # step, and the batch is not recorded.
            if bool(loss == 0):
                continue

            loss.backward()
            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
            optimizer.step()

            loss_value = float(loss)
            epoch_loss.append(loss_value)

            toc = time.time()
            train_losses.update(loss_value, batch_size)
            train_batch_time.update(toc - tic)
            tic = time.time()

            pbar.set_description("{:.1f}s - loss: {:.3f}".format(
                train_batch_time.val, train_losses.val))
            pbar.update(batch_size)

            # Drop the loss tensors before the next iteration.
            del classification_loss
            del regression_loss
            del siamese_loss

    return epoch_loss, train_losses


def main(config):
    """Train the model described by ``config`` and optionally checkpoint it.

    Attributes read from ``config``: ``model_date``, ``save_model``,
    ``csv_train``, ``csv_classes``, ``csv_val``, ``data_dir``, ``batch_size``,
    ``depth``, ``use_gpu`` and ``epochs``.

    When ``config.csv_val`` is provided, each epoch is followed by an
    evaluation; training stops once the ``correct`` count has failed to
    improve for more than three consecutive epochs, and the best weights are
    saved when ``config.save_model`` is set. Without validation data the
    model is only trained and checkpointed per epoch.

    Raises:
        ValueError: If ``csv_train`` or ``csv_classes`` is missing, or if
            ``depth`` is unsupported.
    """
    # set seed for reproducibility
    np.random.seed(0)
    torch.manual_seed(0)
    torch.cuda.manual_seed(0)

    # create folder for model
    newpath = './models/' + config.model_date
    if config.save_model:
        # exist_ok: restarting a run with the same model_date must not crash.
        os.makedirs(newpath, exist_ok=True)

    # Create the data loaders
    if config.csv_train is None:
        raise ValueError('Must provide --csv_train when training on csv,')
    if config.csv_classes is None:
        raise ValueError('Must provide --csv_classes when training on csv,')

    train_dataset = datasets.ImageFolder(
        os.path.join(config.data_dir, 'train'))
    dataset_train = GetDataset(train_file=config.csv_train,
                               class_list=config.csv_classes,
                               transform=transforms.Compose(
                                   [Augmenter(), Resizer()]),
                               dataset=train_dataset,
                               seed=0)
    dataloader_train = DataLoader(dataset_train,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=1,
                                  collate_fn=collater)

    has_validation = config.csv_val is not None
    if has_validation:
        valid_dataset = datasets.ImageFolder(
            os.path.join(config.data_dir, 'valid'))
        dataset_val = GetDataset(train_file=config.csv_val,
                                 class_list=config.csv_classes,
                                 transform=transforms.Compose([Resizer()]),
                                 dataset=valid_dataset,
                                 seed=0)
    else:
        dataset_val = None
        print('No validation annotations provided.')

    # Create the model
    retinanet = _build_retinanet(config.depth, dataset_train.num_classes())

    # NOTE(review): the model is moved to CUDA below regardless of
    # config.use_gpu, and batches are sent to CUDA unconditionally, so the
    # flag currently has no effect. Kept as-is to preserve behaviour.
    if config.use_gpu:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    best_valid_map = 0
    counter = 0
    batch_size = config.batch_size

    for epoch_num in range(config.epochs):
        print('\nEpoch: {}/{}'.format(epoch_num + 1, config.epochs))

        # Evaluation may have changed the mode; restore it every epoch.
        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss, train_losses = _train_one_epoch(
            retinanet, dataloader_train, optimizer, len(dataset_train),
            batch_size)

        # Defaults for runs without validation data, so the code below never
        # touches undefined names.
        is_best = False
        mAP = None
        correct = None

        if has_validation:
            print('Evaluating dataset')
            mAP, correct = eval_new.evaluate(dataset_val, retinanet)

            # Model selection is driven by the `correct` count, not by mAP.
            is_best = correct > best_valid_map
            best_valid_map = max(correct, best_valid_map)
            if is_best:
                counter = 0
            else:
                counter += 1
            if counter > 3:
                print("[!] No improvement in a while, stopping training.")
                break

        # Skip the scheduler when every batch was skipped: the mean of an
        # empty list is NaN and would count as a bad epoch.
        if epoch_loss:
            scheduler.step(np.mean(epoch_loss))

        if config.save_model:
            if is_best:
                torch.save(
                    retinanet.state_dict(),
                    './models/{}/best_retinanet.pt'.format(config.model_date))
            torch.save(
                retinanet.state_dict(),
                './models/{}/{}_retinanet_{}.pt'.format(
                    config.model_date, config.depth, epoch_num))

        if has_validation:
            msg = "train loss: {:.3f} - val map: {:.3f} - val acc: {:.3f}%"
            print(
                msg.format(train_losses.avg, mAP[0][0],
                           (100. * correct) / len(dataset_val)))
        else:
            print("train loss: {:.3f}".format(train_losses.avg))