def model_init(model_name):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if model_name == 'retinanet':
        # weight_file_path = '/content/retinanet/resnet34-333f7ec4.pth'
        # weight_file_path = '/content/retinanet/CP_epoch5.pth'
        weight_file_path = '/content/retinanet/retinanet50_pretrained.pth'
        total_keys = len(list(torch.load(weight_file_path).keys()))

        # Create the model: pick the backbone from the number of keys in the state dict
        if total_keys >= 102 and total_keys < 182:
            retinanet = model.resnet18(num_classes=num_classes, pretrained=False)
        elif total_keys >= 182 and total_keys < 267:
            retinanet = model.resnet34(num_classes=num_classes, pretrained=False)
        elif total_keys >= 267 and total_keys < 522:
            retinanet = model.resnet50(num_classes=num_classes, pretrained=False)
        elif total_keys >= 522 and total_keys < 777:
            retinanet = model.resnet101(num_classes=num_classes, pretrained=False)
        elif total_keys >= 777:
            retinanet = model.resnet152(num_classes=num_classes, pretrained=False)
        else:
            raise ValueError('Unsupported model backbone, must be one of resnet18, resnet34, resnet50, resnet101, resnet152')

        # Initialising the model with the loaded weights
        retinanet.load_state_dict(torch.load(weight_file_path, map_location=device), strict=False)
        print('model initialized..')
        return retinanet, device
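# Hypothetical usage sketch (an assumption, not part of the original source): model_init()
# relies on module-level `model` (the RetinaNet backbone factory) and `num_classes` being
# defined; the input size below is illustrative only.
retinanet, device = model_init('retinanet')
retinanet = retinanet.to(device)
retinanet.eval()
with torch.no_grad():
    dummy = torch.rand(1, 3, 512, 512, device=device)  # NCHW image batch
    outputs = retinanet(dummy)  # output format depends on the RetinaNet implementation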
def main():
    weights_path = "./resNet34.pth"
    model = resnet34(num_classes=5)
    model.load_state_dict(torch.load(weights_path, map_location=device))
    model.to(device)
    # validate_model(model)

    # module = model.conv1
    # print(list(module.named_parameters()))
    # print(list(module.named_buffers()))
    # # prune 50% of the filters in this single conv layer
    # prune.ln_structured(module, name="weight", amount=0.5, n=2, dim=0)
    # print(list(module.weight))
    # print(module.weight.shape)
    # print(list(module.named_buffers()))
    # prune.remove(module, "weight")
    # print(module.weight.shape)

    # collect all conv layers that should be pruned
    parameters_to_prune = []
    for name, module in model.named_modules():
        if isinstance(module, torch.nn.Conv2d):
            parameters_to_prune.append((module, "weight"))

    # apply global unstructured pruning to the collected conv weights
    prune.global_unstructured(parameters_to_prune,
                              pruning_method=prune.L1Unstructured,
                              amount=0.5)

    # report the resulting sparsity
    count_sparsity(model, p=False)

    # validate the pruned model
    validate_model(model)
def pytorch_model_speed(data_loader, val_num):
    net = resnet34(num_classes=5)
    # load weights
    model_weight_path = "./resNet34.pth"
    check_path_exist(model_weight_path)
    net.load_state_dict(torch.load(model_weight_path, map_location=device), strict=False)
    net.to(device)
    net.eval()

    # warm-up forward pass
    test_data = torch.rand((1, 3, 224, 224))
    net(test_data.to(device))

    forward_time = 0
    acc = 0.0  # accumulate accurate number / epoch
    with torch.no_grad():
        for val_data in tqdm(data_loader, desc="Running pytorch model..."):
            val_images, val_labels = val_data
            t1 = time.time()
            outputs = net(val_images.to(device))  # eval model only has the last output layer
            t2 = time.time()
            forward_time += (t2 - t1)
            predict_y = torch.max(outputs, dim=1)[1]
            acc += (predict_y == val_labels.to(device)).sum().item()

    val_accurate = acc / val_num
    fps = round(val_num / forward_time, 1)
    print("pytorch info:\nfps: {}/s accuracy: {}\n".format(fps, val_accurate))
    return fps, val_accurate, "Pytorch(not opt)"
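# Hypothetical usage sketch (paths are assumptions, not from the original source): build the
# kind of ImageFolder validation loader this benchmark expects and run it once.
import torch
from torchvision import datasets, transforms

val_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
val_dataset = datasets.ImageFolder(root="./data_set/flower_data/val", transform=val_transform)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=16, shuffle=False)
fps, acc, name = pytorch_model_speed(val_loader, len(val_dataset))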
def __init__(self, model_weight_path, json_file):
    # self.data_transform = transforms.Compose([transforms.Resize(256),
    #                                            transforms.CenterCrop(224),
    #                                            transforms.ToTensor(),
    #                                            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
    self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(self.device)
    self.device_cpu = torch.device("cpu")
    self.data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    with open(json_file, "r", encoding="utf-8") as json_filef:
        self.class_indict = json.load(json_filef)

    # create model
    if "res50.pth" in model_weight_path:
        self.model = resnet50(num_classes=len(self.class_indict))  # res50
    else:
        self.model = resnet34(num_classes=len(self.class_indict))
    # load model weights
    self.model.load_state_dict(torch.load(model_weight_path))
    self.model.eval()
    self.model.to(self.device)
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # load images
    img_path_list = ["./tulip.jpg", "./rose.jpg"]
    img_list = []
    # img_list = Image.open(img_path_list).convert('RGB')
    for img_path in img_path_list:
        assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
        img = Image.open(img_path)
        img = data_transform(img)
        img_list.append(img)

    # batch the images: torch.stack() stacks the per-image [C, H, W] tensors along a new
    # dimension (dim=0 here), producing a single [N, C, H, W] batch tensor
    batch_img = torch.stack(img_list, dim=0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)
    with open(json_path, "r") as json_file:
        class_indict = json.load(json_file)

    # create model
    model = resnet34(num_classes=5).to(device)

    # load model weights
    weights_path = "./resNet34.pth"
    assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path)
    model.load_state_dict(torch.load(weights_path, map_location=device))

    # prediction
    model.eval()
    with torch.no_grad():
        # predict class
        output = model(batch_img.to(device)).cpu()
        predict = torch.softmax(output, dim=1)
        probs, classes = torch.max(predict, dim=1)
        for idx, (pro, cla) in enumerate(zip(probs, classes)):
            print("image: {}  class: {}  prob: {:.3}".format(
                img_path_list[idx], class_indict[str(cla.numpy())], pro.numpy()))
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # load image
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' does not exist.".format(img_path)
    img = Image.open(img_path)
    plt.imshow(img)
    # [N, C, H, W]
    img = data_transform(img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' does not exist.".format(json_path)
    with open(json_path, "r") as json_file:
        class_indict = json.load(json_file)

    # create model
    model = resnet34(num_classes=5).to(device)

    # load model weights
    weights_path = "./resNet34.pth"
    assert os.path.exists(weights_path), "file: '{}' does not exist.".format(weights_path)
    model.load_state_dict(torch.load(weights_path, map_location=device))

    # prediction
    model.eval()
    with torch.no_grad():
        # predict class
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()

    print_res = "class: {}   prob: {:.3}".format(class_indict[str(predict_cla)],
                                                 predict[predict_cla].numpy())
    plt.title(print_res)
    for i in range(len(predict)):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  predict[i].numpy()))
    plt.show()
def load_model(self):
    self.checkpoint = torch.load(self.model_checkpoint_file_path,
                                 map_location=lambda storage, loc: storage)
    self.model_args = self.checkpoint['args']
    self.num_classes = None
    if self.model_args.model_type == 'food179':
        self.num_classes = 179
    elif self.model_args.model_type == 'nsfw':
        self.num_classes = 5
    else:
        raise NotImplementedError('Unsupported model_type: {}'.format(self.model_args.model_type))

    if self.model_args.model_arc == 'resnet18':
        self.model = model.resnet18(num_classes=self.num_classes, zero_init_residual=True)
    elif self.model_args.model_arc == 'resnet34':
        self.model = model.resnet34(num_classes=self.num_classes, zero_init_residual=True)
    elif self.model_args.model_arc == 'resnet50':
        self.model = model.resnet50(num_classes=self.num_classes, zero_init_residual=True)
    elif self.model_args.model_arc == 'resnet101':
        self.model = model.resnet101(num_classes=self.num_classes, zero_init_residual=True)
    elif self.model_args.model_arc == 'resnet152':
        self.model = model.resnet152(num_classes=self.num_classes, zero_init_residual=True)
    elif self.model_args.model_arc == 'mobilenet':
        self.model = model.MobileNetV2(n_class=self.num_classes, input_size=256)
    else:
        raise NotImplementedError('Unsupported model_arc: {}'.format(self.model_args.model_arc))

    self.model = nn.DataParallel(self.model)
    self.model.load_state_dict(self.checkpoint['model_state_dict'])
    self.model_epoch = self.checkpoint['epoch']
    self.model_test_acc = self.checkpoint['test_acc']
    self.model_best_acc = self.checkpoint['best_acc']
    self.model_test_acc_top5 = self.checkpoint['test_acc_top5']
    self.model_class_to_idx = self.checkpoint['class_to_idx']
    self.model_idx_to_class = {v: k for k, v in self.model_class_to_idx.items()}
    self.model_train_history_dict = self.checkpoint['train_history_dict']
    self.mean = self.checkpoint['NORM_MEAN']
    self.std = self.checkpoint['NORM_STD']
    self.model.eval()
    return
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # load pretrained weights
    # download url: https://download.pytorch.org/models/resnet34-333f7ec4.pth
    model_weight_path = "./resnet34-pre.pth"
    assert os.path.exists(model_weight_path), "file {} does not exist.".format(model_weight_path)

    # option 1: load the ImageNet weights first, then replace the fc layer
    net = resnet34()
    net.load_state_dict(torch.load(model_weight_path, map_location=device))
    # change fc layer structure
    in_channel = net.fc.in_features
    net.fc = nn.Linear(in_channel, 5)
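# Hypothetical "option 2" sketch (an assumption, not part of the original snippet): build the
# model with the final class count and load only the backbone weights whose shapes match,
# keeping the randomly initialised fc layer. Reuses resnet34() and the same
# ./resnet34-pre.pth file named above.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
net = resnet34(num_classes=5)
pre_weights = torch.load("./resnet34-pre.pth", map_location=device)
load_dict = {k: v for k, v in pre_weights.items()
             if k in net.state_dict() and net.state_dict()[k].numel() == v.numel()}
net.load_state_dict(load_dict, strict=False)
net.to(device)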
def main(args=None):
    data_set = {
        x: guipang(cfg=cfg['dataset_guipang'], part=x) for x in ['train', 'val']
    }
    # data_set = {
    #     x: qiafan(cfg=cfg['dataset_qiafan'], part=x) for x in ['train', 'val']
    # }
    data_loader = {
        x: data.DataLoader(data_set[x],
                           batch_size=cfg['batch_size'],
                           num_workers=4,
                           shuffle=True,
                           pin_memory=False) for x in ['train', 'val']
    }

    # Create the model
    num_classes = data_set['train'].num_classes()
    if cfg['depth'] == 18:
        retinanet = model.resnet18(num_classes=num_classes, pretrained=True)
    elif cfg['depth'] == 34:
        retinanet = model.resnet34(num_classes=num_classes, pretrained=True)
    elif cfg['depth'] == 50:
        retinanet = model.resnet50(num_classes=num_classes, pretrained=True)
    elif cfg['depth'] == 101:
        retinanet = model.resnet101(num_classes=num_classes, pretrained=True)
    elif cfg['depth'] == 152:
        retinanet = model.resnet152(num_classes=num_classes, pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
def __init__(self, model_weight_path, json_file):
    # self.data_transform = transforms.Compose([transforms.Resize(256),
    #                                            transforms.CenterCrop(224),
    #                                            transforms.ToTensor(),
    #                                            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
    self.data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    with open(json_file, "r", encoding="utf-8") as json_filef:
        self.class_indict = json.load(json_filef)

    # create model
    self.model = resnet34(num_classes=len(self.class_indict))
    # load model weights
    self.model.load_state_dict(torch.load(model_weight_path))
    self.model.eval()
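# Hypothetical companion method, a sketch only (not in the original source): one way this
# predictor class might classify a single image, reusing the members initialised above.
# The method name `predict` and the use of PIL.Image are assumptions for illustration.
def predict(self, img_path):
    img = Image.open(img_path)
    img = torch.unsqueeze(self.data_transform(img), dim=0)  # [1, C, H, W]
    with torch.no_grad():
        output = torch.squeeze(self.model(img))
        probs = torch.softmax(output, dim=0)
        cla = int(torch.argmax(probs))
    return self.class_indict[str(cla)], float(probs[cla])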
def get_network(args):
    ''' return the required network '''
    if args.net == 'resnet34':
        from model import resnet34
        net = resnet34(args.num_classes)
    elif args.net == 'resnet50':
        from model import resnet50
        net = resnet50(args.num_classes)
    else:
        raise ValueError(args.net + ' is not supported, please enter resnet34 or resnet50')

    if args.gpu:
        net = net.cuda()

    return net
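# Hypothetical usage sketch (argument names taken from the function body above; defaults are
# assumptions, not from the source): get_network() expects an args namespace with .net,
# .num_classes and .gpu attributes.
import argparse
parser = argparse.ArgumentParser()
parser.add_argument('--net', type=str, default='resnet34')
parser.add_argument('--num_classes', type=int, default=5)
parser.add_argument('--gpu', action='store_true')
args = parser.parse_args()
net = get_network(args)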
def main(args=None): from dataloader import JinNanDataset, Augmenter, UnNormalizer, Normalizer,Resizer from torch.utils.data import Dataset, DataLoader from torchvision import datasets, models, transforms import model import torch import argparse parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.') parser.add_argument('--dataset',default='jingnan', help='Dataset type, must be one of csv or coco.') parser.add_argument('--threshold',help='treshold') parser.add_argument('--dataset_path', help='Path to file containing training and validation annotations (optional, see readme)') parser.add_argument('--model_path',help=('the model path')) parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50) parser = parser.parse_args(args) dataset_val=JinNanDataset(parser.dataset_path, set_name='val', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()])) # Create the model if parser.depth == 18: retinanet = model.resnet18(num_classes=dataset_val.num_classes(), pretrained=True) elif parser.depth == 34: retinanet = model.resnet34(num_classes=dataset_val.num_classes(), pretrained=True) elif parser.depth == 50: retinanet = model.resnet50(num_classes=dataset_val.num_classes(), pretrained=True) elif parser.depth == 101: retinanet = model.resnet101(num_classes=dataset_val.num_classes(), pretrained=True) elif parser.depth == 152: retinanet = model.resnet152(num_classes=dataset_val.num_classes(), pretrained=True) else: raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152') retinanet=torch.load(parser.model_path) use_gpu = True if use_gpu: retinanet = retinanet.cuda() retinanet.eval() print('Evaluating dataset') evaluate_jinnan(dataset_val, retinanet)
def build(self, depth=50, learning_rate=1e-5, ratios=[0.5, 1, 2], scales=[2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]): # Create the model if depth == 18: retinanet = model.resnet18(num_classes=self.dataset_train.num_classes(), ratios=ratios, scales=scales, weights_dir=self.weights_dir_path, pretrained=True) elif depth == 34: retinanet = model.resnet34(num_classes=self.dataset_train.num_classes(), ratios=ratios, scales=scales, weights_dir=self.weights_dir_path, pretrained=True) elif depth == 50: retinanet = model.resnet50(num_classes=self.dataset_train.num_classes(), ratios=ratios, scales=scales, weights_dir=self.weights_dir_path, pretrained=True) elif depth == 101: retinanet = model.resnet101(num_classes=self.dataset_train.num_classes(), ratios=ratios, scales=scales, weights_dir=self.weights_dir_path, pretrained=True) elif depth == 152: retinanet = model.resnet152(num_classes=self.dataset_train.num_classes(), ratios=ratios, scales=scales, weights_dir=self.weights_dir_path, pretrained=True) else: raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152') self.retinanet = retinanet.to(device=self.device) self.retinanet.training = True self.optimizer = optim.Adam(self.retinanet.parameters(), lr=learning_rate) self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, patience=3, verbose=True) if self.checkpoint is not None: self.retinanet.load_state_dict(self.checkpoint['model']) self.optimizer.load_state_dict(self.checkpoint['optimizer']) self.scheduler.load_state_dict(self.checkpoint['scheduler']) # TODO: test this, is it done right? # TODO is it right to resume_read_trial optimizer and schedular like this??? self.ratios = ratios self.scales = scales self.depth = depth
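# Hypothetical save-side sketch (an assumption, not from the original source): build() above
# resumes from a checkpoint dict with 'model', 'optimizer' and 'scheduler' keys, so a matching
# save might look like this; the method name and path argument are illustrative only.
def save_checkpoint(self, path):
    torch.save({
        'model': self.retinanet.state_dict(),
        'optimizer': self.optimizer.state_dict(),
        'scheduler': self.scheduler.state_dict(),
    }, path)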
def set_models(self, dataset_train): # Create the model if self.depth == 18: retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True) elif self.depth == 34: retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True) elif self.depth == 50: retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True) elif self.depth == 101: retinanet = model.resnet101( num_classes=dataset_train.num_classes(), pretrained=True) elif self.depth == 152: retinanet = model.resnet152( num_classes=dataset_train.num_classes(), pretrained=True) else: raise ValueError( 'Unsupported model depth, must be one of 18, 34, 50, 101, 152') if torch.cuda.device_count() > 1: print("Let's use", torch.cuda.device_count(), "GPUs!") # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs retinanet = nn.DataParallel(retinanet) self.retinanet = retinanet.to(self.device) self.retinanet.training = True self.optimizer = optim.Adam(self.retinanet.parameters(), lr=self.lr) # This lr_shceduler reduce the learning rate based on the models's validation loss self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, patience=3, verbose=True) self.loss_hist = collections.deque(maxlen=500)
def load_model(model_arch, numOfClasses):
    if model_arch.endswith('resnet34'):
        NN = resnet34(pretrained=False)
        num_features = NN.fc.in_features
        NN.fc = nn.Linear(num_features, numOfClasses)
    elif model_arch.endswith('resnet50_pretrained') or model_arch.endswith('resnet50'):
        NN = models.resnet50(pretrained=True)
        num_features = NN.fc.in_features
        NN.fc = nn.Linear(num_features, numOfClasses)
    elif model_arch.endswith('resnet152_pretrained'):
        NN = models.resnet152(pretrained=True)
        num_features = NN.fc.in_features
        NN.fc = nn.Linear(num_features, numOfClasses)
    elif model_arch.endswith('resnet152'):
        NN = models.resnet152(pretrained=False)
        num_features = NN.fc.in_features
        NN.fc = nn.Linear(num_features, numOfClasses)
    elif model_arch.endswith('densenet121_pretrained'):
        NN = models.densenet121(pretrained=True)
        num_ftrs = NN.classifier.in_features
        NN.classifier = nn.Linear(num_ftrs, numOfClasses)
    else:
        raise ValueError('Unsupported model_arch: {}'.format(model_arch))
    return NN
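# Hypothetical usage sketch (the checkpoint path and class count are assumptions, not from
# the original source): build the architecture, then load fine-tuned weights for inference.
NN = load_model('resnet50_pretrained', numOfClasses=5)
NN.load_state_dict(torch.load('./checkpoint.pth', map_location='cpu'))
NN.eval()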
def main(args=None): parser = argparse.ArgumentParser(description='Simple testing script for RetinaNet network.') parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.',default = "csv") parser.add_argument('--coco_path', help='Path to COCO directory') parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)',default="binary_class.csv") parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)') parser.add_argument('--csv_box_annot', help='Path to file containing predicted box annotations ') parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=18) parser.add_argument('--epochs', help='Number of epochs', type=int, default=500) parser.add_argument('--model', help='Path of .pt file with trained model',default = 'esposallescsv_retinanet_0.pt') parser.add_argument('--model_out', help='Path of .pt file with trained model to save',default = 'trained') parser.add_argument('--score_threshold', help='Score above which boxes are kept',default=0.15) parser.add_argument('--nms_threshold', help='Score above which boxes are kept',default=0.2) parser.add_argument('--max_epochs_no_improvement', help='Max epochs without improvement',default=100) parser.add_argument('--max_boxes', help='Max boxes to be fed to recognition',default=50) parser.add_argument('--seg_level', help='Line or word, to choose anchor aspect ratio',default='line') parser.add_argument('--htr_gt_box',help='Train recognition branch with box gt (for debugging)',default=False) parser = parser.parse_args(args) # Create the data loaders if parser.dataset == 'csv': if parser.csv_classes is None: raise ValueError('Must provide --csv_classes when training on COCO,') if parser.csv_val is None: dataset_val = None print('No validation annotations provided.') else: dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()])) if parser.csv_box_annot is not None: box_annot_data = CSVDataset(train_file=parser.csv_box_annot, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()])) else: box_annot_data = None else: raise ValueError('Dataset type not understood (must be csv or coco), exiting.') if dataset_val is not None: sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater, batch_sampler=sampler_val) if box_annot_data is not None: sampler_val = AspectRatioBasedSampler(box_annot_data, batch_size=1, drop_last=False) dataloader_box_annot = DataLoader(box_annot_data, num_workers=0, collate_fn=collater, batch_sampler=sampler_val) else: dataloader_box_annot = dataloader_val if not os.path.exists('trained_models'): os.mkdir('trained_models') # Create the model alphabet=dataset_val.alphabet if os.path.exists(parser.model): retinanet = torch.load(parser.model) else: if parser.depth == 18: retinanet = model.resnet18(num_classes=dataset_val.num_classes(), pretrained=True,max_boxes=int(parser.max_boxes),score_threshold=float(parser.score_threshold),seg_level=parser.seg_level,alphabet=alphabet) elif parser.depth == 34: retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 50: retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 101: retinanet = 
model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 152: retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True) else: raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152') use_gpu = True if use_gpu: retinanet = retinanet.cuda() retinanet = torch.nn.DataParallel(retinanet).cuda() #retinanet = torch.load('../Documents/TRAINED_MODELS/pytorch-retinanet/esposallescsv_retinanet_99.pt') #print "LOADED pretrained MODEL\n\n" optimizer = optim.Adam(retinanet.parameters(), lr=1e-4) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=4, verbose=True) loss_hist = collections.deque(maxlen=500) ctc = CTCLoss() retinanet.module.freeze_bn() best_cer = 1000 epochs_no_improvement=0 cers=[] retinanet.eval() retinanet.module.epochs_only_det = 0 #retinanet.module.htr_gt_box = False retinanet.training=False if parser.score_threshold is not None: retinanet.module.score_threshold = float(parser.score_threshold) '''if parser.dataset == 'csv' and parser.csv_val is not None: print('Evaluating dataset') ''' mAP = csv_eval.evaluate(dataset_val, retinanet,score_threshold=retinanet.module.score_threshold) aps = [] for k,v in mAP.items(): aps.append(v[0]) print ("VALID mAP:",np.mean(aps)) print("score th",retinanet.module.score_threshold) for idx,data in enumerate(dataloader_box_annot): print("Eval CER on validation set:",idx,"/",len(dataloader_box_annot),"\r") if box_annot_data: image_name = box_annot_data.image_names[idx].split('/')[-1].split('.')[-2] else: image_name = dataset_val.image_names[idx].split('/')[-1].split('.')[-2] #generate_pagexml(image_name,data,retinanet,parser.score_threshold,parser.nms_threshold,dataset_val) text_gt_path="/".join(dataset_val.image_names[idx].split('/')[:-1]) text_gt = os.path.join(text_gt_path,image_name+'.txt') f =open(text_gt,'r') text_gt_lines=f.readlines()[0] transcript_pred = get_transcript(image_name,data,retinanet,retinanet.module.score_threshold,float(parser.nms_threshold),dataset_val,alphabet) cers.append(float(editdistance.eval(transcript_pred,text_gt_lines))/len(text_gt_lines)) print("GT",text_gt_lines) print("PREDS SAMPLE:",transcript_pred) print("VALID CER:",np.mean(cers),"best CER",best_cer) print("GT",text_gt_lines) print("PREDS SAMPLE:",transcript_pred) print("VALID CER:",np.mean(cers),"best CER",best_cer)
def main(args=None): #def main(epoch): parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.') parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.') parser.add_argument('--coco_path', help='Path to COCO directory') parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)') parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)') parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)') parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50) parser.add_argument('--epochs', help='Number of epochs', type=int, default=100) #parser.add_argument('--resume', '-r', action='store_true', help='resume from checkpoint') parser.add_argument('--start-epoch', default=0, type=int, help='manual epoch number (useful on restarts)') parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)') parser = parser.parse_args(args) #args = parser.parse_args() #parser = parser.parse_args(epoch) # Create the data loaders if parser.dataset == 'coco': if parser.coco_path is None: raise ValueError('Must provide --coco_path when training on COCO,') dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()])) dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose([Normalizer(), Resizer()])) elif parser.dataset == 'csv': if parser.csv_train is None: raise ValueError('Must provide --csv_train when training on COCO,') if parser.csv_classes is None: raise ValueError('Must provide --csv_classes when training on COCO,') dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()])) if parser.csv_val is None: dataset_val = None print('No validation annotations provided.') else: dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()])) else: raise ValueError('Dataset type not understood (must be csv or coco), exiting.') sampler = AspectRatioBasedSampler(dataset_train, batch_size=4, drop_last=False) dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler) if dataset_val is not None: sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val) # Create the model if parser.depth == 18: retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 34: retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 50: retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 101: retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 152: retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True) else: raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152') use_gpu = True if use_gpu: retinanet = retinanet.cuda() #retinanet().load_state_dict(torch.load('/users/wenchi/ghwwc/Pytorch-retinanet-master/resnet50-19c8e357.pth')) #if True: 
#print('==> Resuming from checkpoint..') #checkpoint = torch.load('/users/wenchi/ghwwc/Pytorch-retinanet-master/coco_retinanet_2.pt') #retinanet().load_state_dict(checkpoint) #best_loss = checkpoint['loss'] #start_epoch = checkpoint['epoch'] retinanet = torch.nn.DataParallel(retinanet).cuda() retinanet.training = True #optimizer = optim.Adam(retinanet.parameters(), lr=1e-5) optimizer = optim.SGD(retinanet.parameters(), lr=1e-5) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) loss_hist = collections.deque(maxlen=500) retinanet.train() #retinanet.freeze_bn() #for train from a middle state retinanet.module.freeze_bn() #for train from the very beginning print('Num training images: {}'.format(len(dataset_train))) for epoch_num in range(parser.start_epoch, parser.epochs): if parser.resume: if os.path.isfile(parser.resume): print("=>loading checkpoint '{}'".format(parser.resume)) checkpoint = torch.load(parser.resume) print(parser.start_epoch) #parser.start_epoch = checkpoint['epoch'] #retinanet.load_state_dict(checkpoint['state_dict']) retinanet=checkpoint #retinanet.load_state_dict(checkpoint) print(retinanet) #optimizer.load_state_dict(checkpoint) print("=> loaded checkpoint '{}' (epoch {})".format(parser.resume, checkpoint)) else: print("=> no checkpoint found at '{}'".format(parser.resume)) retinanet.train() retinanet.freeze_bn() #retinanet.module.freeze_bn() if parser.dataset == 'coco': print('Evaluating dataset') coco_eval.evaluate_coco(dataset_val, retinanet) elif parser.dataset == 'csv' and parser.csv_val is not None: print('Evaluating dataset') mAP = csv_eval.evaluate(dataset_val, retinanet) epoch_loss = [] for iter_num, data in enumerate(dataloader_train): try: optimizer.zero_grad() classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot'].cuda()]) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() loss = classification_loss + regression_loss if bool(loss == 0): continue loss.backward() torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1) optimizer.step() loss_hist.append(float(loss)) epoch_loss.append(float(loss)) print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist))) del classification_loss del regression_loss except Exception as e: print(e) continue if parser.dataset == 'coco': print('Evaluating dataset') coco_eval.evaluate_coco(dataset_val, retinanet) elif parser.dataset == 'csv' and parser.csv_val is not None: print('Evaluating dataset') mAP = csv_eval.evaluate(dataset_val, retinanet) scheduler.step(np.mean(epoch_loss)) #torch.save(retinanet.module, '{}_retinanet_101_{}.pt'.format(parser.dataset, epoch_num)) torch.save(retinanet, '{}_retinanet_dilation_experiment1_{}.pt'.format(parser.dataset, epoch_num)) name = '{}_retinanet_dilation_experiment1_{}.pt'.format(parser.dataset, epoch_num) parser.resume = '/users/wenchi/ghwwc/pytorch-retinanet-master_new/name' retinanet.eval() torch.save(retinanet, 'model_final_dilation_experiment1.pt'.format(epoch_num))
def main(args):
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    print(args)
    print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')

    # instantiate the SummaryWriter
    tb_writer = SummaryWriter(log_dir="runs/flower_experiment")
    if os.path.exists("./weights") is False:
        os.makedirs("./weights")

    # split the data into training and validation sets
    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)

    # preprocessing used for training and for validation/prediction
    data_transform = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        "val": transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    }

    # instantiate the training dataset
    train_data_set = MyDataSet(images_path=train_images_path,
                               images_class=train_images_label,
                               transform=data_transform["train"])
    # instantiate the validation dataset
    val_data_set = MyDataSet(images_path=val_images_path,
                             images_class=val_images_label,
                             transform=data_transform["val"])

    batch_size = args.batch_size
    # number of dataloader workers
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_data_set,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_data_set.collate_fn)
    val_loader = torch.utils.data.DataLoader(val_data_set,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_data_set.collate_fn)

    # instantiate the model
    model = resnet34(num_classes=args.num_classes).to(device)

    # add the model graph to tensorboard
    init_img = torch.zeros((1, 3, 224, 224), device=device)
    tb_writer.add_graph(model, init_img)

    # load pretrained weights if they exist
    if os.path.exists(args.weights):
        weights_dict = torch.load(args.weights, map_location=device)
        load_weights_dict = {k: v for k, v in weights_dict.items()
                             if model.state_dict()[k].numel() == v.numel()}
        model.load_state_dict(load_weights_dict, strict=False)
    else:
        print("not using pretrain-weights.")

    # optionally freeze weights
    if args.freeze_layers:
        print("freeze layers except fc layer.")
        for name, para in model.named_parameters():
            # freeze everything except the final fully connected layer
            if "fc" not in name:
                para.requires_grad_(False)

    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=0.005)
    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    for epoch in range(args.epochs):
        # train
        mean_loss = train_one_epoch(model=model,
                                    optimizer=optimizer,
                                    data_loader=train_loader,
                                    device=device,
                                    epoch=epoch)
        # update learning rate
        scheduler.step()

        # validate
        acc = evaluate(model=model, data_loader=val_loader, device=device)

        # add loss, acc and lr into tensorboard
        print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))
        tags = ["train_loss", "accuracy", "learning_rate"]
        tb_writer.add_scalar(tags[0], mean_loss, epoch)
        tb_writer.add_scalar(tags[1], acc, epoch)
        tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch)

        # add figure into tensorboard
        fig = plot_class_preds(net=model,
                               images_dir="./plot_img",
                               transform=data_transform["val"],
                               num_plot=5,
                               device=device)
        if fig is not None:
            tb_writer.add_figure("predictions vs. actuals",
                                 figure=fig,
                                 global_step=epoch)

        # add conv1 weights into tensorboard
        tb_writer.add_histogram(tag="conv1",
                                values=model.conv1.weight,
                                global_step=epoch)
        tb_writer.add_histogram(tag="layer1/block0/conv1",
                                values=model.layer1[0].conv1.weight,
                                global_step=epoch)

        # save weights
        torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # torchvision.transforms is PyTorch's image preprocessing package; Compose chains several steps together
    data_transform = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),   # randomly flip the PIL image horizontally
            transforms.ToTensor(),               # convert a PIL Image / ndarray to a tensor scaled to [0, 1] (divides by 255)
            # Normalize standardises each channel as (channel - mean) / std with the ImageNet statistics;
            # since ToTensor already maps values to [0, 1], the result is roughly in [-1, 1]
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([
            transforms.Resize(256),              # keep the aspect ratio, scale the shorter side to 256
            transforms.CenterCrop(224),          # crop 224x224 around the image center
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../Test5_resnet"))  # get the data root directory
    image_path = os.path.join(data_root, "data_set", "flower_data")            # flower data set path
    # image_path = os.path.join(data_root, "data_set/flower_data")
    # assert provides a correctness check while debugging
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)

    # ImageFolder assumes images are stored in per-class subfolders, with the folder name as the class name.
    # Its main arguments: root (where to look for images), transform (applied to each loaded PIL image),
    # target_transform (applied to the label), loader (how to read an image; default returns an RGB PIL Image).
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx  # mapping from class name to numeric label
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file; json.dumps converts the dict to a JSON string, indent=4 pretty-prints it
    json_str = json.dumps(cla_dict, indent=4)
    # `with open` closes the file automatically even if an exception occurs
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    batch_size = 16
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of dataloader workers
    print('Using {} dataloader workers every process'.format(nw))

    # the DataLoader draws images from the dataset and packs them into batches for training
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,    # shuffle the dataset
                                               num_workers=nw)  # on Linux, num_workers > 0 speeds up preprocessing

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),  # the "val" folder under image_path
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(train_num, val_num))

    net = resnet34()
    # load pretrained weights
    # download url: https://download.pytorch.org/models/resnet34-333f7ec4.pth
    model_weight_path = "./resnet34-pre.pth"
    assert os.path.exists(model_weight_path), "file {} does not exist.".format(model_weight_path)
    missing_keys, unexpected_keys = net.load_state_dict(torch.load(model_weight_path), strict=False)
    # for param in net.parameters():
    #     param.requires_grad = False

    # change fc layer structure
    in_channel = net.fc.in_features  # fc is the ResNet's fully connected layer; in_features is the depth of its input
    net.fc = nn.Linear(in_channel, 5)
    net.to(device)

    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    epochs = 3
    best_acc = 0.0
    save_path = './resNet34.pth'
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader)  # tqdm shows a progress bar
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            logits = net(images.to(device))
            loss = loss_function(logits, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1, epochs, loss)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, colour='green')
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                # loss = loss_function(outputs, test_labels)
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()
                val_bar.desc = "valid epoch[{}/{}]".format(epoch + 1, epochs)

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')
def main(args=None): parser = argparse.ArgumentParser( description='Simple training script for training a RetinaNet network.') parser.add_argument('--coco_path', help='Path to COCO directory', type=str, default='./data/coco') parser.add_argument( '--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50) parser.add_argument('--checkpoint', help='The path to the checkpoint.', type=str, default=None) parser.add_argument('--epochs', help='Number of epochs', type=int, default=100) parser.add_argument('--batch_size', help='Number of batch', type=int, default=16) parser.add_argument('--gpu_ids', help='Gpu parallel', type=str, default='1, 2') parser = parser.parse_args(args) # Create the data lodaders dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose( [Normalizer(), Resizer()])) sampler = AspectRatioBasedSampler(dataset_train, batch_size=4, drop_last=False) dataloader_train = DataLoader(dataset_train, num_workers=16, collate_fn=collater, batch_sampler=sampler) sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val) # Create the model if parser.depth == 18: retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 34: retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 50: retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 101: retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 152: retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True) else: raise ValueError( 'Unsupported model depth, must be one of 18, 34, 50, 101, 152') use_gpu = True if use_gpu: retinanet = retinanet.cuda() gpu_ids = parser.gpu_ids.split(',') device = torch.device("cuda:" + gpu_ids[0]) torch.cuda.set_device(device) gpu_ids = list(map(int, gpu_ids)) retinanet = torch.nn.DataParallel(retinanet, device_ids=gpu_ids).to(device) if parser.checkpoint: pretrained = torch.load(parser.checkpoint).state_dict() retinanet.module.load_state_dict(pretrained) # add tensorboard to record train log retinanet.training = True writer = SummaryWriter('./log') # writer.add_graph(retinanet, input_to_model=[images, labels]) retinanet.training = True optimizer = optim.Adam(retinanet.parameters(), lr=1e-5) scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True) loss_hist = collections.deque(maxlen=500) retinanet.train() retinanet.module.freeze_bn() print('Num training images: {}'.format(len(dataset_train))) for epoch_num in range(parser.epochs): retinanet.train() retinanet.module.freeze_bn() epoch_loss = [] for iter_num, data in enumerate(dataloader_train): try: optimizer.zero_grad() classification_loss, regression_loss = retinanet( [data['img'].to(device), data['ann'].to(device)]) classification_loss = classification_loss.mean() regression_loss = regression_loss.mean() loss = classification_loss + regression_loss if bool(loss == 0): continue loss.backward() torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1) optimizer.step() loss_hist.append(float(loss)) writer.add_scalar('Loss/train', loss, iter_num) writer.add_scalar('Loss/reg_loss', 
regression_loss, iter_num) writer.add_scalar('Loss/cls_loss', classification_loss, iter_num) epoch_loss.append(float(loss)) if (iter_num + 1) % 1000 == 0: print('Save model') torch.save( retinanet.module, 'COCO_retinanet_epoch{}_iter{}.pt'.format( epoch_num, iter_num)) print( 'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}' .format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist))) del classification_loss del regression_loss except Exception as e: print(e) continue print('Evaluating dataset') coco_eval.evaluate_coco(dataset_val, retinanet, writer) scheduler.step(np.mean(epoch_loss)) torch.save(retinanet.module, 'COCO_retinanet_{}.pt'.format(epoch_num)) retinanet.eval() torch.save(retinanet, 'model_final.pt'.format(epoch_num))
# load image
img = Image.open("../../data_set/flower_data/flower_photos/tulips/112428665_d8f3632f36_n.jpg")
plt.imshow(img)
# [N, C, H, W]
img = data_transform(img)
# expand batch dimension
img = torch.unsqueeze(img, dim=0)

# read class_indict
try:
    json_file = open('./class_indices.json', 'r')
    class_indict = json.load(json_file)
except Exception as e:
    print(e)
    exit(-1)

# create model
model = resnet34(num_classes=5)
# load model weights
model_weight_path = "./resNet34.pth"
model.load_state_dict(torch.load(model_weight_path))
model.eval()
with torch.no_grad():
    # predict class
    output = torch.squeeze(model(img))
    predict = torch.softmax(output, dim=0)
    predict_cla = torch.argmax(predict).numpy()
print(class_indict[str(predict_cla)], predict[predict_cla].numpy())
plt.show()
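# A sketch of the `data_transform` this snippet assumes was defined earlier; it mirrors the
# validation preprocessing used by the other prediction scripts above (an assumption, not
# taken from this snippet's own source file).
from torchvision import transforms
data_transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])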
def main(): global NORM_MEAN, NORM_STD, coconut_model, train_history_dict for arg in vars(args): print(str(arg) + ': ' + str(getattr(args, arg))) print('=' * 100) # Build Model base on dataset and arc num_classes = None if args.model_type == 'food179': num_classes = 179 NORM_MEAN = FOOD179_MEAN NORM_STD = FOOD179_STD elif args.model_type == 'nsfw': num_classes = 5 NORM_MEAN = NSFW_MEAN NORM_STD = NSFW_STD else: raise ('Not Implemented!') if args.model_arc == 'resnet18': coconut_model = model.resnet18(num_classes=num_classes, zero_init_residual=True) elif args.model_arc == 'resnet34': coconut_model = model.resnet34(num_classes=num_classes, zero_init_residual=True) elif args.model_arc == 'resnet50': coconut_model = model.resnet50(num_classes=num_classes, zero_init_residual=True) elif args.model_arc == 'resnet101': coconut_model = model.resnet101(num_classes=num_classes, zero_init_residual=True) elif args.model_arc == 'resnet152': coconut_model = model.resnet152(num_classes=num_classes, zero_init_residual=True) elif args.model_arc == 'mobilenet': coconut_model = model.MobileNetV2(n_class=num_classes, input_size=256) else: raise ('Not Implemented!') coconut_model = nn.DataParallel(coconut_model) if args.cuda: coconut_model = coconut_model.cuda() torch.backends.benchmark = True print("CUDA Enabled") gpu_count = torch.cuda.device_count() print('Total of %d GPU available' % (gpu_count)) args.train_batch_size = args.train_batch_size * gpu_count args.test_batch_size = args.test_batch_size * gpu_count print('args.train_batch_size: %d' % (args.train_batch_size)) print('args.test_batch_size: %d' % (args.test_batch_size)) model_parameters = filter(lambda p: p.requires_grad, coconut_model.parameters()) params = sum([np.prod(p.size()) for p in model_parameters]) print('Total of %d parameters' % (params)) # Build Training start_epoch = 0 best_acc = 0 optimizer = None scheduler = None milestones = [50, 150, 250] if args.train_optimizer == 'sgd': optimizer = optim.SGD(coconut_model.parameters(), lr=args.lr, momentum=0.9, nesterov=True, weight_decay=args.l2_reg) scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.1) elif args.train_optimizer == 'adam': optimizer = optim.Adam(coconut_model.parameters(), lr=args.lr) scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.1) elif args.train_optimizer == 'adabound': optimizer = adabound.AdaBound(coconut_model.parameters(), lr=1e-3, final_lr=0.1) scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=150, gamma=0.1, last_epoch=-1) global_steps = 0 if not args.start_from_begining: filename = args.model_checkpoint_path if args.load_gpu_model_on_cpu: checkpoint = torch.load(filename, map_location=lambda storage, loc: storage) else: checkpoint = torch.load(filename) coconut_model.load_state_dict(checkpoint['model_state_dict']) optimizer.load_state_dict(checkpoint['model_optimizer']) best_acc = checkpoint['best_acc'] train_history_dict = checkpoint['train_history_dict'] scheduler.optimizer = optimizer # Not sure if this actually works start_epoch = checkpoint['epoch'] global_steps = checkpoint['global_steps'] print(filename + ' loaded!') data_loaders = load_datasets() train_ops(start_epoch=start_epoch, model=coconut_model, optimizer=optimizer, scheduler=scheduler, data_loaders=data_loaders, best_acc=best_acc, global_steps=global_steps)
        init.normal_(m.weight.data, 1.0, 0.02)
        init.constant_(m.bias.data, 0.0)
        # m.weight.data.normal_(1.0, 0.02)
        # m.bias.data.fill_(0)
    elif classname.find('Linear') != -1:
        init.kaiming_normal_(m.weight.data, a=0, mode='fan_in')
        init.constant_(m.bias.data, 0.0)
        # m.weight.data.normal_(0.0, 0.02)


def compute_accuracy(x, y):
    _, predicted = torch.max(x, dim=1)
    correct = (predicted == y).float()
    accuracy = torch.mean(correct) * 100.0
    return accuracy


Resnet34 = resnet34(num_classes=opt.out_class)
Resnet34.apply(weights_init)
criterion = nn.CrossEntropyLoss()

if opt.cuda:
    Resnet34.cuda()
    criterion.cuda()
if ngpu > 1:
    Resnet34 = nn.DataParallel(Resnet34)
if opt.Resnet34 != '':
    Resnet34.load_state_dict(torch.load(opt.Resnet34))
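# Hypothetical usage sketch (shapes and labels are illustrative assumptions, not from the
# source): compute_accuracy() expects raw logits of shape [N, num_classes] and integer
# labels of shape [N], and returns the batch accuracy in percent.
logits = torch.rand(4, 5)            # e.g. a batch of 4 samples over 5 classes
labels = torch.tensor([0, 1, 2, 3])
print("batch accuracy: {:.1f}%".format(float(compute_accuracy(logits, labels))))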
def main_fun(rank, world_size, args):
    if torch.cuda.is_available() is False:
        raise EnvironmentError("not find GPU device for training.")

    # initialise the per-process distributed environment: start
    os.environ["MASTER_ADDR"] = "localhost"
    os.environ["MASTER_PORT"] = "12355"
    args.rank = rank
    args.world_size = world_size
    args.gpu = rank
    args.distributed = True
    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(args.rank, args.dist_url), flush=True)
    dist.init_process_group(backend=args.dist_backend,
                            init_method=args.dist_url,
                            world_size=args.world_size,
                            rank=args.rank)
    dist.barrier()
    # initialise the per-process distributed environment: end

    rank = args.rank
    device = torch.device(args.device)
    batch_size = args.batch_size
    num_classes = args.num_classes
    weights_path = args.weights
    args.lr *= args.world_size  # scale the learning rate by the number of parallel GPUs

    if rank == 0:  # only rank 0 prints info and creates the tensorboard writer
        print(args)
        print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
        tb_writer = SummaryWriter()
        if os.path.exists("./weights") is False:
            os.makedirs("./weights")

    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(args.data_path)

    data_transform = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        "val": transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    }

    # instantiate the training dataset
    train_data_set = MyDataSet(images_path=train_images_path,
                               images_class=train_images_label,
                               transform=data_transform["train"])
    # instantiate the validation dataset
    val_data_set = MyDataSet(images_path=val_images_path,
                             images_class=val_images_label,
                             transform=data_transform["val"])

    # assign each rank its own subset of training sample indices
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_data_set)
    val_sampler = torch.utils.data.distributed.DistributedSampler(val_data_set)
    # group the sample indices into lists of batch_size elements
    train_batch_sampler = torch.utils.data.BatchSampler(train_sampler, batch_size, drop_last=True)

    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    if rank == 0:
        print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_data_set,
                                               batch_sampler=train_batch_sampler,
                                               pin_memory=True,
                                               num_workers=nw,
                                               collate_fn=train_data_set.collate_fn)
    val_loader = torch.utils.data.DataLoader(val_data_set,
                                             batch_size=batch_size,
                                             sampler=val_sampler,
                                             pin_memory=True,
                                             num_workers=nw,
                                             collate_fn=val_data_set.collate_fn)

    # instantiate the model
    model = resnet34(num_classes=num_classes).to(device)

    # load pretrained weights if they exist
    if os.path.exists(weights_path):
        weights_dict = torch.load(weights_path, map_location=device)
        load_weights_dict = {k: v for k, v in weights_dict.items()
                             if model.state_dict()[k].numel() == v.numel()}
        model.load_state_dict(load_weights_dict, strict=False)
    else:
        checkpoint_path = os.path.join(tempfile.gettempdir(), "initial_weights.pt")
        # if there are no pretrained weights, rank 0 saves its initial weights and the other
        # ranks load them, so every process starts from identical weights
        if rank == 0:
            torch.save(model.state_dict(), checkpoint_path)
        dist.barrier()
        # note: map_location must be specified here, otherwise the first GPU ends up holding extra memory
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    # optionally freeze weights
    if args.freeze_layers:
        for name, para in model.named_parameters():
            # freeze everything except the final fully connected layer
            if "fc" not in name:
                para.requires_grad_(False)
    else:
        # SyncBatchNorm only makes sense for networks that contain BN layers
        if args.syncBN:
            # training is slower with SyncBatchNorm
            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)

    # wrap the model with DistributedDataParallel
    model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])

    # optimizer
    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=0.005)
    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    for epoch in range(args.epochs):
        train_sampler.set_epoch(epoch)

        mean_loss = train_one_epoch(model=model,
                                    optimizer=optimizer,
                                    data_loader=train_loader,
                                    device=device,
                                    epoch=epoch)
        scheduler.step()

        sum_num = evaluate(model=model, data_loader=val_loader, device=device)
        acc = sum_num / val_sampler.total_size

        if rank == 0:
            print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))
            tags = ["loss", "accuracy", "learning_rate"]
            tb_writer.add_scalar(tags[0], mean_loss, epoch)
            tb_writer.add_scalar(tags[1], acc, epoch)
            tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch)
            torch.save(model.module.state_dict(), "./weights/model-{}.pth".format(epoch))

    # remove the temporary checkpoint file
    if rank == 0:
        if os.path.exists(checkpoint_path) is True:
            os.remove(checkpoint_path)

    cleanup()
def main(args=None): parser = argparse.ArgumentParser( description="Simple training script for training a RetinaNet network.") parser.add_argument( "--dataset", help="Dataset type, must be one of csv or coco or ycb.") parser.add_argument("--path", help="Path to dataset directory") parser.add_argument( "--csv_train", help="Path to file containing training annotations (see readme)") parser.add_argument("--csv_classes", help="Path to file containing class list (see readme)") parser.add_argument("--csv_val", help="Path to file containing validation annotations " "(optional, see readme)") parser.add_argument( "--depth", help="Resnet depth, must be one of 18, 34, 50, 101, 152", type=int, default=50) parser.add_argument("--epochs", help="Number of epochs", type=int, default=100) parser.add_argument("--evaluate_every", default=20, type=int) parser.add_argument("--print_every", default=20, type=int) parser.add_argument('--distributed', action="store_true", help='Run model in distributed mode with DataParallel') parser = parser.parse_args(args) # Create the data loaders if parser.dataset == "coco": if parser.path is None: raise ValueError( "Must provide --path when training on non-CSV datasets") dataset_train = CocoDataset(parser.path, ann_file="instances_train2014.json", set_name="train2014", transform=transforms.Compose([ Normalizer(), Augmenter(), Resizer(min_side=512, max_side=512) ])) dataset_val = CocoDataset(parser.path, ann_file="instances_val2014.cars.json", set_name="val2014", transform=transforms.Compose( [Normalizer(), Resizer()])) elif parser.dataset == "ycb": dataset_train = YCBDataset(parser.path, "image_sets/train.txt", transform=transforms.Compose([ Normalizer(), Augmenter(), Resizer(min_side=512, max_side=512) ]), train=True) dataset_val = YCBDataset(parser.path, "image_sets/val.txt", transform=transforms.Compose( [Normalizer(), Resizer()]), train=False) elif parser.dataset == "csv": if parser.csv_train is None: raise ValueError("Must provide --csv_train when training on COCO,") if parser.csv_classes is None: raise ValueError( "Must provide --csv_classes when training on COCO,") dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Augmenter(), Resizer()])) if parser.csv_val is None: dataset_val = None print("No validation annotations provided.") else: dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose( [Normalizer(), Resizer()])) else: raise ValueError( "Dataset type not understood (must be csv or coco), exiting.") sampler = AspectRatioBasedSampler(dataset_train, batch_size=12, drop_last=False) dataloader_train = DataLoader(dataset_train, num_workers=8, collate_fn=collater, batch_sampler=sampler) if dataset_val is not None: sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False) dataloader_val = DataLoader(dataset_val, num_workers=4, collate_fn=collater, batch_sampler=sampler_val) # Create the model if parser.depth == 18: retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 34: retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 50: retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 101: retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True) elif parser.depth == 152: retinanet = 
model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError(
            "Unsupported model depth, must be one of 18, 34, 50, 101, 152")

    print("CUDA available: {}".format(torch.cuda.is_available()))
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"
    retinanet = retinanet.to(device)

    if parser.distributed:
        retinanet = torch.nn.DataParallel(retinanet)

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    print("Num training images: {}".format(len(dataset_train)))

    best_mean_avg_prec = 0.0
    for epoch_num in range(parser.epochs):
        retinanet.train()
        retinanet.freeze_bn()
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet(
                    [data["img"].to(device).float(), data["annot"]])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss.item()))
                epoch_loss.append(float(loss.item()))

                if iter_num % parser.print_every == 0:
                    print("Epoch: {} | Iteration: {}/{} | "
                          "Classification loss: {:1.5f} | "
                          "Regression loss: {:1.5f} | "
                          "Running loss: {:1.5f}".format(
                              epoch_num, iter_num, len(dataloader_train),
                              float(classification_loss),
                              float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if ((epoch_num + 1) % parser.evaluate_every == 0) or epoch_num + 1 == parser.epochs:
            mAP = 0.0
            if parser.dataset == "coco":
                print("Evaluating dataset")
                mAP = coco_eval.evaluate_coco(dataset_val, retinanet)
            else:
                print("Evaluating dataset")
                AP = eval.evaluate(dataset_val, retinanet)
                mAP = np.asarray([x[0] for x in AP.values()]).mean()
                print("Val set mAP: ", mAP)
            if mAP > best_mean_avg_prec:
                best_mean_avg_prec = mAP
                torch.save(
                    retinanet.state_dict(),
                    "{}_retinanet_best_mean_ap_{}.pt".format(
                        parser.dataset, epoch_num))

        scheduler.step(np.mean(epoch_loss))

    retinanet.eval()
    torch.save(retinanet.state_dict(), "retinanet_model_final.pt")
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser.add_argument('--optimizer', help='[SGD | Adam]', type=str, default='SGD')
    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser = parser.parse_args(args)

    # Create the data loaders
    print("\n[Phase 1]: Creating DataLoader for {} dataset".format(parser.dataset))
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')
        dataset_train = CocoDataset(parser.coco_path, set_name='train2014',
                                    transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2014',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV data.')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV data.')
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=8, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=8, collate_fn=collater, batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=16, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=8, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    print('| Num training images: {}'.format(len(dataset_train)))
    print('| Num test images    : {}'.format(len(dataset_val)))

    print("\n[Phase 2]: Preparing RetinaNet Detection Model...")
    # `device` is defined unconditionally so the training loop below works on CPU as well.
    use_gpu = torch.cuda.is_available()
    device = torch.device('cuda' if use_gpu else 'cpu')
    retinanet = retinanet.to(device)
    if use_gpu:
        retinanet = torch.nn.DataParallel(retinanet, device_ids=range(torch.cuda.device_count()))
        print("| Using %d GPUs for Train/Validation!" % torch.cuda.device_count())
    retinanet.training = True

    if parser.optimizer == 'Adam':
        optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)  # not mentioned
        print("| Adam Optimizer with Learning Rate = {}".format(1e-5))
    elif parser.optimizer == 'SGD':
        optimizer = optim.SGD(retinanet.parameters(), lr=1e-2, momentum=0.9, weight_decay=1e-4)
        print("| SGD Optimizer with Learning Rate = {}".format(1e-2))
    else:
        raise ValueError('Unsupported Optimizer, must be one of [SGD | Adam]')

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()  # Freeze the BN parameters to ImageNet configuration

    # Check if there is a 'checkpoints' path
    if not osp.exists('./checkpoints/'):
        os.makedirs('./checkpoints/')

    print("\n[Phase 3]: Training Model on {} dataset...".format(parser.dataset))
    for epoch_num in range(parser.epochs):
        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet([data['img'].to(device), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.001)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                sys.stdout.write('\r')
                sys.stdout.write('| Epoch: {} | Iteration: {}/{} | Classification loss: {:1.5f} | '
                                 'Regression loss: {:1.5f} | Running loss: {:1.5f}'
                                 .format(epoch_num + 1, iter_num + 1, len(dataloader_train),
                                         float(classification_loss), float(regression_loss),
                                         np.mean(loss_hist)))
                sys.stdout.flush()
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        print("\n| Saving current best model at epoch {}...".format(epoch_num + 1))
        torch.save(retinanet.state_dict(),
                   './checkpoints/{}_retinanet_{}.pt'.format(parser.dataset, epoch_num + 1))

        if parser.dataset == 'coco':
            # print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet, device)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            # print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet, device)

        scheduler.step(np.mean(epoch_loss))

    retinanet.eval()
    torch.save(retinanet.state_dict(), './checkpoints/model_final.pt')
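# --- Sketch: freezing BatchNorm on a DataParallel-wrapped model ----------------------------
# The script above wraps the detector in DataParallel and then calls
# `retinanet.module.freeze_bn()`; `freeze_bn` is a method of the RetinaNet implementation,
# not of torch. This hedged sketch shows why the `.module` indirection is needed and a
# generic BatchNorm freeze (`freeze_batchnorm` is a hypothetical helper, assumed here to be
# roughly what such a method does).
import torch
import torch.nn as nn

def freeze_batchnorm(model: nn.Module) -> None:
    """Put every BatchNorm2d layer in eval mode and stop updating its affine parameters."""
    for m in model.modules():
        if isinstance(m, nn.BatchNorm2d):
            m.eval()
            m.weight.requires_grad_(False)
            m.bias.requires_grad_(False)

net = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())
if torch.cuda.is_available():
    net = nn.DataParallel(net.cuda())
    # DataParallel adds a `.module` indirection, hence the `retinanet.module.freeze_bn()` call.
    freeze_batchnorm(net.module)
else:
    freeze_batchnorm(net)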
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.', default="csv")
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)',
                        default="binary_class.csv")
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=18)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=500)
    parser.add_argument('--epochs_only_det', help='Number of epochs to train detection part', type=int, default=1)
    parser.add_argument('--max_epochs_no_improvement', help='Max epochs without improvement', type=int, default=100)
    parser.add_argument('--pretrained_model', help='Path of .pt file with pretrained model',
                        default='esposallescsv_retinanet_0.pt')
    parser.add_argument('--model_out', help='Path of .pt file with trained model to save', default='trained')
    parser.add_argument('--score_threshold', help='Score above which boxes are kept', type=float, default=0.5)
    parser.add_argument('--nms_threshold', help='IoU threshold used for non-maximum suppression',
                        type=float, default=0.2)
    parser.add_argument('--max_boxes', help='Max boxes to be fed to recognition', default=95)
    parser.add_argument('--seg_level', help='[line, word], to choose anchor aspect ratio', default='word')
    parser.add_argument('--early_stop_crit', help='Early stop criterion, detection (map) or transcription (cer)',
                        default='cer')
    parser.add_argument('--max_iters_epoch', help='Max steps per epoch (for debugging)', default=1000000)
    parser.add_argument('--train_htr', help='Train recognition or not', default='True')
    parser.add_argument('--train_det', help='Train detection or not', default='True')
    parser.add_argument('--binary_classifier',
                        help='Whether to use the classification branch as binary or multiclass.', default='False')
    parser.add_argument('--htr_gt_box', help='Train recognition branch with box gt (for debugging)', default='False')
    parser.add_argument('--ner_branch', help='Train named entity recognition with separate branch', default='False')
    parser = parser.parse_args(args)

    if parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train')
        dataset_name = parser.csv_train.split("/")[-2]
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    # Files for training log
    experiment_id = str(time.time()).split('.')[0]
    valid_cer_f = open('trained_models/' + parser.model_out + 'log.txt', 'w')
    for arg in vars(parser):
        if getattr(parser, arg) is not None:
            valid_cer_f.write(str(arg) + ' ' + str(getattr(parser, arg)) + '\n')
    current_commit = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
    valid_cer_f.write(str(current_commit))
    valid_cer_f.write("epoch_num cer best_cer mAP best_mAP time\n")
    valid_cer_f.close()

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=1, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater, batch_sampler=sampler_val)

    if not os.path.exists('trained_models'):
        os.mkdir('trained_models')

    # Create the model
    train_htr = parser.train_htr == 'True'
    htr_gt_box = parser.htr_gt_box == 'True'
    ner_branch = parser.ner_branch == 'True'
    binary_classifier = parser.binary_classifier == 'True'
    torch.backends.cudnn.benchmark = False
    alphabet = dataset_train.alphabet

    if os.path.exists(parser.pretrained_model):
        retinanet = torch.load(parser.pretrained_model)
        retinanet.classificationModel = ClassificationModel(
            num_features_in=256,
            num_anchors=retinanet.anchors.num_anchors,
            num_classes=dataset_train.num_classes())
        if ner_branch:
            retinanet.nerModel = NERModel(
                feature_size=256,
                pool_h=retinanet.pool_h,
                n_classes=dataset_train.num_classes(),
                pool_w=retinanet.pool_w)
    else:
        if parser.depth == 18:
            retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True,
                                       max_boxes=int(parser.max_boxes),
                                       score_threshold=float(parser.score_threshold),
                                       seg_level=parser.seg_level, alphabet=alphabet,
                                       train_htr=train_htr, htr_gt_box=htr_gt_box,
                                       ner_branch=ner_branch, binary_classifier=binary_classifier)
        elif parser.depth == 34:
            retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True,
                                       max_boxes=int(parser.max_boxes),
                                       score_threshold=float(parser.score_threshold),
                                       seg_level=parser.seg_level, alphabet=alphabet,
                                       train_htr=train_htr, htr_gt_box=htr_gt_box)
        elif parser.depth == 50:
            retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
        elif parser.depth == 101:
            retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
        elif parser.depth == 152:
            retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
        else:
            raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    train_htr = parser.train_htr == 'True'
    train_det = parser.train_det == 'True'
    retinanet.htr_gt_box = parser.htr_gt_box == 'True'
    retinanet.train_htr = train_htr
    retinanet.epochs_only_det = parser.epochs_only_det

    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()
    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-4)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=50, verbose=True)
    loss_hist = collections.deque(maxlen=500)
    ctc = CTCLoss()

    retinanet.train()
    retinanet.module.freeze_bn()

    best_cer = 1000
    best_map = 0
    epochs_no_improvement = 0
    verbose_each = 20
    optimize_each = 1
    objective = 100
    best_objective = 10000

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        cers = []
        retinanet.training = True
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            if iter_num > int(parser.max_iters_epoch):
                break
            try:
                if iter_num % optimize_each == 0:
                    optimizer.zero_grad()
                (classification_loss, regression_loss, ctc_loss, ner_loss) = retinanet(
                    [data['img'].cuda().float(), data['annot'], ctc, epoch_num])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                if train_det:
                    if train_htr:
                        loss = ctc_loss + classification_loss + regression_loss + ner_loss
                    else:
                        loss = classification_loss + regression_loss + ner_loss
                elif train_htr:
                    loss = ctc_loss
                else:
                    continue
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                if iter_num % verbose_each == 0:
                    print('Epoch: {} | Step: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | '
                          'CTC loss: {:1.5f} | NER loss: {:1.5f} | Running loss: {:1.5f} | Total loss: {:1.5f}'
                          .format(epoch_num, iter_num, float(classification_loss), float(regression_loss),
                                  float(ctc_loss), float(ner_loss), np.mean(loss_hist), float(loss)))
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                torch.cuda.empty_cache()
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'csv' and parser.csv_val is not None and train_det:
            print('Evaluating dataset')
            mAP, text_mAP, current_cer = csv_eval.evaluate(dataset_val, retinanet,
                                                           score_threshold=parser.score_threshold)
            # text_mAP, _ = csv_eval_binary_map.evaluate(dataset_val, retinanet, score_threshold=parser.score_threshold)
            objective = current_cer * (1 - mAP)

        retinanet.eval()
        retinanet.training = False
        retinanet.score_threshold = float(parser.score_threshold)

        '''for idx, data in enumerate(dataloader_val):
            if idx > int(parser.max_iters_epoch): break
            print("Eval CER on validation set:", idx, "/", len(dataset_val), "\r")
            image_name = dataset_val.image_names[idx].split('/')[-1].split('.')[-2]
            # generate_pagexml(image_name, data, retinanet, parser.score_threshold, parser.nms_threshold, dataset_val)
            text_gt = ".".join(dataset_val.image_names[idx].split('.')[:-1]) + '.txt'
            f = open(text_gt, 'r')
            text_gt_lines = f.readlines()[0]
            transcript_pred = get_transcript(image_name, data, retinanet, float(parser.score_threshold),
                                             float(parser.nms_threshold), dataset_val, alphabet)
            cers.append(float(editdistance.eval(transcript_pred, text_gt_lines)) / len(text_gt_lines))'''

        t = str(time.time()).split('.')[0]
        # print("GT", text_gt_lines)
        # print("PREDS SAMPLE:", transcript_pred)
        if parser.early_stop_crit == 'cer':
            if float(objective) < float(best_objective):  # float(current_cer) < float(best_cer):
                best_cer = current_cer
                best_objective = objective
                epochs_no_improvement = 0
                torch.save(retinanet.module,
                           'trained_models/' + parser.model_out + '{}_retinanet.pt'.format(parser.dataset))
            else:
                epochs_no_improvement += 1
            if mAP > best_map:
                best_map = mAP
        elif parser.early_stop_crit == 'map':
            if mAP > best_map:
                best_map = mAP
                epochs_no_improvement = 0
                torch.save(retinanet.module,
                           'trained_models/' + parser.model_out + '{}_retinanet.pt'.format(parser.dataset))
            else:
                epochs_no_improvement += 1
            if float(current_cer) < float(best_cer):
                best_cer = current_cer

        if train_det:
            print(epoch_num, "mAP: ", mAP, " best mAP", best_map)
        if train_htr:
            print("VALID CER:", current_cer, "best CER", best_cer)
        print("Epochs no improvement:", epochs_no_improvement)

        # Append this epoch's metrics to the log and close the handle so it is flushed each epoch.
        valid_cer_f = open('trained_models/' + parser.model_out + 'log.txt', 'a')
        valid_cer_f.write(str(epoch_num) + " " + str(current_cer) + " " + str(best_cer) + ' ' + str(mAP) + ' ' +
                          str(best_map) + ' ' + str(text_mAP) + '\n')
        valid_cer_f.close()

        if epochs_no_improvement > 3:
            for param_group in optimizer.param_groups:
                if param_group['lr'] > 10e-5:
                    param_group['lr'] *= 0.1

        if epochs_no_improvement >= parser.max_epochs_no_improvement:
            print("TRAINING FINISHED AT EPOCH", epoch_num, ".")
            sys.exit()

        scheduler.step(np.mean(epoch_loss))
        torch.cuda.empty_cache()

    retinanet.eval()
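# --- Sketch: early stopping with manual LR decay --------------------------------------------
# The loop above counts epochs without improvement, scales the learning rate down once
# progress stalls, and stops after `max_epochs_no_improvement`. A minimal sketch of that
# policy follows; the objective values and thresholds are illustrative, not taken from a run.
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(4, 2)
optimizer = optim.Adam(model.parameters(), lr=1e-4)

best_objective = float("inf")
epochs_no_improvement = 0
max_epochs_no_improvement = 10

for epoch, objective in enumerate([0.9, 0.8, 0.8, 0.8, 0.8, 0.79, 0.79, 0.79]):
    if objective < best_objective:          # lower objective (e.g. CER * (1 - mAP)) is better
        best_objective = objective
        epochs_no_improvement = 0
    else:
        epochs_no_improvement += 1

    if epochs_no_improvement > 3:           # decay LR once progress stalls
        for param_group in optimizer.param_groups:
            if param_group["lr"] > 1e-5:
                param_group["lr"] *= 0.1

    if epochs_no_improvement >= max_epochs_no_improvement:
        print("stopping at epoch", epoch)
        break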
def main(args):
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")
    print(args)
    print('Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/')
    tb_writer = SummaryWriter()
    if os.path.exists("./weights") is False:
        os.makedirs("./weights")

    train_info, val_info, num_classes = read_split_data(args.data_path)
    train_images_path, train_images_label = train_info
    val_images_path, val_images_label = val_info

    # check num_classes
    assert args.num_classes == num_classes, \
        "dataset num_classes: {}, input {}".format(num_classes, args.num_classes)

    data_transform = {
        "train": transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])
    }

    # Instantiate the training dataset
    train_data_set = MyDataSet(images_path=train_images_path,
                               images_class=train_images_label,
                               transform=data_transform["train"])
    # Instantiate the validation dataset
    val_data_set = MyDataSet(images_path=val_images_path,
                             images_class=val_images_label,
                             transform=data_transform["val"])

    batch_size = args.batch_size
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(train_data_set, batch_size=batch_size, shuffle=True,
                                               pin_memory=True, num_workers=nw,
                                               collate_fn=train_data_set.collate_fn)
    val_loader = torch.utils.data.DataLoader(val_data_set, batch_size=batch_size, shuffle=False,
                                             pin_memory=True, num_workers=nw,
                                             collate_fn=val_data_set.collate_fn)

    # Load pretrained weights if they are provided
    model = resnet34(num_classes=args.num_classes).to(device)
    if args.weights != "":
        if os.path.exists(args.weights):
            weights_dict = torch.load(args.weights, map_location=device)
            load_weights_dict = {k: v for k, v in weights_dict.items()
                                 if model.state_dict()[k].numel() == v.numel()}
            print(model.load_state_dict(load_weights_dict, strict=False))
        else:
            raise FileNotFoundError("not found weights file: {}".format(args.weights))

    # Optionally freeze weights
    if args.freeze_layers:
        for name, para in model.named_parameters():
            # Freeze everything except the final fully connected layer
            if "fc" not in name:
                para.requires_grad_(False)

    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=0.005)
    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (1 - args.lrf) + args.lrf  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    for epoch in range(args.epochs):
        # train
        mean_loss = train_one_epoch(model=model, optimizer=optimizer,
                                    data_loader=train_loader, device=device, epoch=epoch)
        scheduler.step()

        # validate
        sum_num = evaluate(model=model, data_loader=val_loader, device=device)
        acc = sum_num / len(val_data_set)
        print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))

        tags = ["loss", "accuracy", "learning_rate"]
        tb_writer.add_scalar(tags[0], mean_loss, epoch)
        tb_writer.add_scalar(tags[1], acc, epoch)
        tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch)

        torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))
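# --- Sketch: the cosine learning-rate schedule used above -----------------------------------
# The `lf` lambda (Bag-of-Tricks style, https://arxiv.org/pdf/1812.01187.pdf) scales the base
# LR from 1.0 down to `lrf` over `epochs`. Self-contained sketch; `epochs=30` and `lrf=0.1`
# are example values, not the script's defaults.
import math
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler

epochs, lrf = 30, 0.1
model = nn.Linear(8, 3)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.005)

lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - lrf) + lrf  # cosine decay
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

for epoch in range(epochs):
    optimizer.step()      # a full training epoch would normally run here
    scheduler.step()
    print(epoch, round(optimizer.param_groups[0]["lr"], 5))  # decays from 0.01 towards 0.001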
def main(args=None):
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)
    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO.')
        dataset_train = CocoDataset(parser.coco_path, set_name='train2017',
                                    transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017',
                                  transform=transforms.Compose([Normalizer(), Resizer()]))
    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV data.')
        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV data.')
        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes,
                                   transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes,
                                     transform=transforms.Compose([Normalizer(), Resizer()]))
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()
    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))
                print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | '
                      'Regression loss: {:1.5f} | Running loss: {:1.5f}'
                      .format(epoch_num, iter_num, float(classification_loss),
                              float(regression_loss), np.mean(loss_hist)))
                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))
        torch.save(retinanet.module, '{}_retinanet_dilation_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()
    torch.save(retinanet, 'model_final_dilation.pt')
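# --- Sketch: saving the whole module vs. saving a state_dict --------------------------------
# The loop above pickles the entire (unwrapped) module each epoch. A hedged sketch of the two
# common checkpointing options follows; file names are placeholders. Saving only the
# state_dict is generally the more portable choice, since reloading a pickled module needs
# the original class definition to be importable.
import torch
import torch.nn as nn

net = nn.DataParallel(nn.Linear(5, 2))

# Option 1: pickle the underlying module (what the training loop above does).
torch.save(net.module, "whole_model.pt")
# On recent PyTorch (>= 2.6) this load may additionally need weights_only=False.
restored_model = torch.load("whole_model.pt")

# Option 2: save only the parameters and re-create the architecture at load time.
torch.save(net.module.state_dict(), "weights_only.pt")
fresh = nn.Linear(5, 2)
fresh.load_state_dict(torch.load("weights_only.pt"))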
def main(config):
    # set seed for reproducibility
    np.random.seed(0)
    torch.manual_seed(0)
    torch.cuda.manual_seed(0)

    # create folder for model
    newpath = './models/' + config.model_date
    if config.save_model:
        os.makedirs(newpath)

    # Create the data loaders
    if config.csv_train is None:
        raise ValueError('Must provide --csv_train when training on csv data.')
    if config.csv_classes is None:
        raise ValueError('Must provide --csv_classes when training on csv data.')

    train_dataset = datasets.ImageFolder(os.path.join(config.data_dir, 'train'))
    dataset_train = GetDataset(train_file=config.csv_train, class_list=config.csv_classes,
                               transform=transforms.Compose([Augmenter(), Resizer()]),
                               dataset=train_dataset, seed=0)
    dataloader_train = DataLoader(dataset_train, batch_size=config.batch_size, shuffle=True,
                                  num_workers=1, collate_fn=collater)

    if config.csv_val is None:
        dataset_val = None
        print('No validation annotations provided.')
    else:
        valid_dataset = datasets.ImageFolder(os.path.join(config.data_dir, 'valid'))
        dataset_val = GetDataset(train_file=config.csv_val, class_list=config.csv_classes,
                                 transform=transforms.Compose([Resizer()]),
                                 dataset=valid_dataset, seed=0)

    # Create the model
    if config.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif config.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif config.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif config.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif config.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    if config.use_gpu:
        retinanet = retinanet.cuda()
    retinanet = torch.nn.DataParallel(retinanet).cuda()
    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    best_valid_map = 0
    counter = 0
    batch_size = config.batch_size

    for epoch_num in range(config.epochs):
        print('\nEpoch: {}/{}'.format(epoch_num + 1, config.epochs))
        retinanet.train()
        retinanet.module.freeze_bn()
        epoch_loss = []
        train_batch_time = AverageMeter()
        train_losses = AverageMeter()
        tic = time.time()

        with tqdm(total=len(dataset_train)) as pbar:
            for iter_num, data in enumerate(dataloader_train):
                # try:
                optimizer.zero_grad()
                siamese_loss, classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot'], data['pair'].cuda().float()])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss + siamese_loss
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                epoch_loss.append(float(loss))

                toc = time.time()
                train_losses.update(float(loss), batch_size)
                train_batch_time.update(toc - tic)
                tic = time.time()
                pbar.set_description("{:.1f}s - loss: {:.3f}".format(train_batch_time.val, train_losses.val))
                pbar.update(batch_size)

                del classification_loss
                del regression_loss
                del siamese_loss
                # except Exception as e:
                #     print('Training error: ', e)
                #     continue

        if config.csv_val is not None:
            print('Evaluating dataset')
            mAP, correct = eval_new.evaluate(dataset_val, retinanet)

            # is_best = mAP[0][0] > best_valid_map
            # best_valid_map = max(mAP[0][0], best_valid_map)
            is_best = correct > best_valid_map
            best_valid_map = max(correct, best_valid_map)
            if is_best:
                counter = 0
            else:
                counter += 1
            if counter > 3:
                print("[!] No improvement in a while, stopping training.")
                break

            scheduler.step(np.mean(epoch_loss))

            if is_best and config.save_model:
                torch.save(retinanet.state_dict(),
                           './models/{}/best_retinanet.pt'.format(config.model_date))
            if config.save_model:
                torch.save(retinanet.state_dict(),
                           './models/{}/{}_retinanet_{}.pt'.format(config.model_date, config.depth, epoch_num))

            msg = "train loss: {:.3f} - val map: {:.3f} - val acc: {:.3f}%"
            print(msg.format(train_losses.avg, mAP[0][0], (100. * correct) / len(dataset_val)))
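# --- Sketch: the AverageMeter helper assumed above -------------------------------------------
# `AverageMeter` is used in the loop above but not defined in this snippet. This is a minimal
# version of the conventional helper, written as an assumption about its interface
# (`update(val, n)`, `.val`, `.avg`), not the project's actual implementation.
class AverageMeter:
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0    # most recent value
        self.sum = 0.0    # weighted sum of all values seen
        self.count = 0    # total weight (e.g. number of samples)
        self.avg = 0.0    # running average

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

# Usage mirroring the training loop above:
losses = AverageMeter()
for batch_loss in [0.9, 0.7, 0.5]:
    losses.update(batch_loss, n=8)   # n = batch size
print(losses.val, losses.avg)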