Example #1
0
def model_init(model_name):
    """Create a RetinaNet sized to match a checkpoint and load its weights.

    The ResNet backbone depth is inferred from the number of keys in the
    checkpoint's state dict, so the constructed model matches the weights.

    Args:
        model_name: Identifier of the model to initialise; only 'retinanet'
            is currently supported.

    Returns:
        (retinanet, device): the weight-initialised model and the torch
        device (CUDA if available, else CPU).

    Raises:
        ValueError: if `model_name` is unknown, or the checkpoint does not
            match any supported ResNet depth.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if model_name == 'retinanet':
        weight_file_path = '/content/retinanet/retinanet50_pretrained.pth'
    else:
        # Previously weight_file_path was left unbound for any other name,
        # producing a NameError below; fail fast with a clear message.
        raise ValueError("Unsupported model_name: {!r}".format(model_name))

    # Load the checkpoint once (the original loaded it twice: once to count
    # keys, again to initialise the model).
    state_dict = torch.load(weight_file_path, map_location=device)
    total_keys = len(state_dict)

    # Pick the backbone whose state-dict size matches the checkpoint.
    # NOTE(review): `num_classes` is read from an enclosing scope — confirm
    # it is defined at module level.
    if 102 <= total_keys < 182:
        retinanet = model.resnet18(num_classes=num_classes, pretrained=False)
    elif 182 <= total_keys < 267:
        retinanet = model.resnet34(num_classes=num_classes, pretrained=False)
    elif 267 <= total_keys < 522:
        retinanet = model.resnet50(num_classes=num_classes, pretrained=False)
    elif 522 <= total_keys < 777:
        retinanet = model.resnet101(num_classes=num_classes, pretrained=False)
    elif total_keys >= 777:
        retinanet = model.resnet152(num_classes=num_classes, pretrained=False)
    else:
        raise ValueError('Unsupported model backbone, must be one of resnet18, resnet34, resnet50, resnet101, resnet152')

    # strict=False tolerates missing/unexpected keys (e.g. head shape
    # differences between checkpoint and model).
    retinanet.load_state_dict(state_dict, strict=False) # Initialisng Model with loaded weights
    print('model initialized..')

    return retinanet, device
Example #2
0
def main():
    """Load a trained ResNet-34, globally prune 50% of its Conv2d weights by
    L1 magnitude, report the resulting sparsity, then validate the model."""
    weights_path = "./resNet34.pth"
    model = resnet34(num_classes=5)
    model.load_state_dict(torch.load(weights_path, map_location=device))
    model.to(device)

    # Collect every Conv2d weight tensor as a pruning target.
    parameters_to_prune = [
        (module, "weight")
        for module in model.modules()
        if isinstance(module, torch.nn.Conv2d)
    ]

    # Prune 50% of all collected weights globally (not per-layer) by L1
    # magnitude; this masks weights rather than removing them.
    prune.global_unstructured(parameters_to_prune,
                              pruning_method=prune.L1Unstructured,
                              amount=0.5)

    # Report the achieved sparsity ratio.
    count_sparsity(model, p=False)

    # Evaluate the pruned model.
    validate_model(model)
Example #3
0
def pytorch_model_speed(data_loader, val_num):
    """Benchmark a trained ResNet-34: forward-pass FPS and accuracy on a loader.

    Args:
        data_loader: Iterable of (images, labels) validation batches.
        val_num: Total number of validation samples; denominator for both
            accuracy and FPS.

    Returns:
        (fps, accuracy, label): frames/sec rounded to 1 decimal, accuracy in
        [0, 1], and a display name for this backend.
    """
    net = resnet34(num_classes=5)
    # load weights
    model_weight_path = "./resNet34.pth"
    check_path_exist(model_weight_path)
    # strict=False tolerates key mismatches between checkpoint and model.
    net.load_state_dict(torch.load(model_weight_path, map_location=device),
                        strict=False)
    net.eval()
    # Warm-up forward pass so one-off initialisation cost is not timed below.
    test_data = torch.rand((1, 3, 224, 224))
    net(test_data.to(device))

    forward_time = 0
    acc = 0.0  # accumulate accurate number / epoch
    with torch.no_grad():
        for val_data in tqdm(data_loader, desc="Running pytorch model..."):
            val_images, val_labels = val_data
            # Only the forward pass is timed; accuracy bookkeeping is outside
            # the t1..t2 window.
            # NOTE(review): on CUDA these wall-clock timings are taken without
            # torch.cuda.synchronize(), so they may under-count async kernels.
            t1 = time.time()
            outputs = net(val_images.to(
                device))  # eval model only have last output layer
            t2 = time.time()
            forward_time += (t2 - t1)
            predict_y = torch.max(outputs, dim=1)[1]
            acc += (predict_y == val_labels.to(device)).sum().item()
        val_accurate = acc / val_num
    fps = round(val_num / forward_time, 1)
    print("pytorch info:\nfps: {}/s  accuracy: {}\n".format(fps, val_accurate))
    return fps, val_accurate, "Pytorch(not opt)"
Example #4
0
    def __init__(self, model_weight_path, json_file):
        """Prepare device, preprocessing transform, class mapping, and model.

        Args:
            model_weight_path: Path to a .pth state dict; a path containing
                'res50.pth' selects ResNet-50, anything else ResNet-34.
            json_file: Path to a JSON file mapping class indices to names.
        """
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        print(self.device)

        self.device_cpu = torch.device("cpu")
        self.data_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

        # `with` closes the file (the original leaked the handle).
        with open(json_file, "r", encoding="utf-8") as json_filef:
            self.class_indict = json.load(json_filef)

        # create model — substring test replaces `str.find(...) > 0`, which
        # wrongly fell through to ResNet-34 when the match was at index 0.
        if "res50.pth" in model_weight_path:
            self.model = resnet50(num_classes=len(self.class_indict))  # res50
        else:
            self.model = resnet34(num_classes=len(self.class_indict))
        # load model weights; map_location keeps CUDA checkpoints loadable
        # on CPU-only hosts.
        self.model.load_state_dict(
            torch.load(model_weight_path, map_location=self.device))
        self.model.eval()
        self.model.to(self.device)
Example #5
0
def main():
    """Batch-predict classes for a fixed list of images with a trained ResNet-34.

    Loads ./tulip.jpg and ./rose.jpg, stacks them into one batch, runs a
    softmax classification, and prints the top class and probability per image.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # Load and preprocess every input image.
    img_path_list = ["./tulip.jpg", "./rose.jpg"]
    img_list = []
    for img_path in img_path_list:
        # Typo fix in the assertion message: "dose" -> "does".
        assert os.path.exists(img_path), "file: '{}' does not exist.".format(
            img_path)
        img = Image.open(img_path)
        img_list.append(data_transform(img))

    # Stack the [C, H, W] tensors into a single [N, C, H, W] batch.
    batch_img = torch.stack(img_list, dim=0)

    # Read class-index -> name mapping; `with` closes the file
    # (the original leaked the handle).
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' does not exist.".format(
        json_path)
    with open(json_path, "r") as json_file:
        class_indict = json.load(json_file)

    # create model
    model = resnet34(num_classes=5).to(device)

    # load model weights
    weights_path = "./resNet34.pth"
    assert os.path.exists(weights_path), "file: '{}' does not exist.".format(
        weights_path)
    model.load_state_dict(torch.load(weights_path, map_location=device))

    # prediction
    model.eval()
    with torch.no_grad():
        # Per-image softmax over the class dimension, then the arg-max
        # class and its probability.
        output = model(batch_img.to(device)).cpu()
        predict = torch.softmax(output, dim=1)
        probs, classes = torch.max(predict, dim=1)

        for idx, (pro, cla) in enumerate(zip(probs, classes)):
            print("image: {}  class: {}  prob: {:.3}".format(
                img_path_list[idx], class_indict[str(cla.numpy())],
                pro.numpy()))
Example #6
0
def main():
    """Predict the class of one image with a trained ResNet-34 and plot it.

    Loads ../tulip.jpg, runs a softmax classification, prints every class
    probability, and shows the image titled with the top prediction.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    # load image (typo fix in assertion messages: "dose" -> "does")
    img_path = "../tulip.jpg"
    assert os.path.exists(img_path), "file: '{}' does not exist.".format(
        img_path)
    img = Image.open(img_path)
    plt.imshow(img)
    # [N, C, H, W]
    img = data_transform(img)
    # expand batch dimension
    img = torch.unsqueeze(img, dim=0)

    # read class_indict; `with` closes the file (the original leaked it)
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' does not exist.".format(
        json_path)
    with open(json_path, "r") as json_file:
        class_indict = json.load(json_file)

    # create model
    model = resnet34(num_classes=5).to(device)

    # load model weights
    weights_path = "./resNet34.pth"
    assert os.path.exists(weights_path), "file: '{}' does not exist.".format(
        weights_path)
    model.load_state_dict(torch.load(weights_path, map_location=device))

    # prediction
    model.eval()
    with torch.no_grad():
        # Squeeze the batch dim away, softmax over classes, take the arg-max.
        output = torch.squeeze(model(img.to(device))).cpu()
        predict = torch.softmax(output, dim=0)
        predict_cla = torch.argmax(predict).numpy()

    print_res = "class: {}   prob: {:.3}".format(
        class_indict[str(predict_cla)], predict[predict_cla].numpy())
    plt.title(print_res)
    for i in range(len(predict)):
        print("class: {:10}   prob: {:.3}".format(class_indict[str(i)],
                                                  predict[i].numpy()))
    plt.show()
Example #7
0
    def load_model(self):
        """Restore model, metadata, and normalisation stats from the checkpoint.

        Reads `self.model_checkpoint_file_path`, rebuilds the architecture
        recorded in the checkpoint's args, and loads its weights. Leaves the
        model wrapped in nn.DataParallel and switched to eval mode.

        Raises:
            NotImplementedError: for unknown model_type or model_arc values.
                (The original `raise ('Not Implemented!')` raised a
                TypeError, because a plain string is not an exception.)
        """
        # map_location keeps CUDA checkpoints loadable on CPU-only hosts.
        self.checkpoint = torch.load(self.model_checkpoint_file_path,
                                     map_location=lambda storage, loc: storage)
        self.model_args = self.checkpoint['args']

        self.num_classes = None
        if self.model_args.model_type == 'food179':
            self.num_classes = 179
        elif self.model_args.model_type == 'nsfw':
            self.num_classes = 5
        else:
            raise NotImplementedError(
                'Unknown model_type: {}'.format(self.model_args.model_type))

        if self.model_args.model_arc == 'resnet18':
            self.model = model.resnet18(num_classes=self.num_classes,
                                        zero_init_residual=True)
        elif self.model_args.model_arc == 'resnet34':
            self.model = model.resnet34(num_classes=self.num_classes,
                                        zero_init_residual=True)
        elif self.model_args.model_arc == 'resnet50':
            self.model = model.resnet50(num_classes=self.num_classes,
                                        zero_init_residual=True)
        elif self.model_args.model_arc == 'resnet101':
            self.model = model.resnet101(num_classes=self.num_classes,
                                         zero_init_residual=True)
        elif self.model_args.model_arc == 'resnet152':
            self.model = model.resnet152(num_classes=self.num_classes,
                                         zero_init_residual=True)
        elif self.model_args.model_arc == 'mobilenet':
            self.model = model.MobileNetV2(n_class=self.num_classes,
                                           input_size=256)
        else:
            raise NotImplementedError(
                'Unknown model_arc: {}'.format(self.model_args.model_arc))

        # NOTE(review): wrapping before load_state_dict implies the state
        # dict was saved from a DataParallel model ("module."-prefixed
        # keys) — confirm against the training script.
        self.model = nn.DataParallel(self.model)
        self.model.load_state_dict(self.checkpoint['model_state_dict'])
        self.model_epoch = self.checkpoint['epoch']
        self.model_test_acc = self.checkpoint['test_acc']
        self.model_best_acc = self.checkpoint['best_acc']
        self.model_test_acc_top5 = self.checkpoint['test_acc_top5']
        self.model_class_to_idx = self.checkpoint['class_to_idx']
        # Invert class->index into index->class for prediction decoding.
        self.model_idx_to_class = {
            v: k
            for k, v in self.model_class_to_idx.items()
        }
        self.model_train_history_dict = self.checkpoint['train_history_dict']
        self.mean = self.checkpoint['NORM_MEAN']
        self.std = self.checkpoint['NORM_STD']
        self.model.eval()

        return
def main():
    """Load ImageNet-pretrained ResNet-34 weights and swap in a 5-class FC head."""
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # load pretrain weights
    # download url: https://download.pytorch.org/models/resnet34-333f7ec4.pth
    model_weight_path = "./resnet34-pre.pth"
    assert os.path.exists(model_weight_path), "file {} does not exist.".format(
        model_weight_path)

    # option1
    # Load the full pretrained state dict, then replace the classifier head
    # with a fresh 5-way linear layer (its in_features are taken from the
    # existing fc layer).
    net = resnet34()
    net.load_state_dict(torch.load(model_weight_path, map_location=device))
    # change fc layer structure
    in_channel = net.fc.in_features
    net.fc = nn.Linear(in_channel, 5)
Example #9
0
def main(args=None):
    """Build guipang train/val data loaders and a pretrained RetinaNet of the
    configured depth, then create its Adam optimizer.

    NOTE(review): the original body mixed tab and space indentation (a
    TabError under Python 3); indentation is normalised to spaces here.

    Args:
        args: Unused command-line arguments placeholder.
    """
    data_set = {
        x: guipang(cfg=cfg['dataset_guipang'], part=x) for x in ['train', 'val']
    }
    data_loader = {
        x: data.DataLoader(data_set[x], batch_size=cfg['batch_size'],
                           num_workers=4, shuffle=True, pin_memory=False)
        for x in ['train', 'val']
    }

    # Create the model for the configured backbone depth.
    # NOTE(review): `dataset_train` is not defined in this function — it
    # presumably should be data_set['train']; confirm against the original.
    if cfg['depth'] == 18:
        retinanet = model.resnet18(
            num_classes=dataset_train.num_classes(), pretrained=True)
    elif cfg['depth'] == 34:
        retinanet = model.resnet34(
            num_classes=dataset_train.num_classes(), pretrained=True)
    elif cfg['depth'] == 50:
        retinanet = model.resnet50(
            num_classes=dataset_train.num_classes(), pretrained=True)
    elif cfg['depth'] == 101:
        retinanet = model.resnet101(
            num_classes=dataset_train.num_classes(), pretrained=True)
    elif cfg['depth'] == 152:
        retinanet = model.resnet152(
            num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
Example #10
0
    def __init__(self, model_weight_path, json_file):
        """Build the preprocessing transform, class mapping, and eval-mode model.

        Args:
            model_weight_path: Path to the ResNet-34 .pth state dict.
            json_file: Path to a JSON file mapping class indices to names.
        """
        self.data_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

        # `with` closes the file (the original leaked the handle).
        with open(json_file, "r", encoding="utf-8") as json_filef:
            self.class_indict = json.load(json_filef)

        # create model sized to the number of classes in the mapping
        self.model = resnet34(num_classes=len(self.class_indict))
        # load model weights
        self.model.load_state_dict(torch.load(model_weight_path))
        self.model.eval()
Example #11
0
def get_network(args):
    """Return the network requested by `args.net`.

    Args:
        args: Namespace with `net` ('resnet34' or 'resnet50'),
            `num_classes`, and `gpu` (bool) attributes.

    Returns:
        The constructed network, moved to CUDA when args.gpu is truthy.

    Raises:
        ValueError: if args.net names an unsupported network. (The original
            only printed a message and then crashed with NameError on the
            unbound `net`.)
    """
    if args.net == 'resnet34':
        from model import resnet34
        net = resnet34(args.num_classes)

    elif args.net == 'resnet50':
        from model import resnet50
        net = resnet50(args.num_classes)

    else:
        raise ValueError(args.net +
                         ' is not supported, please enter resnet34 or resnet50')

    if args.gpu:
        net = net.cuda()

    return net
Example #12
0
def main(args=None):
    """Evaluate a saved RetinaNet model on the JinNan validation set.

    NOTE(review): the original mixed tab and space indentation (a TabError
    under Python 3); indentation is normalised to spaces here.

    Args:
        args: Optional argv list forwarded to argparse (None -> sys.argv).
    """
    from dataloader import JinNanDataset, Augmenter, UnNormalizer, Normalizer, Resizer
    from torch.utils.data import Dataset, DataLoader
    from torchvision import datasets, models, transforms
    import model
    import torch
    import argparse

    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', default='jingnan', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--threshold', help='treshold')
    parser.add_argument('--dataset_path', help='Path to file containing training and validation annotations (optional, see readme)')
    parser.add_argument('--model_path', help=('the model path'))
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser = parser.parse_args(args)

    dataset_val = JinNanDataset(parser.dataset_path, set_name='val', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))

    # Validate the requested depth only. The original built a fresh
    # pretrained backbone here (downloading weights) and then immediately
    # overwrote it with torch.load below, so the construction was dead work.
    if parser.depth not in (18, 34, 50, 101, 152):
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    # The whole serialized model (not just a state dict) is loaded here.
    retinanet = torch.load(parser.model_path)
    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet.eval()
    print('Evaluating dataset')
    evaluate_jinnan(dataset_val, retinanet)
    def build(self, depth=50, learning_rate=1e-5, ratios=[0.5, 1, 2],
              scales=[2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]):
        """Construct the RetinaNet of the requested depth and its training state.

        Builds the backbone, moves it to `self.device`, creates the Adam
        optimizer and a plateau LR scheduler, and (if a checkpoint is
        present) restores model/optimizer/scheduler state.
        """
        # Map each supported depth to its backbone factory.
        backbones = {
            18: model.resnet18,
            34: model.resnet34,
            50: model.resnet50,
            101: model.resnet101,
            152: model.resnet152,
        }
        if depth not in backbones:
            raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')
        retinanet = backbones[depth](num_classes=self.dataset_train.num_classes(),
                                     ratios=ratios, scales=scales,
                                     weights_dir=self.weights_dir_path,
                                     pretrained=True)

        self.retinanet = retinanet.to(device=self.device)
        self.retinanet.training = True
        self.optimizer = optim.Adam(self.retinanet.parameters(), lr=learning_rate)
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, patience=3, verbose=True)

        if self.checkpoint is not None:
            self.retinanet.load_state_dict(self.checkpoint['model'])
            self.optimizer.load_state_dict(self.checkpoint['optimizer'])
            self.scheduler.load_state_dict(self.checkpoint['scheduler'])  # TODO: test this, is it done right?
            # TODO is it right to resume_read_trial optimizer and schedular like this???
        self.ratios = ratios
        self.scales = scales
        self.depth = depth
Example #14
0
    def set_models(self, dataset_train):
        """Create the RetinaNet for `self.depth`, wrap it for multi-GPU use,
        and set up the optimizer, LR scheduler, and loss history."""
        # Resolve the backbone factory by name instead of an if/elif chain.
        if self.depth not in (18, 34, 50, 101, 152):
            raise ValueError(
                'Unsupported model depth, must be one of 18, 34, 50, 101, 152')
        factory = getattr(model, 'resnet{}'.format(self.depth))
        retinanet = factory(num_classes=dataset_train.num_classes(),
                            pretrained=True)

        if torch.cuda.device_count() > 1:
            print("Let's use", torch.cuda.device_count(), "GPUs!")
            # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
            retinanet = nn.DataParallel(retinanet)

        self.retinanet = retinanet.to(self.device)
        self.retinanet.training = True
        self.optimizer = optim.Adam(self.retinanet.parameters(), lr=self.lr)

        # Reduce the learning rate when the validation loss plateaus.
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(self.optimizer,
                                                              patience=3,
                                                              verbose=True)

        self.loss_hist = collections.deque(maxlen=500)
def load_model(model_arch, numOfClasses):
    """Build a classifier backbone named by `model_arch` with a fresh head.

    Args:
        model_arch: Architecture name or suffix, e.g. 'resnet34',
            'resnet50', 'resnet50_pretrained', 'resnet152',
            'resnet152_pretrained', 'densenet121_pretrained'.
        numOfClasses: Output dimension of the replacement classifier layer.

    Returns:
        The network with its final layer resized to numOfClasses.

    Raises:
        ValueError: if model_arch matches no supported architecture. (The
            original fell through and raised NameError on the unbound `NN`.)
    """
    if model_arch.endswith('resnet34'):
        # NOTE(review): the original tested endswith('resnet34') twice —
        # the second was presumably meant to be 'resnet34_pretrained';
        # confirm against callers.
        NN = resnet34(pretrained=False)
        num_features = NN.fc.in_features
        NN.fc = nn.Linear(num_features, numOfClasses)
    elif model_arch.endswith('resnet50_pretrained') or model_arch.endswith('resnet50'):
        NN = models.resnet50(pretrained=True)
        num_features = NN.fc.in_features
        NN.fc = nn.Linear(num_features, numOfClasses)
    elif model_arch.endswith('resnet152_pretrained'):
        NN = models.resnet152(pretrained=True)
        num_features = NN.fc.in_features
        NN.fc = nn.Linear(num_features, numOfClasses)
    elif model_arch.endswith('resnet152'):
        NN = models.resnet152(pretrained=False)
        num_features = NN.fc.in_features
        NN.fc = nn.Linear(num_features, numOfClasses)
    elif model_arch.endswith('densenet121_pretrained'):
        # DenseNet exposes its head as `classifier`, not `fc`.
        NN = models.densenet121(pretrained=True)
        num_ftrs = NN.classifier.in_features
        NN.classifier = nn.Linear(num_ftrs, numOfClasses)
    else:
        raise ValueError('Unsupported model_arch: {}'.format(model_arch))

    return NN
def main(args=None):
    """Evaluate a trained RetinaNet on a CSV validation set.

    Loads (or builds) the model, reports detection mAP via csv_eval, then
    runs the recognition branch over each validation page and reports the
    character error rate (CER) against ground-truth transcript files that
    sit next to each image.

    Args:
        args: Optional argv list forwarded to argparse (None -> sys.argv).
    """

    parser     = argparse.ArgumentParser(description='Simple testing script for RetinaNet network.')

    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.',default = "csv")
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)',default="binary_class.csv")
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--csv_box_annot', help='Path to file containing predicted box annotations ')

    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=18)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=500)
    parser.add_argument('--model', help='Path of .pt file with trained model',default = 'esposallescsv_retinanet_0.pt')
    parser.add_argument('--model_out', help='Path of .pt file with trained model to save',default = 'trained')

    parser.add_argument('--score_threshold', help='Score above which boxes are kept',default=0.15)
    parser.add_argument('--nms_threshold', help='Score above which boxes are kept',default=0.2)
    parser.add_argument('--max_epochs_no_improvement', help='Max epochs without improvement',default=100)
    parser.add_argument('--max_boxes', help='Max boxes to be fed to recognition',default=50)
    parser.add_argument('--seg_level', help='Line or word, to choose anchor aspect ratio',default='line')
    parser.add_argument('--htr_gt_box',help='Train recognition branch with box gt (for debugging)',default=False)
    parser = parser.parse_args(args)

    # Create the data loaders
    # Only the 'csv' dataset type is implemented in this script.

    if parser.dataset == 'csv':


        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on COCO,')


        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]))

        # Optional second dataset of *predicted* boxes; when absent, the
        # CER loop below falls back to the validation loader.
        if parser.csv_box_annot is not None:
            box_annot_data = CSVDataset(train_file=parser.csv_box_annot, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]))

        else:
            box_annot_data = None
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')


    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater, batch_sampler=sampler_val)

    if box_annot_data is not None:
        sampler_val = AspectRatioBasedSampler(box_annot_data, batch_size=1, drop_last=False)
        dataloader_box_annot = DataLoader(box_annot_data, num_workers=0, collate_fn=collater, batch_sampler=sampler_val)

    else:
        dataloader_box_annot = dataloader_val

    if not os.path.exists('trained_models'):
        os.mkdir('trained_models')

    # Create the model
    # Prefer loading a previously trained model file; only build a fresh
    # backbone when that file does not exist.
    # NOTE(review): the depth-34/50/101/152 branches reference
    # `dataset_train`, which is never defined in this function (only
    # dataset_val is) — they would raise NameError; confirm upstream.

    alphabet=dataset_val.alphabet
    if os.path.exists(parser.model):
        retinanet = torch.load(parser.model)
    else:
        if parser.depth == 18:
            retinanet = model.resnet18(num_classes=dataset_val.num_classes(), pretrained=True,max_boxes=int(parser.max_boxes),score_threshold=float(parser.score_threshold),seg_level=parser.seg_level,alphabet=alphabet)
        elif parser.depth == 34:
            retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
        elif parser.depth == 50:
            retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
        elif parser.depth == 101:
            retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
        elif parser.depth == 152:
            retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
        else:
            raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    #retinanet = torch.load('../Documents/TRAINED_MODELS/pytorch-retinanet/esposallescsv_retinanet_99.pt')
    #print "LOADED pretrained MODEL\n\n"


    # Optimizer/scheduler are created but unused below — this script only
    # evaluates (retinanet.eval() / training=False).
    optimizer = optim.Adam(retinanet.parameters(), lr=1e-4)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=4, verbose=True)

    loss_hist = collections.deque(maxlen=500)
    ctc = CTCLoss()
    retinanet.module.freeze_bn()
    best_cer = 1000
    epochs_no_improvement=0

    cers=[]
    retinanet.eval()
    retinanet.module.epochs_only_det = 0
    #retinanet.module.htr_gt_box = False

    retinanet.training=False
    if parser.score_threshold is not None:
        retinanet.module.score_threshold = float(parser.score_threshold)

    '''if parser.dataset == 'csv' and parser.csv_val is not None:

        print('Evaluating dataset')
    '''
    # Detection quality: mean AP across classes on the validation set.
    mAP = csv_eval.evaluate(dataset_val, retinanet,score_threshold=retinanet.module.score_threshold)
    aps = []
    for k,v in mAP.items():
        aps.append(v[0])
    print ("VALID mAP:",np.mean(aps))

    print("score th",retinanet.module.score_threshold)
    # Recognition quality: per-page CER against the .txt transcript that
    # lives next to each image.
    for idx,data in enumerate(dataloader_box_annot):
        print("Eval CER on validation set:",idx,"/",len(dataloader_box_annot),"\r")
        if box_annot_data:
            image_name = box_annot_data.image_names[idx].split('/')[-1].split('.')[-2]
        else:
            image_name = dataset_val.image_names[idx].split('/')[-1].split('.')[-2]
        #generate_pagexml(image_name,data,retinanet,parser.score_threshold,parser.nms_threshold,dataset_val)
        text_gt_path="/".join(dataset_val.image_names[idx].split('/')[:-1])
        text_gt = os.path.join(text_gt_path,image_name+'.txt')
        f =open(text_gt,'r')
        text_gt_lines=f.readlines()[0]
        transcript_pred = get_transcript(image_name,data,retinanet,retinanet.module.score_threshold,float(parser.nms_threshold),dataset_val,alphabet)
        # CER = edit distance normalised by ground-truth length.
        cers.append(float(editdistance.eval(transcript_pred,text_gt_lines))/len(text_gt_lines))
        print("GT",text_gt_lines)
        print("PREDS SAMPLE:",transcript_pred)
        print("VALID CER:",np.mean(cers),"best CER",best_cer)
    print("GT",text_gt_lines)
    print("PREDS SAMPLE:",transcript_pred)
    print("VALID CER:",np.mean(cers),"best CER",best_cer)
Example #17
0
def main(args=None):
    """Train a RetinaNet detector on a COCO or CSV dataset.

    Parses command-line options, builds the datasets/dataloaders and a
    ResNet backbone of the requested depth, then runs the training loop,
    evaluating and checkpointing after every epoch.

    Args:
        args: optional list of argument strings (defaults to sys.argv).
    """
    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')

    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)

    parser.add_argument('--start-epoch', default=0, type=int, help='manual epoch number (useful on restarts)')
    parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)')

    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
        dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose([Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        # The original messages said "on COCO" here, which was misleading.
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on CSV,')

        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when training on CSV,')

        dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]))

    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train, batch_size=4, drop_last=False)
    dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

    # Create the model with the requested backbone depth.
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    optimizer = optim.SGD(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()  # keep BN statistics frozen from the very beginning

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.start_epoch, parser.epochs):

        if parser.resume:
            if os.path.isfile(parser.resume):
                print("=>loading checkpoint '{}'".format(parser.resume))
                checkpoint = torch.load(parser.resume)
                print(parser.start_epoch)
                # The checkpoint was produced by torch.save(retinanet, ...) below,
                # so it IS the (DataParallel-wrapped) model object.
                # NOTE(review): the optimizer still references the parameters of
                # the previous model object after this swap - confirm intended.
                retinanet = checkpoint
                print(retinanet)
                print("=> loaded checkpoint '{}' (epoch {})".format(parser.resume, checkpoint))
            else:
                print("=> no checkpoint found at '{}'".format(parser.resume))

        retinanet.train()
        # freeze_bn lives on the wrapped module; DataParallel itself does not
        # forward arbitrary attributes, so retinanet.freeze_bn() would raise.
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot'].cuda()])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                # Nothing to learn from an all-zero loss (no annotations matched).
                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        # Evaluate once per epoch, after training. (The original also evaluated
        # before every epoch, which merely duplicated the work done at the end
        # of the previous epoch.)
        if parser.dataset == 'coco':
            print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet)
        elif parser.dataset == 'csv' and parser.csv_val is not None:
            print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))

        # Checkpoint the full (DataParallel-wrapped) model and remember its path
        # so the next epoch can resume from it. The original stored the literal
        # string 'name' in the path instead of the variable.
        name = '{}_retinanet_dilation_experiment1_{}.pt'.format(parser.dataset, epoch_num)
        torch.save(retinanet, name)
        parser.resume = os.path.join('/users/wenchi/ghwwc/pytorch-retinanet-master_new', name)

    retinanet.eval()

    # The filename has no placeholder, so the stray .format(epoch_num) was dropped.
    torch.save(retinanet, 'model_final_dilation_experiment1.pt')
Example #18
0
def main(args):
    """Train resnet34 on the flower dataset with TensorBoard logging.

    Splits the data, builds the loaders and model, optionally loads
    pretrained weights and freezes all layers but the fc head, then trains
    for args.epochs epochs, logging loss/accuracy/lr, prediction figures
    and conv1 histograms to TensorBoard and saving weights every epoch.

    Args:
        args: parsed namespace with device, data_path, batch_size,
            num_classes, weights, freeze_layers, lr, lrf and epochs.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    print(args)
    print(
        'Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/'
    )
    # Instantiate the SummaryWriter object.
    tb_writer = SummaryWriter(log_dir="runs/flower_experiment")
    if not os.path.exists("./weights"):
        os.makedirs("./weights")

    # Split the data into a training set and a validation set.
    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(
        args.data_path)

    # Preprocessing pipelines for training and validation/prediction.
    data_transform = {
        "train":
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        "val":
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    }

    # Instantiate the training dataset.
    train_data_set = MyDataSet(images_path=train_images_path,
                               images_class=train_images_label,
                               transform=data_transform["train"])

    # Instantiate the validation dataset.
    val_data_set = MyDataSet(images_path=val_images_path,
                             images_class=val_images_label,
                             transform=data_transform["val"])

    batch_size = args.batch_size
    # Choose the number of dataloader workers, capped by CPU count and 8.
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0,
              8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(
        train_data_set,
        batch_size=batch_size,
        shuffle=True,
        pin_memory=True,
        num_workers=nw,
        collate_fn=train_data_set.collate_fn)

    val_loader = torch.utils.data.DataLoader(
        val_data_set,
        batch_size=batch_size,
        shuffle=False,
        pin_memory=True,
        num_workers=nw,
        collate_fn=val_data_set.collate_fn)

    # Instantiate the model.
    model = resnet34(num_classes=args.num_classes).to(device)

    # Write the model graph to TensorBoard.
    init_img = torch.zeros((1, 3, 224, 224), device=device)
    tb_writer.add_graph(model, init_img)

    # Load pretrained weights if they exist.
    if os.path.exists(args.weights):
        weights_dict = torch.load(args.weights, map_location=device)
        # Snapshot the model's state dict once instead of rebuilding it for
        # every key, and skip checkpoint keys the model does not have at all
        # (indexing model.state_dict()[k] raised KeyError for extra keys).
        model_state = model.state_dict()
        load_weights_dict = {
            k: v
            for k, v in weights_dict.items()
            if k in model_state and model_state[k].numel() == v.numel()
        }
        model.load_state_dict(load_weights_dict, strict=False)
    else:
        print("not using pretrain-weights.")

    # Optionally freeze every layer except the final fully-connected head.
    if args.freeze_layers:
        print("freeze layers except fc layer.")
        for name, para in model.named_parameters():
            if "fc" not in name:
                para.requires_grad_(False)

    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=0.005)
    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (
        1 - args.lrf) + args.lrf  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    for epoch in range(args.epochs):
        # train
        mean_loss = train_one_epoch(model=model,
                                    optimizer=optimizer,
                                    data_loader=train_loader,
                                    device=device,
                                    epoch=epoch)
        # update learning rate
        scheduler.step()

        # validate
        acc = evaluate(model=model, data_loader=val_loader, device=device)

        # add loss, acc and lr into tensorboard
        print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))
        tags = ["train_loss", "accuracy", "learning_rate"]
        tb_writer.add_scalar(tags[0], mean_loss, epoch)
        tb_writer.add_scalar(tags[1], acc, epoch)
        tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch)

        # add figure into tensorboard
        fig = plot_class_preds(net=model,
                               images_dir="./plot_img",
                               transform=data_transform["val"],
                               num_plot=5,
                               device=device)
        if fig is not None:
            tb_writer.add_figure("predictions vs. actuals",
                                 figure=fig,
                                 global_step=epoch)

        # add conv1 weights into tensorboard
        tb_writer.add_histogram(tag="conv1",
                                values=model.conv1.weight,
                                global_step=epoch)
        tb_writer.add_histogram(tag="layer1/block0/conv1",
                                values=model.layer1[0].conv1.weight,
                                global_step=epoch)

        # save weights
        torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))
Example #19
0
def main():
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    data_transform = {
        #torchvision.transforms是pytorch中的图像预处理包。一般用Compose把多个步骤整合到一起
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),#依据概率p对PIL图片进行垂直翻转
                                     transforms.ToTensor(),#将PIL Image或者 ndarray 转换为tensor,并且归一化至[0-1];
                                     # 归一化至[0-1]是直接除以255,若自己的ndarray数据尺度有变化,则需要自行修改。
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        ##transforms.Normalize对图像标准化处理的参数,这里根据官网定义的
        #transforms.Normalize使用如下公式进行归一化:channel=(channel-mean)/std。前面三个是mean、后面三个是std
        #因为totensor已经转化为[0-1],故最后的结果范围是[-1~1]


        "val": transforms.Compose([transforms.Resize(256), #,长宽比固定不动,最小边缩放到256
                                   transforms.CenterCrop(224), #根据中心点剪切到224
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}#标准化处理,防止突出数值较高的指标在综合分析中的作用。

    data_root = os.path.abspath(os.path.join(os.getcwd(), "..\Test5_resnet"))  # 返回绝对路径.表示返回上上一层目录
    image_path = os.path.join(data_root, "data_set", "flower_data")# flower data set path
    # image_path = os.path.join(data_root, "data_set/flower_data") #再从根目录开始向下进行完整目录的拼接。
    # image_path = os.path.join("D:\pycharmproject\deep learning for images"
    #                                                       "\deep-learning-for-image-processing-master\pytorch_classification"
    #                                                       "\Test5_resnet/flower_data/data_set/flower_data")
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
    #断言(assert)作为一种软件调试的方法,提供了一种在代码中进行正确性检查的机制。
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    '''
    其构造函数如下:
 #ImageFolder假设所有的文件按文件夹保存,每个文件夹下存储同一个类别的图片,文件夹名为类名,
ImageFolder(root, transform=None, target_transform=None, loader=default_loader)
它主要有四个参数:
root:在root指定的路径下寻找图片
transform:对PIL Image进行的转换操作,transform的输入是使用loader读取图片的返回对象
target_transform:对label的转换
loader:给定路径后如何读取图片,默认读取为RGB格式的PIL Image对象
    '''
    train_num = len(train_dataset)

    # {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx #类别名与数字类别的映射关系字典(class_to_idx)
    cla_dict = dict((val, key) for key, val in flower_list.items())
    # write dict into json file
    json_str = json.dumps(cla_dict, indent=4) #json.dumps()是把python对象转换成json对象的一个过程,
    # 生成的是字符串。cla_dict是转化成json的对象,indent:参数根据数据格式缩进显示,读起来更加清晰。
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)
        '''
        因为传统打开文件是f = open('test.txt','r')如果存在则继续执行,然后f.close();如果不存在则会报错
        改用with open as简化上面过程。无论有无异常均可自动调用f.close()方法。
        '''

    batch_size = 16
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8])  # number of workers 获取电脑的cpu个数。
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,#从实例化的dataset当中取得图片,
                                               # 然后打包成一个一个batch,然后输入网络进行训练
                                               batch_size=batch_size, shuffle=True,#打乱数据集
                                               num_workers=nw) #如果使用的是linux系统,
                                                # 将num_workers设置成一个大于0的数加快图像预处理进程
    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),#在image-path中找到val文件夹
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images fot validation.".format(train_num,
                                                                           val_num))
    
    net = resnet34()
    # load pretrain weights
    # download url: https://download.pytorch.org/models/resnet34-333f7ec4.pth
    model_weight_path = "./resnet34-pre.pth" #里面是预训练的权重
    assert os.path.exists(model_weight_path), "file {} does not exist.".format(model_weight_path)
    missing_keys, unexpected_keys = net.load_state_dict(torch.load(model_weight_path), strict=False) #载入模型权重
    # for param in net.parameters():
    #     param.requires_grad = False

    # change fc layer structure
    in_channel = net.fc.in_features #这里的fc是model.ResNet里的fc;in_features是输入特征矩阵的深度
    net.fc = nn.Linear(in_channel, 5)
    net.to(device)

    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.0001)

    epochs = 3
    best_acc = 0.0
    save_path = './resNet34.pth'
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # train
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader) #tqdm显示进度
        for step, data in enumerate(train_bar):
            images, labels = data
            optimizer.zero_grad()
            logits = net(images.to(device))
            loss = loss_function(logits, labels.to(device))
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss)

        # validate
        net.eval()
        acc = 0.0  # accumulate accurate number / epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, colour='green')
            for val_data in val_bar:
                val_images, val_labels = val_data
                outputs = net(val_images.to(device))
                # loss = loss_function(outputs, test_labels)
                predict_y = torch.max(outputs, dim=1)[1]
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

                val_bar.desc = "valid epoch[{}/{}]".format(epoch + 1,
                                                           epochs)

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')
Example #20
0
def main(args=None):
    """Train a RetinaNet detector on COCO, logging losses to TensorBoard.

    Builds train/val COCO loaders, constructs the requested ResNet backbone,
    optionally restores a checkpoint, then trains for parser.epochs epochs,
    evaluating and checkpointing after each epoch.

    Args:
        args: optional list of argument strings (defaults to sys.argv).
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--coco_path',
                        help='Path to COCO directory',
                        type=str,
                        default='./data/coco')
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--checkpoint',
                        help='The path to the checkpoint.',
                        type=str,
                        default=None)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument('--batch_size',
                        help='Number of batch',
                        type=int,
                        default=16)
    parser.add_argument('--gpu_ids',
                        help='Gpu parallel',
                        type=str,
                        default='1, 2')

    parser = parser.parse_args(args)

    # Create the data loaders
    dataset_train = CocoDataset(parser.coco_path,
                                set_name='train2017',
                                transform=transforms.Compose(
                                    [Normalizer(),
                                     Augmenter(),
                                     Resizer()]))
    dataset_val = CocoDataset(parser.coco_path,
                              set_name='val2017',
                              transform=transforms.Compose(
                                  [Normalizer(), Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=4,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=16,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=1,
                                          drop_last=False)
    dataloader_val = DataLoader(dataset_val,
                                num_workers=3,
                                collate_fn=collater,
                                batch_sampler=sampler_val)

    # Create the model with the requested backbone depth.
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    # Pin the process to the first requested GPU and wrap for multi-GPU training.
    gpu_ids = parser.gpu_ids.split(',')
    device = torch.device("cuda:" + gpu_ids[0])
    torch.cuda.set_device(device)
    gpu_ids = list(map(int, gpu_ids))
    retinanet = torch.nn.DataParallel(retinanet, device_ids=gpu_ids).to(device)

    if parser.checkpoint:
        # Checkpoints below are whole pickled models; restore only their weights.
        pretrained = torch.load(parser.checkpoint).state_dict()
        retinanet.module.load_state_dict(pretrained)

    # add tensorboard to record train log
    # (the original assigned retinanet.training = True twice; once is enough)
    retinanet.training = True
    writer = SummaryWriter('./log')

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    # Monotonically increasing step for TensorBoard; iter_num restarts at 0
    # each epoch, which made later epochs overwrite earlier scalar points.
    global_step = 0

    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                classification_loss, regression_loss = retinanet(
                    [data['img'].to(device), data['ann'].to(device)])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                # Nothing to learn from an all-zero loss (no annotations matched).
                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))
                writer.add_scalar('Loss/train', loss, global_step)
                writer.add_scalar('Loss/reg_loss', regression_loss, global_step)
                writer.add_scalar('Loss/cls_loss', classification_loss,
                                  global_step)
                global_step += 1

                epoch_loss.append(float(loss))

                if (iter_num + 1) % 1000 == 0:
                    print('Save model')
                    torch.save(
                        retinanet.module,
                        'COCO_retinanet_epoch{}_iter{}.pt'.format(
                            epoch_num, iter_num))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        print('Evaluating dataset')

        coco_eval.evaluate_coco(dataset_val, retinanet, writer)

        scheduler.step(np.mean(epoch_loss))

        torch.save(retinanet.module, 'COCO_retinanet_{}.pt'.format(epoch_num))

    retinanet.eval()

    # The filename has no placeholder, so the stray .format(epoch_num) was dropped.
    torch.save(retinanet, 'model_final.pt')
Example #21
0
# load image
img = Image.open("../../data_set/flower_data/flower_photos/tulips/112428665_d8f3632f36_n.jpg")
plt.imshow(img)
# [N, C, H, W]
img = data_transform(img)
# expand batch dimension
img = torch.unsqueeze(img, dim=0)

# read class_indict: use a context manager so the file handle is always
# closed (the original opened json_file and never closed it).
try:
    with open('./class_indices.json', 'r') as json_file:
        class_indict = json.load(json_file)
except Exception as e:
    print(e)
    exit(-1)

# create model
model = resnet34(num_classes=5)
# load model weights
model_weight_path = "./resNet34.pth"
model.load_state_dict(torch.load(model_weight_path))
model.eval()
with torch.no_grad():
    # predict class
    output = torch.squeeze(model(img))
    predict = torch.softmax(output, dim=0)
    predict_cla = torch.argmax(predict).numpy()
print(class_indict[str(predict_cla)], predict[predict_cla].numpy())
plt.show()
def main():
    """Build, configure, and train the coconut classification model.

    Selects dataset statistics and class count from args.model_type, builds
    the backbone named by args.model_arc, sets up the optimizer/scheduler,
    optionally restores a checkpoint, then hands off to train_ops.

    Raises:
        NotImplementedError: for an unknown model_type or model_arc.
    """
    global NORM_MEAN, NORM_STD, coconut_model, train_history_dict

    for arg in vars(args):
        print(str(arg) + ': ' + str(getattr(args, arg)))
    print('=' * 100)

    # Build Model base on dataset and arc
    num_classes = None
    if args.model_type == 'food179':
        num_classes = 179
        NORM_MEAN = FOOD179_MEAN
        NORM_STD = FOOD179_STD
    elif args.model_type == 'nsfw':
        num_classes = 5
        NORM_MEAN = NSFW_MEAN
        NORM_STD = NSFW_STD
    else:
        # `raise ('Not Implemented!')` raised a TypeError (a str is not an
        # exception); raise a proper exception type instead.
        raise NotImplementedError('Not Implemented!')

    if args.model_arc == 'resnet18':
        coconut_model = model.resnet18(num_classes=num_classes,
                                       zero_init_residual=True)
    elif args.model_arc == 'resnet34':
        coconut_model = model.resnet34(num_classes=num_classes,
                                       zero_init_residual=True)
    elif args.model_arc == 'resnet50':
        coconut_model = model.resnet50(num_classes=num_classes,
                                       zero_init_residual=True)
    elif args.model_arc == 'resnet101':
        coconut_model = model.resnet101(num_classes=num_classes,
                                        zero_init_residual=True)
    elif args.model_arc == 'resnet152':
        coconut_model = model.resnet152(num_classes=num_classes,
                                        zero_init_residual=True)
    elif args.model_arc == 'mobilenet':
        coconut_model = model.MobileNetV2(n_class=num_classes, input_size=256)
    else:
        raise NotImplementedError('Not Implemented!')

    coconut_model = nn.DataParallel(coconut_model)
    if args.cuda:
        coconut_model = coconut_model.cuda()
        # The cudnn autotuner flag lives at torch.backends.cudnn.benchmark;
        # torch.backends.benchmark just set a meaningless attribute.
        torch.backends.cudnn.benchmark = True
        print("CUDA Enabled")
        gpu_count = torch.cuda.device_count()
        print('Total of %d GPU available' % (gpu_count))
        # Scale batch sizes with the number of GPUs used by DataParallel.
        args.train_batch_size = args.train_batch_size * gpu_count
        args.test_batch_size = args.test_batch_size * gpu_count
        print('args.train_batch_size: %d' % (args.train_batch_size))
        print('args.test_batch_size: %d' % (args.test_batch_size))

    model_parameters = filter(lambda p: p.requires_grad,
                              coconut_model.parameters())
    params = sum([np.prod(p.size()) for p in model_parameters])
    print('Total of %d parameters' % (params))

    # Build Training
    start_epoch = 0
    best_acc = 0
    optimizer = None
    scheduler = None
    milestones = [50, 150, 250]
    if args.train_optimizer == 'sgd':
        optimizer = optim.SGD(coconut_model.parameters(),
                              lr=args.lr,
                              momentum=0.9,
                              nesterov=True,
                              weight_decay=args.l2_reg)
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                   milestones=milestones,
                                                   gamma=0.1)
    elif args.train_optimizer == 'adam':
        optimizer = optim.Adam(coconut_model.parameters(), lr=args.lr)
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                                   milestones=milestones,
                                                   gamma=0.1)
    elif args.train_optimizer == 'adabound':
        optimizer = adabound.AdaBound(coconut_model.parameters(),
                                      lr=1e-3,
                                      final_lr=0.1)
        scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                    step_size=150,
                                                    gamma=0.1,
                                                    last_epoch=-1)

    global_steps = 0
    if not args.start_from_begining:
        # Resume model, optimizer, metrics and bookkeeping from a checkpoint.
        filename = args.model_checkpoint_path
        if args.load_gpu_model_on_cpu:
            checkpoint = torch.load(filename,
                                    map_location=lambda storage, loc: storage)
        else:
            checkpoint = torch.load(filename)

        coconut_model.load_state_dict(checkpoint['model_state_dict'])
        optimizer.load_state_dict(checkpoint['model_optimizer'])
        best_acc = checkpoint['best_acc']
        train_history_dict = checkpoint['train_history_dict']
        scheduler.optimizer = optimizer  # Not sure if this actually works
        start_epoch = checkpoint['epoch']
        global_steps = checkpoint['global_steps']
        print(filename + ' loaded!')

    data_loaders = load_datasets()
    train_ops(start_epoch=start_epoch,
              model=coconut_model,
              optimizer=optimizer,
              scheduler=scheduler,
              data_loaders=data_loaders,
              best_acc=best_acc,
              global_steps=global_steps)
Example #23
0
        init.normal(m.weight.data, 1.0, 0.02)
        init.constant(m.bias.data, 0.0)
        #m.weight.data.normal_(1.0, 0.02)
        #m.bias.data.fill_(0)
    elif classname.find('Linear') != -1:
        init.kaiming_normal(m.weight.data, a=0, mode='fan_in')
        init.constant(m.bias.data, 0.0)
        #m.weight.data.normal_(0.0, 0.02)

def compute_accuracy(x, y):
    """Return the percentage of rows in ``x`` whose argmax matches ``y``.

    ``x`` holds per-class scores with shape (batch, num_classes); ``y``
    holds the integer target label for each row.
    """
    preds = x.argmax(dim=1)
    hits = (preds == y).float()
    return hits.mean() * 100.0

# Build the ResNet-34 classifier with the configured number of output
# classes and apply the custom weight initialisation defined earlier in
# this file.
Resnet34 = resnet34(num_classes=opt.out_class)

Resnet34.apply(weights_init)

criterion = nn.CrossEntropyLoss()


# Move the model and the loss to GPU when CUDA was requested on the
# command line.
if opt.cuda:
    Resnet34.cuda()
    criterion.cuda()

# Wrap in DataParallel only when more than one GPU is configured.
if ngpu>1:
    Resnet34 = nn.DataParallel(Resnet34)

# Optionally resume from a previously saved state dict; an empty string
# for --Resnet34 means train from scratch.
if opt.Resnet34 != '':
    Resnet34.load_state_dict(torch.load(opt.Resnet34))
def main_fun(rank, world_size, args):
    """Per-process entry point for DistributedDataParallel ResNet-34 training.

    Spawned once per GPU.  ``rank`` is this process's index within the
    ``world_size`` processes; rank 0 additionally prints progress, writes
    TensorBoard logs and saves checkpoints.
    """
    if torch.cuda.is_available() is False:
        raise EnvironmentError("not find GPU device for training.")

    # Initialise the per-process distributed environment.
    os.environ["MASTER_ADDR"] = "localhost"
    os.environ["MASTER_PORT"] = "12355"

    args.rank = rank
    args.world_size = world_size
    args.gpu = rank

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = 'nccl'
    print('| distributed init (rank {}): {}'.format(args.rank, args.dist_url),
          flush=True)
    dist.init_process_group(backend=args.dist_backend,
                            init_method=args.dist_url,
                            world_size=args.world_size,
                            rank=args.rank)
    dist.barrier()
    # Distributed environment ready.

    rank = args.rank
    device = torch.device(args.device)
    batch_size = args.batch_size
    num_classes = args.num_classes
    weights_path = args.weights
    # Scale the learning rate by the number of parallel GPUs.
    args.lr *= args.world_size

    if rank == 0:  # only the first process logs and instantiates TensorBoard
        print(args)
        print(
            'Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/'
        )
        tb_writer = SummaryWriter()
        if os.path.exists("./weights") is False:
            os.makedirs("./weights")

    train_images_path, train_images_label, val_images_path, val_images_label = read_split_data(
        args.data_path)

    data_transform = {
        "train":
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        "val":
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    }

    # Training dataset.
    train_data_set = MyDataSet(images_path=train_images_path,
                               images_class=train_images_label,
                               transform=data_transform["train"])

    # Validation dataset.
    val_data_set = MyDataSet(images_path=val_images_path,
                             images_class=val_images_label,
                             transform=data_transform["val"])

    # Assign each rank its own subset of sample indices.
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_data_set)
    val_sampler = torch.utils.data.distributed.DistributedSampler(val_data_set)

    # Group the sample indices into lists of batch_size elements.
    train_batch_sampler = torch.utils.data.BatchSampler(train_sampler,
                                                        batch_size,
                                                        drop_last=True)

    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0,
              8])  # number of workers
    if rank == 0:
        print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(
        train_data_set,
        batch_sampler=train_batch_sampler,
        pin_memory=True,
        num_workers=nw,
        collate_fn=train_data_set.collate_fn)

    val_loader = torch.utils.data.DataLoader(
        val_data_set,
        batch_size=batch_size,
        sampler=val_sampler,
        pin_memory=True,
        num_workers=nw,
        collate_fn=val_data_set.collate_fn)
    # Instantiate the model.
    model = resnet34(num_classes=num_classes).to(device)

    # Path used to share a common random initialisation between processes.
    # Defined unconditionally so the cleanup at the end of training cannot
    # raise NameError (previously it did whenever pretrained weights were
    # found and the else-branch below was skipped).
    checkpoint_path = os.path.join(tempfile.gettempdir(), "initial_weights.pt")

    # Load pretrained weights when available.
    if os.path.exists(weights_path):
        weights_dict = torch.load(weights_path, map_location=device)
        # Keep only tensors whose element count matches the current model.
        load_weights_dict = {
            k: v
            for k, v in weights_dict.items()
            if model.state_dict()[k].numel() == v.numel()
        }
        model.load_state_dict(load_weights_dict, strict=False)
    else:
        # No pretrained weights: rank 0 saves its initial state and every
        # other process loads it, so all ranks start from identical weights.
        if rank == 0:
            torch.save(model.state_dict(), checkpoint_path)

        dist.barrier()
        # map_location must be given here, otherwise the first GPU would be
        # allocated extra memory by every process.
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))

    # Optionally freeze everything except the final fully-connected layer.
    if args.freeze_layers:
        for name, para in model.named_parameters():
            if "fc" not in name:
                para.requires_grad_(False)
    else:
        # SyncBatchNorm is only meaningful for networks containing BN layers
        # and makes training slower.
        if args.syncBN:
            model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(
                device)

    # Wrap the model for distributed data-parallel training.
    model = torch.nn.parallel.DistributedDataParallel(model,
                                                      device_ids=[args.gpu])

    # optimizer: only parameters that still require gradients.
    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=0.005)
    # Scheduler https://arxiv.org/pdf/1812.01187.pdf
    lf = lambda x: ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (
        1 - args.lrf) + args.lrf  # cosine
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    for epoch in range(args.epochs):
        # Reshuffle the per-rank sample split differently every epoch.
        train_sampler.set_epoch(epoch)

        mean_loss = train_one_epoch(model=model,
                                    optimizer=optimizer,
                                    data_loader=train_loader,
                                    device=device,
                                    epoch=epoch)

        scheduler.step()

        sum_num = evaluate(model=model, data_loader=val_loader, device=device)
        acc = sum_num / val_sampler.total_size

        if rank == 0:
            print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))
            tags = ["loss", "accuracy", "learning_rate"]
            tb_writer.add_scalar(tags[0], mean_loss, epoch)
            tb_writer.add_scalar(tags[1], acc, epoch)
            tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"],
                                 epoch)

            # Save the raw (unwrapped) module's weights.
            torch.save(model.module.state_dict(),
                       "./weights/model-{}.pth".format(epoch))

    # Remove the temporary initial-weights file.
    if rank == 0:
        if os.path.exists(checkpoint_path) is True:
            os.remove(checkpoint_path)

    cleanup()
Example #25
0
def main(args=None):
    """Train a RetinaNet detector on a COCO, YCB or CSV dataset.

    Parses command-line arguments, builds the datasets/loaders and model,
    runs the training loop, periodically evaluates on the validation set,
    and saves the checkpoint with the best mean average precision.
    """

    parser = argparse.ArgumentParser(
        description="Simple training script for training a RetinaNet network.")

    parser.add_argument(
        "--dataset", help="Dataset type, must be one of csv or coco or ycb.")
    parser.add_argument("--path", help="Path to dataset directory")
    parser.add_argument(
        "--csv_train",
        help="Path to file containing training annotations (see readme)")
    parser.add_argument("--csv_classes",
                        help="Path to file containing class list (see readme)")
    parser.add_argument("--csv_val",
                        help="Path to file containing validation annotations "
                        "(optional, see readme)")

    parser.add_argument(
        "--depth",
        help="Resnet depth, must be one of 18, 34, 50, 101, 152",
        type=int,
        default=50)
    parser.add_argument("--epochs",
                        help="Number of epochs",
                        type=int,
                        default=100)
    parser.add_argument("--evaluate_every", default=20, type=int)
    parser.add_argument("--print_every", default=20, type=int)
    parser.add_argument('--distributed',
                        action="store_true",
                        help='Run model in distributed mode with DataParallel')

    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == "coco":

        if parser.path is None:
            raise ValueError(
                "Must provide --path when training on non-CSV datasets")

        dataset_train = CocoDataset(parser.path,
                                    ann_file="instances_train2014.json",
                                    set_name="train2014",
                                    transform=transforms.Compose([
                                        Normalizer(),
                                        Augmenter(),
                                        Resizer(min_side=512, max_side=512)
                                    ]))
        dataset_val = CocoDataset(parser.path,
                                  ann_file="instances_val2014.cars.json",
                                  set_name="val2014",
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == "ycb":

        dataset_train = YCBDataset(parser.path,
                                   "image_sets/train.txt",
                                   transform=transforms.Compose([
                                       Normalizer(),
                                       Augmenter(),
                                       Resizer(min_side=512, max_side=512)
                                   ]),
                                   train=True)
        dataset_val = YCBDataset(parser.path,
                                 "image_sets/val.txt",
                                 transform=transforms.Compose(
                                     [Normalizer(), Resizer()]),
                                 train=False)

    elif parser.dataset == "csv":

        if parser.csv_train is None:
            raise ValueError("Must provide --csv_train when training on COCO,")

        if parser.csv_classes is None:
            raise ValueError(
                "Must provide --csv_classes when training on COCO,")

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print("No validation annotations provided.")
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            "Dataset type not understood (must be csv or coco), exiting.")

    # Batches are grouped by aspect ratio to minimise padding.
    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=12,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=8,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=4,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            "Unsupported model depth, must be one of 18, 34, 50, 101, 152")

    print("CUDA available: {}".format(torch.cuda.is_available()))
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    retinanet = retinanet.to(device)

    if parser.distributed:
        retinanet = torch.nn.DataParallel(retinanet)

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    # Reduce the LR when the epoch loss plateaus.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    print("Num training images: {}".format(len(dataset_train)))

    best_mean_avg_prec = 0.0

    for epoch_num in range(parser.epochs):

        retinanet.train()
        # Keep BatchNorm statistics frozen at their pretrained values.
        retinanet.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                classification_loss, regression_loss = retinanet(
                    [data["img"].to(device).float(), data["annot"]])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss.item()))
                epoch_loss.append(float(loss.item()))

                # Log every `print_every` iterations.  The original condition
                # had the operands reversed (`print_every % iter_num`), which
                # logged on the wrong iterations and raised ZeroDivisionError
                # at iter_num == 0 (silently swallowed by the broad except).
                if iter_num % parser.print_every == 0:
                    print("Epoch: {} | Iteration: {}/{} | "
                          "Classification loss: {:1.5f} | "
                          "Regression loss: {:1.5f} | "
                          "Running loss: {:1.5f}".format(
                              epoch_num, iter_num, len(dataloader_train),
                              float(classification_loss),
                              float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        # Evaluate periodically and always on the final epoch.
        if ((epoch_num + 1) % parser.evaluate_every
                == 0) or epoch_num + 1 == parser.epochs:

            mAP = 0.0

            if parser.dataset == "coco":

                print("Evaluating dataset")
                mAP = coco_eval.evaluate_coco(dataset_val, retinanet)

            else:
                print("Evaluating dataset")
                AP = eval.evaluate(dataset_val, retinanet)
                mAP = np.asarray([x[0] for x in AP.values()]).mean()
                print("Val set mAP: ", mAP)

            # Keep only the best-scoring checkpoint so far.
            if mAP > best_mean_avg_prec:
                best_mean_avg_prec = mAP
                torch.save(
                    retinanet.state_dict(),
                    "{}_retinanet_best_mean_ap_{}.pt".format(
                        parser.dataset, epoch_num))

        scheduler.step(np.mean(epoch_loss))

    retinanet.eval()

    torch.save(retinanet.state_dict(), "retinanet_model_final.pt")
Example #26
0
def main(args=None):
    """Train a RetinaNet detector on a COCO or CSV dataset.

    Builds the data loaders and model from command-line arguments, trains
    with SGD or Adam, saves a checkpoint every epoch, and evaluates on the
    validation split when one is available.
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument('--optimizer',
                        help='[SGD | Adam]',
                        type=str,
                        default='SGD')
    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser = parser.parse_args(args)

    # Create the data loaders
    print("\n[Phase 1]: Creating DataLoader for {} dataset".format(
        parser.dataset))
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2014',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2014',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on COCO,')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on COCO,')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    # Batches are grouped by aspect ratio to minimise padding.
    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=8,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=8,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=16,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=8,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    print('| Num training images: {}'.format(len(dataset_train)))
    # Guard the count print: dataset_val is None when no --csv_val is given,
    # and len(None) would crash here.
    if dataset_val is not None:
        print('| Num test images : {}'.format(len(dataset_val)))

    print("\n[Phase 2]: Preparing RetinaNet Detection Model...")
    use_gpu = torch.cuda.is_available()
    # Define `device` unconditionally: it is used later in the training loop
    # and evaluation, which previously raised NameError on CPU-only machines
    # because the assignment lived inside `if use_gpu:`.
    device = torch.device('cuda' if use_gpu else 'cpu')
    retinanet = retinanet.to(device)

    retinanet = torch.nn.DataParallel(retinanet,
                                      device_ids=range(
                                          torch.cuda.device_count()))
    print("| Using %d GPUs for Train/Validation!" % torch.cuda.device_count())
    retinanet.training = True

    if parser.optimizer == 'Adam':
        optimizer = optim.Adam(retinanet.parameters(),
                               lr=1e-5)  # not mentioned
        print("| Adam Optimizer with Learning Rate = {}".format(1e-5))
    elif parser.optimizer == 'SGD':
        optimizer = optim.SGD(retinanet.parameters(),
                              lr=1e-2,
                              momentum=0.9,
                              weight_decay=1e-4)
        print("| SGD Optimizer with Learning Rate = {}".format(1e-2))
    else:
        raise ValueError('Unsupported Optimizer, must be one of [SGD | Adam]')

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn(
    )  # Freeze the BN parameters to ImageNet configuration

    # Check if there is a 'checkpoints' path
    if not osp.exists('./checkpoints/'):
        os.makedirs('./checkpoints/')

    print("\n[Phase 3]: Training Model on {} dataset...".format(
        parser.dataset))
    for epoch_num in range(parser.epochs):
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet(
                    [data['img'].to(device), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.001)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                sys.stdout.write('\r')
                sys.stdout.write(
                    '| Epoch: {} | Iteration: {}/{} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num + 1, iter_num + 1, len(dataloader_train),
                            float(classification_loss), float(regression_loss),
                            np.mean(loss_hist)))
                sys.stdout.flush()

                del classification_loss
                del regression_loss

            except Exception as e:
                print(e)
                continue

        print("\n| Saving current best model at epoch {}...".format(epoch_num +
                                                                    1))
        torch.save(
            retinanet.state_dict(),
            './checkpoints/{}_retinanet_{}.pt'.format(parser.dataset,
                                                      epoch_num + 1))

        if parser.dataset == 'coco':
            #print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet, device)

        elif parser.dataset == 'csv' and parser.csv_val is not None:
            #print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet, device)

        scheduler.step(np.mean(epoch_loss))

    retinanet.eval()
    torch.save(retinanet.state_dict(), './checkpoints/model_final.pt')
Example #27
0
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.',
                        default="csv")
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)',
                        default="binary_class.csv")
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=18)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=500)
    parser.add_argument('--epochs_only_det',
                        help='Number of epochs to train detection part',
                        type=int,
                        default=1)
    parser.add_argument('--max_epochs_no_improvement',
                        help='Max epochs without improvement',
                        type=int,
                        default=100)
    parser.add_argument('--pretrained_model',
                        help='Path of .pt file with pretrained model',
                        default='esposallescsv_retinanet_0.pt')
    parser.add_argument('--model_out',
                        help='Path of .pt file with trained model to save',
                        default='trained')

    parser.add_argument('--score_threshold',
                        help='Score above which boxes are kept',
                        type=float,
                        default=0.5)
    parser.add_argument('--nms_threshold',
                        help='Score above which boxes are kept',
                        type=float,
                        default=0.2)
    parser.add_argument('--max_boxes',
                        help='Max boxes to be fed to recognition',
                        default=95)
    parser.add_argument('--seg_level',
                        help='[line, word], to choose anchor aspect ratio',
                        default='word')
    parser.add_argument(
        '--early_stop_crit',
        help='Early stop criterion, detection (map) or transcription (cer)',
        default='cer')
    parser.add_argument('--max_iters_epoch',
                        help='Max steps per epoch (for debugging)',
                        default=1000000)
    parser.add_argument('--train_htr',
                        help='Train recognition or not',
                        default='True')
    parser.add_argument('--train_det',
                        help='Train detection or not',
                        default='True')
    parser.add_argument(
        '--binary_classifier',
        help=
        'Wether to use classification branch as binary or not, multiclass instead.',
        default='False')
    parser.add_argument(
        '--htr_gt_box',
        help='Train recognition branch with box gt (for debugging)',
        default='False')
    parser.add_argument(
        '--ner_branch',
        help='Train named entity recognition with separate branch',
        default='False')

    parser = parser.parse_args(args)

    if parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train')

        dataset_name = parser.csv_train.split("/")[-2]

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    # Files for training log

    experiment_id = str(time.time()).split('.')[0]
    valid_cer_f = open('trained_models/' + parser.model_out + 'log.txt', 'w')
    for arg in vars(parser):
        if getattr(parser, arg) is not None:
            valid_cer_f.write(
                str(arg) + ' ' + str(getattr(parser, arg)) + '\n')

    current_commit = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
    valid_cer_f.write(str(current_commit))

    valid_cer_f.write(
        "epoch_num   cer     best cer     mAP    best mAP     time\n")

    valid_cer_f.close()

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=1,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=0,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    if not os.path.exists('trained_models'):
        os.mkdir('trained_models')

    # Create the model

    train_htr = parser.train_htr == 'True'
    htr_gt_box = parser.htr_gt_box == 'True'
    ner_branch = parser.ner_branch == 'True'
    binary_classifier = parser.binary_classifier == 'True'
    torch.backends.cudnn.benchmark = False

    alphabet = dataset_train.alphabet
    if os.path.exists(parser.pretrained_model):
        retinanet = torch.load(parser.pretrained_model)
        retinanet.classificationModel = ClassificationModel(
            num_features_in=256,
            num_anchors=retinanet.anchors.num_anchors,
            num_classes=dataset_train.num_classes())
        if ner_branch:
            retinanet.nerModel = NERModel(
                feature_size=256,
                pool_h=retinanet.pool_h,
                n_classes=dataset_train.num_classes(),
                pool_w=retinanet.pool_w)
    else:
        if parser.depth == 18:
            retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                       pretrained=True,
                                       max_boxes=int(parser.max_boxes),
                                       score_threshold=float(
                                           parser.score_threshold),
                                       seg_level=parser.seg_level,
                                       alphabet=alphabet,
                                       train_htr=train_htr,
                                       htr_gt_box=htr_gt_box,
                                       ner_branch=ner_branch,
                                       binary_classifier=binary_classifier)

        elif parser.depth == 34:

            retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                       pretrained=True,
                                       max_boxes=int(parser.max_boxes),
                                       score_threshold=float(
                                           parser.score_threshold),
                                       seg_level=parser.seg_level,
                                       alphabet=alphabet,
                                       train_htr=train_htr,
                                       htr_gt_box=htr_gt_box)

        elif parser.depth == 50:
            retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                       pretrained=True)
        elif parser.depth == 101:
            retinanet = model.resnet101(
                num_classes=dataset_train.num_classes(), pretrained=True)
        elif parser.depth == 152:
            retinanet = model.resnet152(
                num_classes=dataset_train.num_classes(), pretrained=True)
        else:
            raise ValueError(
                'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    train_htr = parser.train_htr == 'True'
    train_det = parser.train_det == 'True'
    retinanet.htr_gt_box = parser.htr_gt_box == 'True'

    retinanet.train_htr = train_htr
    retinanet.epochs_only_det = parser.epochs_only_det

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-4)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=50,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)
    ctc = CTCLoss()
    retinanet.train()
    retinanet.module.freeze_bn()

    best_cer = 1000
    best_map = 0
    epochs_no_improvement = 0
    verbose_each = 20
    optimize_each = 1
    objective = 100
    best_objective = 10000

    print(('Num training images: {}'.format(len(dataset_train))))

    for epoch_num in range(parser.epochs):
        cers = []

        retinanet.training = True

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            if iter_num > int(parser.max_iters_epoch): break
            try:
                if iter_num % optimize_each == 0:
                    optimizer.zero_grad()
                (classification_loss, regression_loss, ctc_loss,
                 ner_loss) = retinanet([
                     data['img'].cuda().float(), data['annot'], ctc, epoch_num
                 ])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                if train_det:

                    if train_htr:
                        loss = ctc_loss + classification_loss + regression_loss + ner_loss

                    else:
                        loss = classification_loss + regression_loss + ner_loss

                elif train_htr:
                    loss = ctc_loss

                else:
                    continue
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                if iter_num % verbose_each == 0:
                    print((
                        'Epoch: {} | Step: {} |Classification loss: {:1.5f} | Regression loss: {:1.5f} | CTC loss: {:1.5f} | NER loss: {:1.5f} | Running loss: {:1.5f} | Total loss: {:1.5f}\r'
                        .format(epoch_num, iter_num,
                                float(classification_loss),
                                float(regression_loss), float(ctc_loss),
                                float(ner_loss), np.mean(loss_hist),
                                float(loss), "\r")))

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))
                torch.cuda.empty_cache()

            except Exception as e:
                print(e)
                continue
        if parser.dataset == 'csv' and parser.csv_val is not None and train_det:

            print('Evaluating dataset')

            mAP, text_mAP, current_cer = csv_eval.evaluate(
                dataset_val, retinanet, score_threshold=parser.score_threshold)
            #text_mAP,_ = csv_eval_binary_map.evaluate(dataset_val, retinanet,score_threshold=parser.score_threshold)
            objective = current_cer * (1 - mAP)

        retinanet.eval()
        retinanet.training = False
        retinanet.score_threshold = float(parser.score_threshold)
        '''for idx,data in enumerate(dataloader_val):
            if idx>int(parser.max_iters_epoch): break
            print("Eval CER on validation set:",idx,"/",len(dataset_val),"\r")
            image_name = dataset_val.image_names[idx].split('/')[-1].split('.')[-2]

            #generate_pagexml(image_name,data,retinanet,parser.score_threshold,parser.nms_threshold,dataset_val)
            text_gt =".".join(dataset_val.image_names[idx].split('.')[:-1])+'.txt'
            f =open(text_gt,'r')
            text_gt_lines=f.readlines()[0]
            transcript_pred = get_transcript(image_name,data,retinanet,float(parser.score_threshold),float(parser.nms_threshold),dataset_val,alphabet)
            cers.append(float(editdistance.eval(transcript_pred,text_gt_lines))/len(text_gt_lines))'''

        t = str(time.time()).split('.')[0]

        valid_cer_f.close()
        #print("GT",text_gt_lines)
        #print("PREDS SAMPLE:",transcript_pred)

        if parser.early_stop_crit == 'cer':

            if float(objective) < float(
                    best_objective):  #float(current_cer)<float(best_cer):
                best_cer = current_cer
                best_objective = objective

                epochs_no_improvement = 0
                torch.save(
                    retinanet.module, 'trained_models/' + parser.model_out +
                    '{}_retinanet.pt'.format(parser.dataset))

            else:
                epochs_no_improvement += 1
            if mAP > best_map:
                best_map = mAP
        elif parser.early_stop_crit == 'map':
            if mAP > best_map:
                best_map = mAP
                epochs_no_improvement = 0
                torch.save(
                    retinanet.module, 'trained_models/' + parser.model_out +
                    '{}_retinanet.pt'.format(parser.dataset))

            else:
                epochs_no_improvement += 1
            if float(current_cer) < float(best_cer):
                best_cer = current_cer
        if train_det:
            print(epoch_num, "mAP: ", mAP, " best mAP", best_map)
        if train_htr:
            print("VALID CER:", current_cer, "best CER", best_cer)
        print("Epochs no improvement:", epochs_no_improvement)
        valid_cer_f = open('trained_models/' + parser.model_out + 'log.txt',
                           'a')
        valid_cer_f.write(
            str(epoch_num) + " " + str(current_cer) + " " + str(best_cer) +
            ' ' + str(mAP) + ' ' + str(best_map) + ' ' + str(text_mAP) + '\n')
        if epochs_no_improvement > 3:
            for param_group in optimizer.param_groups:
                if param_group['lr'] > 10e-5:
                    param_group['lr'] *= 0.1

        if epochs_no_improvement >= parser.max_epochs_no_improvement:
            print("TRAINING FINISHED AT EPOCH", epoch_num, ".")
            sys.exit()

        scheduler.step(np.mean(epoch_loss))
        torch.cuda.empty_cache()

    retinanet.eval()
Example #28
0
def main(args):
    """Train a ResNet-34 image classifier.

    Builds train/val dataloaders from ``args.data_path``, optionally loads
    pretrained weights (``args.weights``) and freezes everything but the
    final fully-connected layer, then trains with SGD + cosine LR decay,
    logging loss/accuracy/lr to TensorBoard and checkpointing every epoch.

    Args:
        args: parsed namespace with ``device``, ``data_path``,
            ``num_classes``, ``weights``, ``freeze_layers``, ``batch_size``,
            ``lr``, ``lrf`` and ``epochs`` attributes.

    Raises:
        ValueError: if the dataset's class count does not match
            ``args.num_classes``.
        FileNotFoundError: if ``args.weights`` is set but does not exist.
    """
    device = torch.device(args.device if torch.cuda.is_available() else "cpu")

    print(args)
    print(
        'Start Tensorboard with "tensorboard --logdir=runs", view at http://localhost:6006/'
    )
    tb_writer = SummaryWriter()
    # Ensure the checkpoint directory exists (no-op when already present,
    # and race-free compared to the exists()-then-makedirs pattern).
    os.makedirs("./weights", exist_ok=True)

    train_info, val_info, num_classes = read_split_data(args.data_path)
    train_images_path, train_images_label = train_info
    val_images_path, val_images_label = val_info

    # Validate the class count with a real exception: the original `assert`
    # is stripped under `python -O`, and its message had the two values
    # swapped (args.num_classes was labeled as the dataset's count).
    if args.num_classes != num_classes:
        raise ValueError("dataset num_classes: {}, input {}".format(
            num_classes, args.num_classes))

    data_transform = {
        "train":
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        "val":
        transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    }

    # Instantiate the training dataset.
    train_data_set = MyDataSet(images_path=train_images_path,
                               images_class=train_images_label,
                               transform=data_transform["train"])

    # Instantiate the validation dataset.
    val_data_set = MyDataSet(images_path=val_images_path,
                             images_class=val_images_label,
                             transform=data_transform["val"])

    batch_size = args.batch_size
    # Cap dataloader workers at CPU count, the batch size, and 8.
    nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0,
              8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))
    train_loader = torch.utils.data.DataLoader(
        train_data_set,
        batch_size=batch_size,
        shuffle=True,
        pin_memory=True,
        num_workers=nw,
        collate_fn=train_data_set.collate_fn)

    val_loader = torch.utils.data.DataLoader(
        val_data_set,
        batch_size=batch_size,
        shuffle=False,
        pin_memory=True,
        num_workers=nw,
        collate_fn=val_data_set.collate_fn)

    # Load pretrained weights when a path is given; keep only tensors whose
    # element count matches the freshly built model (this skips e.g. the fc
    # head when the checkpoint's class count differs).
    model = resnet34(num_classes=args.num_classes).to(device)
    if args.weights != "":
        if not os.path.exists(args.weights):
            raise FileNotFoundError("not found weights file: {}".format(
                args.weights))
        weights_dict = torch.load(args.weights, map_location=device)
        load_weights_dict = {
            k: v
            for k, v in weights_dict.items()
            if model.state_dict()[k].numel() == v.numel()
        }
        print(model.load_state_dict(load_weights_dict, strict=False))

    # Optionally freeze every layer except the final fully-connected layer.
    if args.freeze_layers:
        for name, para in model.named_parameters():
            if "fc" not in name:
                para.requires_grad_(False)

    pg = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(pg, lr=args.lr, momentum=0.9, weight_decay=0.005)

    # Cosine LR decay from args.lr down to args.lr * args.lrf over
    # args.epochs. Scheduler: https://arxiv.org/pdf/1812.01187.pdf
    def lf(x):
        return ((1 + math.cos(x * math.pi / args.epochs)) / 2) * (
            1 - args.lrf) + args.lrf

    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)

    for epoch in range(args.epochs):
        # train
        mean_loss = train_one_epoch(model=model,
                                    optimizer=optimizer,
                                    data_loader=train_loader,
                                    device=device,
                                    epoch=epoch)

        scheduler.step()

        # validate
        sum_num = evaluate(model=model, data_loader=val_loader, device=device)
        acc = sum_num / len(val_data_set)
        print("[epoch {}] accuracy: {}".format(epoch, round(acc, 3)))
        tags = ["loss", "accuracy", "learning_rate"]
        tb_writer.add_scalar(tags[0], mean_loss, epoch)
        tb_writer.add_scalar(tags[1], acc, epoch)
        tb_writer.add_scalar(tags[2], optimizer.param_groups[0]["lr"], epoch)

        # Checkpoint the raw state dict every epoch.
        torch.save(model.state_dict(), "./weights/model-{}.pth".format(epoch))
Example #29
0
def main(args=None):
    """Train a RetinaNet detector on a COCO- or CSV-format dataset.

    Parses CLI options, builds the datasets and dataloaders, instantiates a
    ResNet-backboned RetinaNet of the requested depth, and runs the training
    loop with per-epoch evaluation and checkpointing.

    Args:
        args: optional list of CLI argument strings (defaults to sys.argv).

    Raises:
        ValueError: on a missing/unknown dataset option or unsupported depth.
    """
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)

    parser = parser.parse_args(args)

    # Create the data loaders.
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO,')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':
        # Fixed: these error messages previously said "COCO" for the csv
        # options.
        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train when training on csv,')
        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on csv,')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=2,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        # NOTE(review): dataloader_val is built but the evaluation below
        # consumes dataset_val directly; kept for parity with the original.
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model: dispatch on the requested backbone depth.
    backbones = {
        18: model.resnet18,
        34: model.resnet34,
        50: model.resnet50,
        101: model.resnet101,
        152: model.resnet152,
    }
    if parser.depth not in backbones:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    retinanet = backbones[parser.depth](
        num_classes=dataset_train.num_classes(), pretrained=True)

    # Fixed: only move to the GPU when one exists; the original called
    # .cuda() unconditionally and crashed on CPU-only machines.
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet)
    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    # Reduce the LR when the mean epoch loss plateaus for > 3 epochs.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    # Running window of recent per-iteration losses for the progress log.
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()  # BN stays frozen throughout training

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                img = data['img'].float()
                if use_gpu:
                    img = img.cuda()
                classification_loss, regression_loss = retinanet(
                    [img, data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                # Skip degenerate batches that produce no loss at all.
                if bool(loss == 0):
                    continue

                loss.backward()

                # Clip gradients to stabilise early training.
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                # Drop graph references eagerly to reduce memory pressure.
                del classification_loss
                del regression_loss
            except Exception as e:
                # Best-effort training: log the failure and keep going.
                print(e)
                continue

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            # Return value intentionally discarded; evaluate() reports mAP.
            csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))

        # Per-epoch checkpoint of the unwrapped model.
        torch.save(
            retinanet.module,
            '{}_retinanet_dilation_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()

    # Fixed: the original saved the DataParallel wrapper via a .format()
    # call on a template with no placeholder; save the bare module instead,
    # consistent with the per-epoch checkpoints above.
    torch.save(retinanet.module, 'model_final_dilation.pt')
Example #30
0
def main(config):
    """Train a siamese RetinaNet on CSV-annotated image pairs.

    Seeds the RNGs, builds train/val datasets from CSV annotation files plus
    ImageFolder directories under ``config.data_dir``, constructs a ResNet
    backbone of the requested depth, and trains with Adam, evaluating after
    each epoch and stopping early after >3 epochs without improvement in the
    validation pair accuracy.

    Args:
        config: namespace with at least model_date, save_model, csv_train,
            csv_classes, csv_val, data_dir, depth, use_gpu, batch_size and
            epochs attributes.

    Raises:
        ValueError: if csv_train/csv_classes are missing or the depth is not
            one of 18, 34, 50, 101, 152.
    """
    # set seed for reproducibility
    np.random.seed(0)
    torch.manual_seed(0)
    torch.cuda.manual_seed(0)
    # create folder for model checkpoints (os.makedirs raises if it exists)
    newpath = './models/' + config.model_date
    if config.save_model:
        os.makedirs(newpath)

    # Create the data loaders
    if config.csv_train is None:
        raise ValueError('Must provide --csv_train when training on csv,')

    if config.csv_classes is None:
        raise ValueError('Must provide --csv_classes when training on csv,')

    # Training data: ImageFolder supplies the pair images consumed by
    # GetDataset alongside the CSV detection annotations.
    train_dataset = datasets.ImageFolder(os.path.join(config.data_dir,
                                                      'train'))
    dataset_train = GetDataset(train_file=config.csv_train,
                               class_list=config.csv_classes,
                               transform=transforms.Compose(
                                   [Augmenter(), Resizer()]),
                               dataset=train_dataset,
                               seed=0)
    dataloader_train = DataLoader(dataset_train,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=1,
                                  collate_fn=collater)

    # Validation is optional; no augmentation is applied on the val split.
    if config.csv_val is None:
        dataset_val = None
        print('No validation annotations provided.')
    else:
        valid_dataset = datasets.ImageFolder(
            os.path.join(config.data_dir, 'valid'))
        dataset_val = GetDataset(train_file=config.csv_val,
                                 class_list=config.csv_classes,
                                 transform=transforms.Compose([Resizer()]),
                                 dataset=valid_dataset,
                                 seed=0)

    # Create the model with an ImageNet-pretrained backbone.
    if config.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif config.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif config.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif config.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif config.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    # NOTE(review): .cuda() is also called unconditionally on the next
    # DataParallel line, so this path requires a GPU regardless of
    # config.use_gpu — confirm that is intended.
    if config.use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    # Reduce the LR when the mean epoch loss plateaus for > 3 epochs.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    # Early-stopping state: best pair accuracy so far and epochs without
    # improvement.
    best_valid_map = 0
    counter = 0
    batch_size = config.batch_size

    for epoch_num in range(config.epochs):
        print('\nEpoch: {}/{}'.format(epoch_num + 1, config.epochs))
        retinanet.train()
        retinanet.module.freeze_bn()  # keep BN frozen every epoch

        epoch_loss = []

        # Running meters for the tqdm progress description.
        train_batch_time = AverageMeter()
        train_losses = AverageMeter()
        tic = time.time()
        with tqdm(total=len(dataset_train)) as pbar:
            for iter_num, data in enumerate(dataloader_train):
                # try:
                optimizer.zero_grad()
                # The model consumes [image batch, annotations, pair batch]
                # and returns the siamese, classification and regression
                # losses.
                siamese_loss, classification_loss, regression_loss = retinanet(
                    [
                        data['img'].cuda().float(), data['annot'],
                        data['pair'].cuda().float()
                    ])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss + siamese_loss

                # Skip degenerate batches that produce no loss at all.
                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                optimizer.step()
                epoch_loss.append(float(loss))

                toc = time.time()
                train_losses.update(float(loss), batch_size)
                train_batch_time.update(toc - tic)
                tic = time.time()

                pbar.set_description(("{:.1f}s - loss: {:.3f}".format(
                    train_batch_time.val,
                    train_losses.val,
                )))
                pbar.update(batch_size)

                # Drop graph references eagerly to reduce memory pressure.
                del classification_loss
                del regression_loss
                del siamese_loss

                # except Exception as e:
                #     print('Training error: ', e)
                #     continue

        if config.csv_val is not None:
            print('Evaluating dataset')
            mAP, correct = eval_new.evaluate(dataset_val, retinanet)

            # Early stopping tracks the raw `correct` count, not mAP.
            # is_best = mAP[0][0] > best_valid_map
            # best_valid_map = max(mAP[0][0], best_valid_map)
            is_best = correct > best_valid_map
            best_valid_map = max(correct, best_valid_map)
            if is_best:
                counter = 0
            else:
                counter += 1
                if counter > 3:
                    print("[!] No improvement in a while, stopping training.")
                    break

        scheduler.step(np.mean(epoch_loss))
        # NOTE(review): is_best (and mAP/correct in the summary below) are
        # only assigned inside the csv_val branch above; with csv_val=None
        # the next line raises NameError — confirm csv_val is mandatory here.
        if is_best and config.save_model:
            torch.save(
                retinanet.state_dict(),
                './models/{}/best_retinanet.pt'.format(config.model_date))
        if config.save_model:
            torch.save(
                retinanet.state_dict(),
                './models/{}/{}_retinanet_{}.pt'.format(
                    config.model_date, config.depth, epoch_num))

        msg = "train loss: {:.3f} - val map: {:.3f} - val acc: {:.3f}%"
        print(
            msg.format(train_losses.avg, mAP[0][0],
                       (100. * correct) / len(dataset_val)))