def classifier(args, clf, train, pretrained_dir="/"):
    """Set classifier

    Arguments:
        dataset -- "Cifar10" or "Cifar100"
        clf -- "resnet18", "resnet50", or "resnet101"
        train {bool} -- Train or not

    Keyword Arguments:
        pretrained_dir {str} -- pretrained weights path (default: {"/"})

    Returns:
        Model
    """
    num_classes = args.num_classes
    input_channels = args.image_channels

    map_location = (lambda s, _: s)
    checkpoint_dir = os.path.join(pretrained_dir, args.dataset, clf + '.pth')

    if clf.lower() == 'resnet18':
        if train:
            net = resnet18(num_classes, input_channels)
        else:
            checkpoint = torch.load(checkpoint_dir, map_location=map_location)
            net = resnet18(num_classes, input_channels)
            net.load_state_dict(checkpoint['model_state_dict'])

    elif clf.lower() == 'resnet50':
        if train:
            net = resnet50(num_classes, input_channels)
        else:
            checkpoint = torch.load(checkpoint_dir, map_location=map_location)
            net = resnet50(num_classes, input_channels)
            net.load_state_dict(checkpoint['model_state_dict'])

    elif clf.lower() == 'resnet101':
        if train:
            net = resnet101(num_classes, input_channels)
        else:
            checkpoint = torch.load(checkpoint_dir, map_location=map_location)
            net = resnet101(num_classes, input_channels)
            net.load_state_dict(checkpoint['model_state_dict'])

    else:
        raise Exception(
            "You can choose the model among [resnet18, resnet 50, resnet101]")

    return net
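# Usage sketch for classifier() above (illustrative, not from the original
# source): `args` is a hypothetical namespace whose attribute names follow the
# function body, and the weights layout follows checkpoint_dir, i.e.
# "<pretrained_dir>/<dataset>/<clf>.pth".
import argparse

args = argparse.Namespace(num_classes=10, image_channels=3, dataset='Cifar10')
net = classifier(args, 'resnet18', train=True)  # fresh, randomly initialized model
# classifier(args, 'resnet18', train=False, pretrained_dir='./weights')
# would instead load './weights/Cifar10/resnet18.pth'.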
Example #2
def train_main(args):
    global loader_train, loader_val
    loader_train, loader_val, loader_test = load_data(train_bath_size=BATCH_SIZE, args=args, RANDOM_SEED=RANDOM_SEED, val_batch_size=BATCH_SIZE)

    device = set_device()
    setup_seed(RANDOM_SEED)  # random seed

    #model = googleNet()
    model = resnet18()
    #model = load_model(model, args.pretrained_model_path, device=device)
    model = nn.DataParallel(model)  # multi-GPU

    criterion = nn.CrossEntropyLoss()

    params = net_lr(model, FC_LR, NET_LR)

    if OPTIMIZER == 'adam':
        optimizer = torch.optim.Adam(params, betas=(0.9, 0.999), weight_decay=0, eps=1e-08)
    else:
        optimizer = torch.optim.SGD(params, momentum=MOMENTUM, nesterov=True,
                                    weight_decay=WEIGHT_DECAY)

    print(model)
    start_epoch = 0
    if Load_model:
        start_epoch = 25
        filepath = 'load_model_path'
        model = load_model(model, filepath, device=device)
        model = model.to(device=device)
        optimizer = load_optimizer(optimizer, filepath, device=device)

    train(model, optimizer, criterion, device=device, epochs=EPOCH, start=start_epoch)
Example #3
def model_init(model_name):

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if model_name == 'retinanet':
        #weight_file_path = '/content/retinanet/resnet34-333f7ec4.pth'
        #weight_file_path = '/content/retinanet/CP_epoch5.pth'
        weight_file_path = '/content/retinanet/retinanet50_pretrained.pth'
    else:
        raise ValueError('Unsupported model name: {}'.format(model_name))

    total_keys = len(list(torch.load(weight_file_path).keys()))

    # Create the model
    if 102 <= total_keys < 182:
        retinanet = model.resnet18(num_classes=num_classes, pretrained=False)

    elif 182 <= total_keys < 267:
        retinanet = model.resnet34(num_classes=num_classes, pretrained=False)

    elif 267 <= total_keys < 522:
        retinanet = model.resnet50(num_classes=num_classes, pretrained=False)

    elif 522 <= total_keys < 777:
        retinanet = model.resnet101(num_classes=num_classes, pretrained=False)

    elif total_keys >= 777:
        retinanet = model.resnet152(num_classes=num_classes, pretrained=False)

    else:
        raise ValueError('Unsupported model backbone, must be one of resnet18, resnet34, resnet50, resnet101, resnet152')

    retinanet.load_state_dict(torch.load(weight_file_path, map_location=device), strict=False)  # initialising model with loaded weights
    print('model initialized..')

    return retinanet, device
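# The depth heuristic in model_init() keys off how many entries the
# checkpoint's state dict contains. A minimal sketch of that inspection,
# assuming the file is a plain state dict (path taken from the snippet above):
import torch

state = torch.load('/content/retinanet/retinanet50_pretrained.pth', map_location='cpu')
print(len(state.keys()))  # a value in [267, 522) selects the ResNet-50 branch above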
Example #4
def cnn(classes=5):
    model = resnet18(num_classes=classes)
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=keras.optimizers.Adam(lr=0.001,
                                                  beta_1=0.9,
                                                  beta_2=0.999),
                  metrics=['accuracy'])
    return model
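# Hedged usage sketch for cnn() above: unlike the surrounding PyTorch
# snippets, this resnet18 is assumed to be a Keras model exposing
# compile()/fit(). The input shape below is purely illustrative.
import numpy as np

model = cnn(classes=5)
x = np.random.rand(8, 224, 224, 3).astype("float32")  # dummy image batch
y = np.random.randint(0, 5, size=(8,))                # dummy integer labels
model.fit(x, y, epochs=1, batch_size=4)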
Example #5
    def __init__(self, batch_size, lr, num_workers, data_root, **kwargs):
        super(LitDistModule, self).__init__()
        self.model = resnet18(with_fc=False)

        self.batch_size = batch_size
        self.lr = lr
        self.num_workers = num_workers
        self.data_root = data_root
Example #6
def test_resnet18(test_loader):
    model = resnet18(input_channel=1, num_classes=10)
    print("ResNet-18:")
    print(model)
    for batch_idx, (data, target) in enumerate(test_loader):
        print("Input data shape: {}".format(data.shape))
        output = model(data)
        print("Output shape: {}".format(output.shape))
        break
Example #7
    def __init__(self, lr, batch_size, num_workers, **kwargs):
        super(BasePL, self).__init__()
        self.model = resnet18(with_fc=True)
        self.criterion = torch.nn.CrossEntropyLoss()
        self.train_accuracy = pl.metrics.Accuracy()

        self.batch_size = batch_size
        self.lr = lr
        self.num_workers = num_workers

        self.save_hyperparameters()
Example #8
def main_dropout(data_args, train_args, model_args):
    train_loader, val_loader, test_loader = get_dataloaders(data_args, load_test=False)
    #train_loader, val_loader, test_loader = get_dataloaders_incr(data_args, load_test=False)
    #train_loader, val_loader = train_loader[0], val_loader[0]

    assert model_args.load_state_path, 'please specify a path to a pretrained model'
    state = torch.load(model_args.load_state_path)
    net = resnet18(num_classes=data_args.num_classes, seed=data_args.seed, disable_bn_stats=model_args.disable_bn_stats)
    net.load_state_dict(state)
    net.cuda()

    drop_features(train_args, net, train_loader, val_loader, device=0)
Example #9
def main_consolidate(data_args, train_args, model_args):
    assert model_args.load_state_path, 'please specify a path to a pretrained model'
    state = torch.load(model_args.load_state_path)
    net = resnet18(num_classes=data_args.num_classes, seed=data_args.seed, disable_bn_stats=model_args.disable_bn_stats)
    if data_args.num_classes != state['fc.weight'].shape[0]:
        net.fc = nn.Linear(net.fc.in_features, state['fc.bias'].shape[0], bias=True)
    net.load_state_dict(state)
    net.cuda()

    if train_args.single_task:
        consolidate_single_task(data_args, train_args, net, device=0)
    else:
        consolidate_multi_task(data_args, train_args, net, device=0)
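# main_consolidate() above rebuilds the classifier head when the checkpoint
# was trained with a different number of classes. The same pattern in
# isolation (a hedged sketch; the 512/10/100 sizes are illustrative only):
import torch.nn as nn

fc = nn.Linear(512, 10)                          # current head: 512 features -> 10 classes
fc = nn.Linear(fc.in_features, 100, bias=True)   # rebuilt to match a 100-class checkpoint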
Example #10
    def load_model(self):
        self.checkpoint = torch.load(self.model_checkpoint_file_path,
                                     map_location=lambda storage, loc: storage)
        self.model_args = self.checkpoint['args']

        self.num_classes = None
        if self.model_args.model_type == 'food179':
            self.num_classes = 179
        elif self.model_args.model_type == 'nsfw':
            self.num_classes = 5
        else:
            raise NotImplementedError('Not Implemented!')

        if self.model_args.model_arc == 'resnet18':
            self.model = model.resnet18(num_classes=self.num_classes,
                                        zero_init_residual=True)
        elif self.model_args.model_arc == 'resnet34':
            self.model = model.resnet34(num_classes=self.num_classes,
                                        zero_init_residual=True)
        elif self.model_args.model_arc == 'resnet50':
            self.model = model.resnet50(num_classes=self.num_classes,
                                        zero_init_residual=True)
        elif self.model_args.model_arc == 'resnet101':
            self.model = model.resnet101(num_classes=self.num_classes,
                                         zero_init_residual=True)
        elif self.model_args.model_arc == 'resnet152':
            self.model = model.resnet152(num_classes=self.num_classes,
                                         zero_init_residual=True)
        elif self.model_args.model_arc == 'mobilenet':
            self.model = model.MobileNetV2(n_class=self.num_classes,
                                           input_size=256)
        else:
            raise NotImplementedError('Not Implemented!')

        self.model = nn.DataParallel(self.model)
        self.model.load_state_dict(self.checkpoint['model_state_dict'])
        self.model_epoch = self.checkpoint['epoch']
        self.model_test_acc = self.checkpoint['test_acc']
        self.model_best_acc = self.checkpoint['best_acc']
        self.model_test_acc_top5 = self.checkpoint['test_acc_top5']
        self.model_class_to_idx = self.checkpoint['class_to_idx']
        self.model_idx_to_class = {
            v: k
            for k, v in self.model_class_to_idx.items()
        }
        self.model_train_history_dict = self.checkpoint['train_history_dict']
        self.mean = self.checkpoint['NORM_MEAN']
        self.std = self.checkpoint['NORM_STD']
        self.model.eval()

        return
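    # The idx_to_class dict above is the usual inversion of class_to_idx so
    # that predicted indices can be mapped back to label names. In isolation:
    #
    #     class_to_idx = {'cat': 0, 'dog': 1}
    #     idx_to_class = {v: k for k, v in class_to_idx.items()}  # {0: 'cat', 1: 'dog'}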
Example #11
def predict_bagging(args):
    global loader_train, loader_val
    loader_train, loader_val, loader_test = load_data(train_bath_size=BATCH_SIZE, args=args, RANDOM_SEED=RANDOM_SEED, val_batch_size=BATCH_SIZE)

    device = set_device()
    setup_seed(RANDOM_SEED)  # random seed

    model = resnet18()
    model_list=[]
    for maindir, subdir, file_name_list in os.walk(args.bagging_root):
        for filename in file_name_list:
            model_path = os.path.join(maindir, filename)
            print(model_path)
            model_i = load_model(model, model_path, device=device)
            model_i = model_i.to(device=device)
            model_i.eval()
            model_list.append(copy.deepcopy(model_i))

    result = []
    for batch_idx, (x, label, idx) in enumerate(loader_test):

        x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
        x = Variable(x)

        oneHot_result_sum = torch.zeros(x.size(0), 10)
        for model_i in model_list:
            output = model_i(x)
            _, predicted = output.max(1)
            predicted_cpu = predicted.cpu()
            oneHot_predicted = torch.zeros(output.size(0), output.size(1)).scatter_(1, predicted_cpu.unsqueeze(1), 1)
            oneHot_result_sum += oneHot_predicted

        _, bagging_result = oneHot_result_sum.max(1)

        bagging_result = bagging_result.cpu()

        if len(idx.shape) == 1:
            idx = idx.unsqueeze(1)
        index_predicted = torch.cat([idx, bagging_result.unsqueeze(1)], dim=1)
        index_predicted = index_predicted.cpu().data.numpy()
        result.extend(index_predicted)

    headers = ['image_id', 'label']

    with open(args.predict_output_root, 'w', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerow(headers)
        f_csv.writerows(result)
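# The bagging vote above one-hot encodes each model's argmax with scatter_()
# before summing votes across models. The pattern in isolation (the class
# count of 10 mirrors the hard-coded oneHot_result_sum width):
import torch

preds = torch.tensor([2, 0, 2])  # per-sample argmax from one model
one_hot = torch.zeros(3, 10).scatter_(1, preds.unsqueeze(1), 1)
print(one_hot[0])  # 1.0 at index 2, zeros elsewhere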
Example #12
def main(args=None):

    data_set = {
        x: guipang(cfg=cfg['dataset_guipang'], part=x) for x in ['train', 'val']
    }
    # data_set = {
    #     x: qiafan(cfg=cfg['dataset_qiafan'], part=x) for x in ['train', 'val']
    # }
    data_loader = {
        x: data.DataLoader(data_set[x], batch_size=cfg['batch_size'],
                           num_workers=4, shuffle=True, pin_memory=False)
        for x in ['train', 'val']
    }

    # Create the model
    if cfg['depth'] == 18:
        retinanet = model.resnet18(
            num_classes=data_set['train'].num_classes(), pretrained=True)
    elif cfg['depth'] == 34:
        retinanet = model.resnet34(
            num_classes=data_set['train'].num_classes(), pretrained=True)
    elif cfg['depth'] == 50:
        retinanet = model.resnet50(
            num_classes=data_set['train'].num_classes(), pretrained=True)
    elif cfg['depth'] == 101:
        retinanet = model.resnet101(
            num_classes=data_set['train'].num_classes(), pretrained=True)
    elif cfg['depth'] == 152:
        retinanet = model.resnet152(
            num_classes=data_set['train'].num_classes(), pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
Example #13
def get_nn_model(run_dir, ds):
    config.log.info('==> Building model...')
    if config.args.model not in config.torch_models:
        raise NotImplementedError
    net = None
    if config.args.model == config.Resnet18:
        net = model.resnet18(num_classes=ds.label_count)
    elif config.args.model == config.OverfitResnet:
        net = model.OverfitResNet18(num_classes=ds.label_count)
    elif config.args.model == config.MLP:
        net = model.create_mlp(len(ds.feature_columns), ds.label_count)
    elif config.args.model == config.LeNet:
        if config.args.dataset in config.ColoredMNIST_lst:
            net = model.LeNetMNIST(3, ds.label_count)  # used for colored MNIST
        elif config.args.dataset == config.Timit:
            net = model.LeNetTIMIT(1, ds.label_count)  # used for TIMIT
        elif config.args.dataset == config.Timit2Groups:
            # net = model.LeNetTIMIT(1, ds.label_count)  # used for TIMIT
            net = model.LeNetTIMIT2(1, ds.label_count)  # used for TIMIT (2 groups)
    net = net.to(config.args.torch_device)
    if config.args.task in [
            *config.augment_testing_lst, config.evaluate_fairness
    ]:
        net_path = os.path.join(run_dir, config.args.save_model)
        config.log.info(f'==> Loading model from: {net_path}')
        if config.args.use_cuda:
            state = torch.load(net_path)
        else:
            state = torch.load(net_path, map_location=torch.device('cpu'))
        net.load_state_dict(state['net'])
    # support cuda
    if config.args.use_cuda:
        config.log.info('Using CUDA')
        config.log.info('Parallel training on {0} GPUs.'.format(
            torch.cuda.device_count()))
        net = torch.nn.DataParallel(net,
                                    device_ids=list(
                                        range(torch.cuda.device_count())))
        cudnn.benchmark = True
    return net
Example #14
def main(args=None):
    from dataloader import JinNanDataset, Augmenter, UnNormalizer, Normalizer,Resizer
    from torch.utils.data import Dataset, DataLoader
    from torchvision import datasets, models, transforms
    import model
    import torch
    import argparse


    parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')
    parser.add_argument('--dataset', default='jingnan', help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--threshold', help='threshold')
    parser.add_argument('--dataset_path', help='Path to file containing training and validation annotations (optional, see readme)')
    parser.add_argument('--model_path', help='the model path')
    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
    parser = parser.parse_args(args)

    dataset_val = JinNanDataset(parser.dataset_path, set_name='val', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_val.num_classes(), pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_val.num_classes(), pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_val.num_classes(), pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_val.num_classes(), pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_val.num_classes(), pretrained=True)
    else:
        raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    retinanet = torch.load(parser.model_path)
    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()
    retinanet.eval()
    print('Evaluating dataset')
    evaluate_jinnan(dataset_val, retinanet)
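# Note: main() above builds a backbone from parser.depth and then immediately
# overwrites it with torch.load(parser.model_path), which unpickles a whole
# nn.Module. A hedged sketch of the state-dict alternative (the .pth filename
# here is hypothetical):
#
#     retinanet = model.resnet50(num_classes=dataset_val.num_classes(), pretrained=False)
#     retinanet.load_state_dict(torch.load('retinanet_state.pth', map_location='cpu'))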
Example #15
def train(rank, nprocs, args):
    print(rank)
    torch.distributed.init_process_group(backend='nccl',
                                         init_method='tcp://127.0.0.1:23456',
                                         rank=rank,
                                         world_size=args.world_size)
    # seed for reproducibility
    torch.manual_seed(1)
    torch.cuda.manual_seed(1)
    torch.backends.cudnn.deterministic = True
    # create dataset.
    #train_loader, test_loader = partition_dataset(rank, args.world_size, args)
    train_loader, test_loader, train_sampler = create_dataloader(
        '../data', args.world_size, args.batch_size)
    print("loading dataset successed!")
    # create model.
    model = resnet18()

    torch.cuda.set_device(rank)
    model.cuda(rank)
    cudnn.benchmark = True
    # define the optimizer.
    #optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=0.9)
    optimizer = LocalSGD(model.parameters(),
                         lr=args.lr,
                         gmf=0,
                         tau=args.tau,
                         size=args.world_size,
                         momentum=0.9,
                         nesterov=True,
                         weight_decay=1e-4)
    # define the criterion and lr scheduler.
    criterion = nn.CrossEntropyLoss().cuda()
    for epoch in range(args.epoches):
        acc = train_one_epoch(model, optimizer, criterion, train_loader,
                              test_loader, epoch, rank)
        print(acc)
        break
Example #16
    def build(self, depth=50, learning_rate=1e-5, ratios=[0.5, 1, 2],
              scales=[2 ** 0, 2 ** (1.0 / 3.0), 2 ** (2.0 / 3.0)]):
        # Create the model
        if depth == 18:
            retinanet = model.resnet18(num_classes=self.dataset_train.num_classes(), ratios=ratios, scales=scales,
                                       weights_dir=self.weights_dir_path,
                                       pretrained=True)
        elif depth == 34:
            retinanet = model.resnet34(num_classes=self.dataset_train.num_classes(), ratios=ratios, scales=scales,
                                       weights_dir=self.weights_dir_path,
                                       pretrained=True)
        elif depth == 50:
            retinanet = model.resnet50(num_classes=self.dataset_train.num_classes(), ratios=ratios, scales=scales,
                                       weights_dir=self.weights_dir_path,
                                       pretrained=True)
        elif depth == 101:
            retinanet = model.resnet101(num_classes=self.dataset_train.num_classes(), ratios=ratios, scales=scales,
                                        weights_dir=self.weights_dir_path,
                                        pretrained=True)
        elif depth == 152:
            retinanet = model.resnet152(num_classes=self.dataset_train.num_classes(), ratios=ratios, scales=scales,
                                        weights_dir=self.weights_dir_path,
                                        pretrained=True)
        else:
            raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')
        self.retinanet = retinanet.to(device=self.device)
        self.retinanet.training = True
        self.optimizer = optim.Adam(self.retinanet.parameters(), lr=learning_rate)
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, patience=3, verbose=True)

        if self.checkpoint is not None:
            self.retinanet.load_state_dict(self.checkpoint['model'])
            self.optimizer.load_state_dict(self.checkpoint['optimizer'])
            self.scheduler.load_state_dict(self.checkpoint['scheduler'])
            # TODO: verify that resuming the optimizer and scheduler this way is correct
        self.ratios = ratios
        self.scales = scales
        self.depth = depth
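        # The resume block above mirrors the standard PyTorch checkpoint
        # pattern. The matching save side would look like this sketch (key
        # names chosen to match the load_state_dict calls; the filename is
        # hypothetical):
        #
        #     torch.save({'model': self.retinanet.state_dict(),
        #                 'optimizer': self.optimizer.state_dict(),
        #                 'scheduler': self.scheduler.state_dict()}, 'checkpoint.pt')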
Example #17
def predict_main(args):
    global loader_train, loader_val
    loader_train, loader_val, loader_test = load_data(train_bath_size=BATCH_SIZE, args=args, RANDOM_SEED=RANDOM_SEED, val_batch_size=BATCH_SIZE)

    device = set_device()
    setup_seed(RANDOM_SEED)  # random seed

    model = resnet18()
    model = load_model(model, args.load_model_path, device=device)
    model = model.to(device=device)

    test_epoch(model, nn.CrossEntropyLoss(), loader_val, device, 0, 1)

    model.eval()
    result = []
    for batch_idx, (x, label, idx) in enumerate(loader_test):

        x = x.to(device=device, dtype=dtype)  # move to device, e.g. GPU
        x = Variable(x)

        output = model(x)
        _, predicted = output.max(1)

        predicted = predicted.cpu()

        if len(idx.shape) == 1:
            idx = idx.unsqueeze(1)
        index_predicted = torch.cat([idx, predicted.unsqueeze(1)], dim=1)
        index_predicted = index_predicted.cpu().data.numpy()
        result.extend(index_predicted)

    headers = ['image_id', 'label']

    with open(args.predict_output_root, 'w', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerow(headers)
        f_csv.writerows(result)
Example #18
    def set_models(self, dataset_train):
        # Create the model
        if self.depth == 18:
            retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                       pretrained=True)
        elif self.depth == 34:
            retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                       pretrained=True)
        elif self.depth == 50:
            retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                       pretrained=True)
        elif self.depth == 101:
            retinanet = model.resnet101(
                num_classes=dataset_train.num_classes(), pretrained=True)
        elif self.depth == 152:
            retinanet = model.resnet152(
                num_classes=dataset_train.num_classes(), pretrained=True)
        else:
            raise ValueError(
                'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

        if torch.cuda.device_count() > 1:
            print("Let's use", torch.cuda.device_count(), "GPUs!")
            # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
            retinanet = nn.DataParallel(retinanet)

        self.retinanet = retinanet.to(self.device)
        self.retinanet.training = True
        self.optimizer = optim.Adam(self.retinanet.parameters(), lr=self.lr)

        # This lr scheduler reduces the learning rate based on the model's validation loss
        self.scheduler = optim.lr_scheduler.ReduceLROnPlateau(self.optimizer,
                                                              patience=3,
                                                              verbose=True)

        self.loss_hist = collections.deque(maxlen=500)
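        # ReduceLROnPlateau only lowers the LR when the metric passed to
        # step() stops improving; the expected usage (a sketch) is one call
        # per epoch with the validation loss:
        #
        #     val_loss = validate(...)          # hypothetical helper
        #     self.scheduler.step(val_loss)     # LR drops after `patience` bad epochs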
Example #19
def train(num_epoch, learning_rate, save_name, batch_size, early_stopping_epoch=10, cuda_flag=False):
    # basic parameters
    lr_decay = 500
    lr = learning_rate

    model = resnet18()
    if cuda_flag:
        print('using cuda...')
        model = model.cuda()
    else:
        print('using cpu...')
    # initialize model parameters
    def weight_init(m):
        # use isinstance to check which layer type m is
        if isinstance(m, nn.Conv2d):
            # n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
            # m.weight.data.normal_(0, math.sqrt(2. / n))
            torch.nn.init.xavier_uniform_(m.weight)
            if m.bias is not None:  # conv layers are often bias-free
                torch.nn.init.constant_(m.bias, 0.1)
        elif isinstance(m, nn.Linear):
            m.weight.data.normal_()  # initialize fully connected layers
        elif isinstance(m, nn.BatchNorm2d):
            # weight and bias of m are learnable parameters updated by backprop
            m.weight.data.fill_(1)
            m.bias.data.zero_()
    model.apply(weight_init)

    # define the loss function
    loss_func = nn.CrossEntropyLoss()
    # define the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer, 1000, gamma=lr_decay)

    train_data_loader, valid_data_loader, test_data_loader, _ = my_data_loader(
        batch_size)
    train_loss_epoch = []
    valid_loss_epoch = []
    best_model = None
    import sys
    min_valid_loss = sys.maxsize
    for epoch in range(num_epoch):
        model.train()
        train_loss = 0
        train_pred_label = []
        train_true_label = []
        valid_pred_label = []
        valid_true_label = []
        # evaluate accuracy
        for x, y in train_data_loader:
            train_true_label.extend(y)
            if cuda_flag:
                x = x.cuda()
                y = y.cuda()
            optimizer.zero_grad()  # critical: without this, both training and validation loss stay huge
            pred_y = model(x)
            loss = loss_func(pred_y, y)
            pred_label = torch.argmax(pred_y.detach().cpu(), dim=1)  # .cpu() is a no-op on CPU tensors
            train_pred_label.extend(pred_label)
            train_loss += loss.detach().cpu().numpy()
            loss.backward()
            optimizer.step()
        train_accuracy = accuracy_score(train_true_label, train_pred_label)

        # valid performance
        model.eval()
        valid_loss = 0

        for valid_x, valid_y in valid_data_loader:
            valid_true_label.extend(valid_y)
            if cuda_flag:
                valid_x = valid_x.cuda()
                valid_y = valid_y.cuda()
            with torch.no_grad():  # critical: without this, CUDA runs out of memory
                pred_valid_y = model(valid_x)
                cur_valid_loss = loss_func(pred_valid_y, valid_y)
            pred_valid_label = torch.argmax(pred_valid_y.detach().cpu(), dim=1)
            valid_pred_label.extend(pred_valid_label)
            valid_loss += cur_valid_loss.detach().cpu().numpy()
        valid_accuracy = accuracy_score(valid_true_label, valid_pred_label)
        print('===========epoch:{}, train_loss:{},  valid_loss:{}, train acc:{},valid acc:{}==========='.format(
            epoch, train_loss, valid_loss, train_accuracy, valid_accuracy))
        train_loss_epoch.append(train_loss)
        valid_loss_epoch.append(valid_loss)
    
        # only keep the model from the best epoch so far
        if valid_loss <= min_valid_loss:
            min_valid_loss = valid_loss
            best_model = copy.deepcopy(model)
        if early_stopping(valid_loss_epoch, early_stopping_epoch):
            break
        # checkpoint
        # previously checkpointed every 10 epochs:
        # if epoch % 10 == 0 and epoch != 0:
        #     checkpoint_tmp(model, save_name)
        # now only the best model seen before early stopping is saved
    min_loss_epoch = valid_loss_epoch.index(min(valid_loss_epoch))
    save_name += '-min_loss_epoch_{}'.format(min_loss_epoch)
    checkpoint_tmp(best_model, save_name)
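# train() above delegates to an early_stopping() helper that is not shown. A
# plausible sketch of such a helper, assuming it flags when validation loss
# has not improved for `patience` consecutive epochs:
def early_stopping(loss_history, patience):
    """Return True when the last `patience` losses never beat the earlier best."""
    if len(loss_history) <= patience:
        return False
    best_before = min(loss_history[:-patience])
    return all(loss >= best_before for loss in loss_history[-patience:])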
Example #20
def main(args=None):

    parser = argparse.ArgumentParser(description='Simple testing script for RetinaNet network.')

    parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.', default="csv")
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)', default="binary_class.csv")
    parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')
    parser.add_argument('--csv_box_annot', help='Path to file containing predicted box annotations')

    parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=18)
    parser.add_argument('--epochs', help='Number of epochs', type=int, default=500)
    parser.add_argument('--model', help='Path of .pt file with trained model', default='esposallescsv_retinanet_0.pt')
    parser.add_argument('--model_out', help='Path of .pt file with trained model to save', default='trained')

    parser.add_argument('--score_threshold', help='Score above which boxes are kept', default=0.15)
    parser.add_argument('--nms_threshold', help='IoU threshold for non-maximum suppression', default=0.2)
    parser.add_argument('--max_epochs_no_improvement', help='Max epochs without improvement', default=100)
    parser.add_argument('--max_boxes', help='Max boxes to be fed to recognition', default=50)
    parser.add_argument('--seg_level', help='Line or word, to choose anchor aspect ratio', default='line')
    parser.add_argument('--htr_gt_box', help='Train recognition branch with box gt (for debugging)', default=False)
    parser = parser.parse_args(args)
    
    # Create the data loaders

    if parser.dataset == 'csv':

        if parser.csv_classes is None:
            raise ValueError('Must provide --csv_classes when using a CSV dataset.')

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]))

        if parser.csv_box_annot is not None:
            box_annot_data = CSVDataset(train_file=parser.csv_box_annot, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]))
        else:
            box_annot_data = None
    else:
        raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

    
    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
        dataloader_val = DataLoader(dataset_val, num_workers=0, collate_fn=collater, batch_sampler=sampler_val)

    if box_annot_data is not None:
        sampler_val = AspectRatioBasedSampler(box_annot_data, batch_size=1, drop_last=False)
        dataloader_box_annot = DataLoader(box_annot_data, num_workers=0, collate_fn=collater, batch_sampler=sampler_val)

    else:
        dataloader_box_annot = dataloader_val

    if not os.path.exists('trained_models'):
        os.mkdir('trained_models')

    # Create the model

    alphabet = dataset_val.alphabet
    if os.path.exists(parser.model):
        retinanet = torch.load(parser.model)
    else:
        if parser.depth == 18:
            retinanet = model.resnet18(num_classes=dataset_val.num_classes(), pretrained=True, max_boxes=int(parser.max_boxes), score_threshold=float(parser.score_threshold), seg_level=parser.seg_level, alphabet=alphabet)
        elif parser.depth == 34:
            retinanet = model.resnet34(num_classes=dataset_val.num_classes(), pretrained=True)
        elif parser.depth == 50:
            retinanet = model.resnet50(num_classes=dataset_val.num_classes(), pretrained=True)
        elif parser.depth == 101:
            retinanet = model.resnet101(num_classes=dataset_val.num_classes(), pretrained=True)
        elif parser.depth == 152:
            retinanet = model.resnet152(num_classes=dataset_val.num_classes(), pretrained=True)
        else:
            raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')
    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    #retinanet = torch.load('../Documents/TRAINED_MODELS/pytorch-retinanet/esposallescsv_retinanet_99.pt')
    #print("LOADED pretrained MODEL\n\n")

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-4)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=4, verbose=True)

    loss_hist = collections.deque(maxlen=500)
    ctc = CTCLoss()
    retinanet.module.freeze_bn()
    best_cer = 1000
    epochs_no_improvement = 0

    cers = []
    retinanet.eval()
    retinanet.module.epochs_only_det = 0
    #retinanet.module.htr_gt_box = False

    retinanet.training = False
    if parser.score_threshold is not None:
        retinanet.module.score_threshold = float(parser.score_threshold)
    
    '''if parser.dataset == 'csv' and parser.csv_val is not None:

        print('Evaluating dataset')
    '''
    mAP = csv_eval.evaluate(dataset_val, retinanet, score_threshold=retinanet.module.score_threshold)
    aps = []
    for k, v in mAP.items():
        aps.append(v[0])
    print("VALID mAP:", np.mean(aps))

    print("score th", retinanet.module.score_threshold)
    for idx, data in enumerate(dataloader_box_annot):
        print("Eval CER on validation set:", idx, "/", len(dataloader_box_annot), "\r")
        if box_annot_data:
            image_name = box_annot_data.image_names[idx].split('/')[-1].split('.')[-2]
        else:
            image_name = dataset_val.image_names[idx].split('/')[-1].split('.')[-2]
        #generate_pagexml(image_name, data, retinanet, parser.score_threshold, parser.nms_threshold, dataset_val)
        text_gt_path = "/".join(dataset_val.image_names[idx].split('/')[:-1])
        text_gt = os.path.join(text_gt_path, image_name + '.txt')
        with open(text_gt, 'r') as f:
            text_gt_lines = f.readlines()[0]
        transcript_pred = get_transcript(image_name, data, retinanet, retinanet.module.score_threshold, float(parser.nms_threshold), dataset_val, alphabet)
        cers.append(float(editdistance.eval(transcript_pred, text_gt_lines)) / len(text_gt_lines))
        print("GT", text_gt_lines)
        print("PREDS SAMPLE:", transcript_pred)
        print("VALID CER:", np.mean(cers), "best CER", best_cer)
    print("GT", text_gt_lines)
    print("PREDS SAMPLE:", transcript_pred)
    print("VALID CER:", np.mean(cers), "best CER", best_cer)
Example #21
def main(args=None):
#def main(epoch):
	parser = argparse.ArgumentParser(description='Simple training script for training a RetinaNet network.')

	parser.add_argument('--dataset', help='Dataset type, must be one of csv or coco.')
	parser.add_argument('--coco_path', help='Path to COCO directory')
	parser.add_argument('--csv_train', help='Path to file containing training annotations (see readme)')
	parser.add_argument('--csv_classes', help='Path to file containing class list (see readme)')
	parser.add_argument('--csv_val', help='Path to file containing validation annotations (optional, see readme)')

	parser.add_argument('--depth', help='Resnet depth, must be one of 18, 34, 50, 101, 152', type=int, default=50)
	parser.add_argument('--epochs', help='Number of epochs', type=int, default=100)

	#parser.add_argument('--resume', '-r', action='store_true', help='resume from checkpoint')
	parser.add_argument('--start-epoch', default=0, type=int, help='manual epoch number (useful on restarts)')

	parser.add_argument('--resume', default='', type=str, metavar='PATH', help='path to latest checkpoint (default: none)')

	parser = parser.parse_args(args)
	#args = parser.parse_args()        
	#parser = parser.parse_args(epoch)

	# Create the data loaders
	if parser.dataset == 'coco':

		if parser.coco_path is None:
			raise ValueError('Must provide --coco_path when training on COCO,')

		dataset_train = CocoDataset(parser.coco_path, set_name='train2017', transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
		dataset_val = CocoDataset(parser.coco_path, set_name='val2017', transform=transforms.Compose([Normalizer(), Resizer()]))

	elif parser.dataset == 'csv':

		if parser.csv_train is None:
			raise ValueError('Must provide --csv_train when training on COCO,')

		if parser.csv_classes is None:
			raise ValueError('Must provide --csv_classes when training on COCO,')


		dataset_train = CSVDataset(train_file=parser.csv_train, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Augmenter(), Resizer()]))

		if parser.csv_val is None:
			dataset_val = None
			print('No validation annotations provided.')
		else:
			dataset_val = CSVDataset(train_file=parser.csv_val, class_list=parser.csv_classes, transform=transforms.Compose([Normalizer(), Resizer()]))

	else:
		raise ValueError('Dataset type not understood (must be csv or coco), exiting.')

	sampler = AspectRatioBasedSampler(dataset_train, batch_size=4, drop_last=False)
	dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

	if dataset_val is not None:
		sampler_val = AspectRatioBasedSampler(dataset_val, batch_size=1, drop_last=False)
		dataloader_val = DataLoader(dataset_val, num_workers=3, collate_fn=collater, batch_sampler=sampler_val)

	# Create the model
	if parser.depth == 18:
		retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True)
	elif parser.depth == 34:
		retinanet = model.resnet34(num_classes=dataset_train.num_classes(), pretrained=True)
	elif parser.depth == 50:
		retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=True)
	elif parser.depth == 101:
		retinanet = model.resnet101(num_classes=dataset_train.num_classes(), pretrained=True)
	elif parser.depth == 152:
		retinanet = model.resnet152(num_classes=dataset_train.num_classes(), pretrained=True)
	else:
		raise ValueError('Unsupported model depth, must be one of 18, 34, 50, 101, 152')		

	use_gpu = True

	if use_gpu:
		retinanet = retinanet.cuda()

	#retinanet().load_state_dict(torch.load('/users/wenchi/ghwwc/Pytorch-retinanet-master/resnet50-19c8e357.pth'))

	#if True:
	#	print('==> Resuming from checkpoint..')
	#	checkpoint = torch.load('/users/wenchi/ghwwc/Pytorch-retinanet-master/coco_retinanet_2.pt')
	#	retinanet().load_state_dict(checkpoint)
	#	best_loss = checkpoint['loss']
	#	start_epoch = checkpoint['epoch']

	retinanet = torch.nn.DataParallel(retinanet).cuda()

	retinanet.training = True

	#optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
	optimizer = optim.SGD(retinanet.parameters(), lr=1e-5)

	scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)

	loss_hist = collections.deque(maxlen=500)

	retinanet.train()
	#retinanet.freeze_bn()               #for train from a middle state
	retinanet.module.freeze_bn()       #for train from the very beginning

	print('Num training images: {}'.format(len(dataset_train)))

	for epoch_num in range(parser.start_epoch, parser.epochs):

		if parser.resume:
			if os.path.isfile(parser.resume):
				print("=> loading checkpoint '{}'".format(parser.resume))
				checkpoint = torch.load(parser.resume)
				print(parser.start_epoch)
				#parser.start_epoch = checkpoint['epoch']
				#retinanet.load_state_dict(checkpoint['state_dict'])
				retinanet = checkpoint
				#retinanet.load_state_dict(checkpoint)
				print(retinanet)
				#optimizer.load_state_dict(checkpoint)
				print("=> loaded checkpoint '{}' (epoch {})".format(parser.resume, checkpoint))
			else:
				print("=> no checkpoint found at '{}'".format(parser.resume))

		retinanet.train()
		retinanet.freeze_bn()
		#retinanet.module.freeze_bn()

		if parser.dataset == 'coco':

			print('Evaluating dataset')

			coco_eval.evaluate_coco(dataset_val, retinanet)

		elif parser.dataset == 'csv' and parser.csv_val is not None:

			print('Evaluating dataset')

			mAP = csv_eval.evaluate(dataset_val, retinanet)
		
		epoch_loss = []
		
		for iter_num, data in enumerate(dataloader_train):
			try:
				optimizer.zero_grad()

				classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot'].cuda()])

				classification_loss = classification_loss.mean()
				regression_loss = regression_loss.mean()

				loss = classification_loss + regression_loss
				
				if bool(loss == 0):
					continue

				loss.backward()

				torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

				optimizer.step()

				loss_hist.append(float(loss))

				epoch_loss.append(float(loss))

				print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss), np.mean(loss_hist)))
				
				del classification_loss
				del regression_loss
			except Exception as e:
				print(e)
				continue

		if parser.dataset == 'coco':

			print('Evaluating dataset')

			coco_eval.evaluate_coco(dataset_val, retinanet)

		elif parser.dataset == 'csv' and parser.csv_val is not None:

			print('Evaluating dataset')

			mAP = csv_eval.evaluate(dataset_val, retinanet)

		
		scheduler.step(np.mean(epoch_loss))

		#torch.save(retinanet.module, '{}_retinanet_101_{}.pt'.format(parser.dataset, epoch_num))
		name = '{}_retinanet_dilation_experiment1_{}.pt'.format(parser.dataset, epoch_num)
		torch.save(retinanet, name)
		parser.resume = os.path.join('/users/wenchi/ghwwc/pytorch-retinanet-master_new', name)

	retinanet.eval()

	torch.save(retinanet, 'model_final_dilation_experiment1.pt')
Example #22
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--coco_path',
                        help='Path to COCO directory',
                        type=str,
                        default='./data/coco')
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--checkpoint',
                        help='The path to the checkpoint.',
                        type=str,
                        default=None)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument('--batch_size',
                        help='Number of batch',
                        type=int,
                        default=16)
    parser.add_argument('--gpu_ids',
                        help='Gpu parallel',
                        type=str,
                        default='1, 2')

    parser = parser.parse_args(args)

    # Create the data loaders
    dataset_train = CocoDataset(parser.coco_path,
                                set_name='train2017',
                                transform=transforms.Compose(
                                    [Normalizer(),
                                     Augmenter(),
                                     Resizer()]))
    dataset_val = CocoDataset(parser.coco_path,
                              set_name='val2017',
                              transform=transforms.Compose(
                                  [Normalizer(), Resizer()]))

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=4,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=16,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    sampler_val = AspectRatioBasedSampler(dataset_val,
                                          batch_size=1,
                                          drop_last=False)
    dataloader_val = DataLoader(dataset_val,
                                num_workers=3,
                                collate_fn=collater,
                                batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()
    gpu_ids = parser.gpu_ids.split(',')
    device = torch.device("cuda:" + gpu_ids[0])
    torch.cuda.set_device(device)
    gpu_ids = list(map(int, gpu_ids))
    retinanet = torch.nn.DataParallel(retinanet, device_ids=gpu_ids).to(device)

    if parser.checkpoint:
        pretrained = torch.load(parser.checkpoint).state_dict()
        retinanet.module.load_state_dict(pretrained)

    # add tensorboard to record train log
    retinanet.training = True
    writer = SummaryWriter('./log')
    # writer.add_graph(retinanet, input_to_model=[images, labels])

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                classification_loss, regression_loss = retinanet(
                    [data['img'].to(device), data['ann'].to(device)])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))
                writer.add_scalar('Loss/train', loss, iter_num)
                writer.add_scalar('Loss/reg_loss', regression_loss, iter_num)
                writer.add_scalar('Loss/cls_loss', classification_loss,
                                  iter_num)

                epoch_loss.append(float(loss))

                if (iter_num + 1) % 1000 == 0:
                    print('Save model')
                    torch.save(
                        retinanet.module,
                        'COCO_retinanet_epoch{}_iter{}.pt'.format(
                            epoch_num, iter_num))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        print('Evaluating dataset')

        coco_eval.evaluate_coco(dataset_val, retinanet, writer)

        scheduler.step(np.mean(epoch_loss))

        torch.save(retinanet.module, 'COCO_retinanet_{}.pt'.format(epoch_num))

    retinanet.eval()

    torch.save(retinanet, 'model_final.pt')
Example #23
def main():
    data_args, train_args, model_args = parse_args(IncrDataArgs,
                                                   ExperimentArgs,
                                                   AllModelArgs)
    if train_args.batch and not train_args.multihead:
        train_loader, val_loader, test_loader = get_dataloaders(
            data_args, load_test=False)
    else:
        train_loader, val_loader, test_loader = get_dataloaders_incr(
            data_args, load_test=False, multihead_batch=train_args.batch)

    state = None
    # load pretrained feature extractor if specified
    if model_args.load_state_path:
        state = torch.load(model_args.load_state_path)

    if model_args.arch == 'resnet18':
        net = resnet18(num_classes=data_args.num_classes,
                       seed=data_args.seed,
                       disable_bn_stats=model_args.disable_bn_stats)
        if state is not None:
            state['fc.weight'], state['fc.bias'] = net.fc.weight, net.fc.bias
            net.load_state_dict(state)
    elif model_args.arch == 'lrm_resnet18':
        net = load_lrm(state=state,
                       num_classes=data_args.num_classes,
                       seed=data_args.seed,
                       disable_bn_stats=model_args.disable_bn_stats,
                       n_blocks=model_args.n_blocks,
                       block_size_alpha=model_args.block_size_alpha,
                       route_by_task=model_args.route_by_task,
                       fit_keys=train_args.fit_keys)

    # save state initialization if we will be reinitializing the model before each new exposure
    if train_args.exposure_reinit:
        torch.save(
            net.state_dict(),
            join(train_args.model_save_dir,
                 append_to_file(train_args.model_save_path, 'init')))
    net.cuda()

    if train_args.batch:
        if train_args.multihead:
            # trains model on batches of data across tasks while enforcing classification predictions to be within task
            train_batch_multihead(train_args,
                                  net,
                                  train_loader,
                                  val_loader,
                                  device=0)
            np.savez(join(train_args.acc_save_dir,
                          train_args.incr_results_path),
                     entropy=net.get_entropy(),
                     class_div=net.get_class_routing_divergence())
        else:
            train(train_args,
                  net,
                  train_loader,
                  val_loader,
                  device=0,
                  multihead=False)
    else:
        train_incr(train_args, net, train_loader, val_loader, device=0)
Example #24
def main():

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # Image Preprocessing
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])

    test_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])

    num_epochs = args.epochs
    batch_size = args.batch_size

    train_dataset = datasets.folder.ImageFolder(
        root='/fast/users/a1675776/data/imagenet/train/',
        transform=train_transform)
    test_dataset = datasets.folder.ImageFolder(
        root='/fast/users/a1675776/data/imagenet/val/',
        transform=test_transform)

    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=10,
                                               pin_memory=True)

    test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=batch_size,
                                              shuffle=False,
                                              num_workers=10,
                                              pin_memory=True)

    num_train = len(train_dataset)
    n_train_batches = math.floor(num_train / batch_size)

    criterion = nn.CrossEntropyLoss().cuda()
    bitW = 32
    bitA = 32
    model = resnet18(bitW, bitA)
    model = utils.dataparallel(model, 3)

    print("Compilation complete, starting training...")

    test_record = []
    train_record = []
    learning_rate = args.learning_rate
    epoch = 0
    step_idx = 0
    best_top1 = 0

    optimizer = torch.optim.SGD(model.parameters(),
                                lr=learning_rate,
                                weight_decay=args.weight_decay,
                                momentum=args.momentum)

    for m in model.modules():
        if isinstance(m, (nn.Conv2d, nn.Linear, self_conv)):
            torch.nn.init.xavier_uniform_(m.weight)
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data = m.weight.data.zero_().add(1.0)

    while epoch < num_epochs:

        epoch = epoch + 1
        # resume training
        if (args.resume_train) and (epoch == 1):
            checkpoint = torch.load(args.resume_dir)
            epoch = checkpoint['epoch']
            learning_rate = checkpoint['learning_rate']
            optimizer.load_state_dict(checkpoint['optimizer'])
            step_idx = checkpoint['step_idx']
            model.load_state_dict(checkpoint['state_dict'])
            test_record = list(np.load(args.weights_dir + 'test_record.npy'))
            train_record = list(np.load(args.weights_dir + 'train_record.npy'))

        logging.info('epoch %d lr %e', epoch, learning_rate)

        # training
        train_acc_top1, train_acc_top5, train_obj = train(
            train_loader, model, criterion, optimizer)
        logging.info('train_acc %f', train_acc_top1)
        train_record.append([train_acc_top1, train_acc_top5])
        np.save(args.weights_dir + 'train_record.npy', train_record)

        # test
        test_acc_top1, test_acc_top5, test_obj = infer(test_loader, model,
                                                       criterion)
        is_best = test_acc_top1 > best_top1
        if is_best:
            best_top1 = test_acc_top1

        logging.info('test_acc %f', test_acc_top1)
        test_record.append([test_acc_top1, test_acc_top5])
        np.save(args.weights_dir + 'test_record.npy', test_record)

        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'best_top1': best_top1,
                'step_idx': step_idx,
                'learning_rate': learning_rate,
            }, args, is_best)

        step_idx, learning_rate = utils.adjust_learning_rate(
            args, epoch, step_idx, learning_rate)

        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate
Example #25
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    # model cfg
    # parser.add_argument('--pretrained', action='store_true', default=False,
    #                     help='load pretrained model')
    parser.add_argument('--model-type',
                        type=str,
                        default="",
                        help="type of the model.")
    parser.add_argument('--model-structure',
                        type=int,
                        default=0,
                        metavar='N',
                        help='model structure to be trained (default: 0)')
    parser.add_argument('--resume',
                        default='',
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--e',
                        '--evaluate',
                        dest='evaluate',
                        action='store_true',
                        help='evaluate model on validation set')
    # dataset
    parser.add_argument('--dataset-root',
                        type=str,
                        default="../datasets",
                        help="load dataset path.")
    parser.add_argument('--workers',
                        default=0,
                        type=int,
                        metavar='N',
                        help='number of data loading workers (default: 0)')
    parser.add_argument('--train-batch-size',
                        type=int,
                        default=128,
                        metavar='N',
                        help='input batch size for training (default: 128)')
    parser.add_argument('--test-batch-size',
                        type=int,
                        default=128,
                        metavar='N',
                        help='input batch size for testing (default: 128)')
    # train cfg
    parser.add_argument('--epochs',
                        type=int,
                        default=80,
                        metavar='N',
                        help='number of epochs to train (default: 80)')
    parser.add_argument('--start-epoch',
                        default=0,
                        type=int,
                        metavar='N',
                        help='manual epoch number (useful on restarts)')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        metavar='LR',
                        help='learning rate (default: 0.001)')
    # optimizer
    parser.add_argument('--momentum',
                        default=0.9,
                        type=float,
                        metavar='M',
                        help='SGD momentum (default: 0.9)')
    parser.add_argument('--wd',
                        default=5e-4,
                        type=float,
                        metavar='W',
                        help='weight decay (default: 5e-4)')
    # scheduler
    parser.add_argument('--schedule',
                        type=int,
                        nargs='+',
                        default=[150, 225],
                        help='Decrease learning rate at these epochs.')
    parser.add_argument('--gamma',
                        type=float,
                        default=0.1,
                        help='LR is multiplied by gamma on schedule.')
    parser.add_argument('--decreasing-lr',
                        default='16,30,54',
                        help='decreasing strategy')
    # device init cfg
    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    # result output cfg
    parser.add_argument('--detail',
                        action='store_true',
                        default=False,
                        help='show log in detail')
    parser.add_argument(
        '--log-interval',
        type=int,
        default=10,
        metavar='N',
        help='how many batches to wait before logging training status')
    parser.add_argument('--save-model',
                        action='store_true',
                        default=True,
                        help='For Saving the current Model')
    parser.add_argument('--checkpoint-path',
                        type=str,
                        default="",
                        help="save model path.")

    parser.add_argument('--crxb-size',
                        type=int,
                        default=64,
                        help='crossbar size')
    parser.add_argument('--vdd',
                        type=float,
                        default=3.3,
                        help='supply voltage')
    parser.add_argument('--gwire',
                        type=float,
                        default=0.0357,
                        help='wire conductance')
    parser.add_argument('--gload',
                        type=float,
                        default=0.25,
                        help='load conductance')
    parser.add_argument('--gmax',
                        type=float,
                        default=0.000333,
                        help='maximum cell conductance')
    parser.add_argument('--gmin',
                        type=float,
                        default=0.000000333,
                        help='minimum cell conductance')
    parser.add_argument('--ir-drop',
                        action='store_true',
                        default=False,
                        help='switch to turn on ir drop analysis')
    parser.add_argument('--scaler-dw',
                        type=float,
                        default=1,
                        help='scaler to compress the conductance')
    parser.add_argument('--test',
                        action='store_true',
                        default=False,
                        help='switch to turn on inference mode')
    parser.add_argument('--enable_noise',
                        action='store_true',
                        default=False,
                        help='switch to turn on noise analysis')
    parser.add_argument('--enable_SAF',
                        action='store_true',
                        default=False,
                        help='switch to turn on SAF analysis')
    parser.add_argument('--enable_ec_SAF',
                        action='store_true',
                        default=False,
                        help='switch to turn on SAF error correction')
    parser.add_argument('--freq',
                        type=float,
                        default=10e6,
                        help='operating frequency (Hz)')
    parser.add_argument('--temp',
                        type=float,
                        default=300,
                        help='operating temperature (K)')

    args = parser.parse_args()
    print("+++", args)

    # Train the network on the training data
    # Test the network on the test data
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    torch.manual_seed(args.seed)
    device = torch.device("cuda" if use_cuda else "cpu")
    print(device)

    # net = alexnet(args.pretrained, args.resume, num_classes=10, structure=args.model_structure)
    # create model
    crxb_cfg = {
        'crxb_size': args.crxb_size,
        'gmax': args.gmax,
        'gmin': args.gmin,
        'gwire': args.gwire,
        'gload': args.gload,
        'vdd': args.vdd,
        'ir_drop': args.ir_drop,
        'device': device,
        'freq': args.freq,
        'temp': args.temp,
        'enable_SAF': args.enable_SAF,
        'enable_noise': args.enable_noise,
        'enable_ec_SAF': args.enable_ec_SAF,
        'quantize': 64
    }
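    # crxb_cfg gathers the crossbar hardware parameters in one place; it is
    # unpacked into the crossbar-aware model constructors below via **crxb_cfg.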

    if args.model_type == 'VGG16':
        net = model.VGG16()
    elif args.model_type == 'cifar10' or args.model_type == 'VGG8':
        net = model.cifar10(n_channel=128, physical=0, **crxb_cfg)
    elif args.model_type == 'alexnet':
        net = model.alexnet(num_classes=10, structure=args.model_structure)
    elif args.model_type == 'resnet18':
        net = model.resnet18()
    else:
        net = model.cifar10(n_channel=128, physical=True, **crxb_cfg)

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        net = nn.DataParallel(net)
    net.to(device)

    # for param in net.parameters():
    #     param = nn.init.normal_(param)

    # config
    milestones = list(map(int, args.decreasing_lr.split(',')))
    print(milestones)

    # optimizer = optim.SGD(net.parameters(), lr=lr, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY) # not good enough 68%
    optimizer = optim.Adam(net.parameters(), lr=args.lr, weight_decay=args.wd)
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=milestones,
                                         gamma=args.gamma)

    # optionally resume from a checkpoint
    if args.resume:
        print("=> using pre-trained model '{}'".format(args.model_type))
    else:
        print("=> creating model '{}'".format(args.model_type))

    global best_prec1
    # if args.resume:
    #     if os.path.isfile(args.resume):
    #         print("=> loading checkpoint '{}'".format(args.resume))
    #         checkpoint = torch.load(args.resume)
    #         args.start_epoch = checkpoint['epoch']
    #         best_prec1 = checkpoint['best_prec1']
    #         net.load_state_dict(checkpoint['state_dict'])
    #         optimizer.load_state_dict(checkpoint['optimizer'])
    #         print("=> loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch']))
    #     else:
    #         print("=> no checkpoint found at '{}'".format(args.resume))

    # Data loading
    kwargs = {
        'num_workers': args.workers,
        'pin_memory': True
    } if use_cuda else {}
    trainloader, valloader, testloader = getcifar(args, 'pad', 4, True, 32,
                                                  True, **kwargs)
    print(len(trainloader), len(valloader), len(testloader))

    print('\r\n[1] model_dict')
    model_dict = net.state_dict()
    print(model_dict.keys(), '\r\n[2] model parameters')
    for name, _ in net.named_parameters():
        print(name)

    print('\r\n[3] pretrained_dict')
    checkpoint = torch.load(args.resume)
    # print(type(checkpoint),'\r\n!!!')
    # print(checkpoint.keys(),'\r\n!!!')

    pretrained_dict = checkpoint['state_dict']
    print(pretrained_dict.keys(), '\r\n[4] new_dict')

    import re
    new_dict = {}
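    # Keep keys the model already has; BN buffer keys that moved under a ".bn"
    # submodule in the crossbar model are renamed; everything else is dropped.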
    for k, v in pretrained_dict.items():
        if k not in model_dict:
            bn_detect = re.match(
                r'module\.features\.(1|4|8|11|15|18|22)\.(running_mean|num_batches_tracked|running_var)',
                k)
            if bn_detect:
                k = 'module.features.{}.bn.{}'.format(bn_detect.group(1),
                                                      bn_detect.group(2))
                print(k)
                new_dict[k] = v
            else:
                pass
        else:
            new_dict[k] = v
    print(new_dict.keys(), '\r\n[5]')
    print([k for k, v in new_dict.items() if k not in model_dict], '\r\n')
    print([k for k, v in model_dict.items() if k not in new_dict])
    # print('net buffers')
    # print([n for n,v in net.named_buffers()], '\r\n !!!ideal_buffer')

    ideal_buffer = torch.load("../models/cifar10_crxb_ideal_VGG8_0_final.pth")
    # buffer_list = [k for k, v in ideal_buffer['state_dict'].items() if k not in new_dict]
    for k, v in ideal_buffer['state_dict'].items():
        if k not in new_dict:
            new_dict[k] = v
    print("\r\ncheck:", new_dict.keys() == model_dict.keys())
    model_dict.update(new_dict)
    # model_dict.update(ideal_buffer['state_dict'])
    net.load_state_dict(model_dict)
    # print('vvv')
    # print([k for k,v in ideal_buffer['state_dict'].items() if k not in model_dict])
    # net.load_state_dict(ideal_buffer['state_dict'])

    test(args, net, device, testloader)
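
The regex-based renaming above generalizes to a small helper. A sketch under the same assumption (buffer keys renamed into a ".bn" submodule); remap_state_dict is a hypothetical name, not part of this codebase:

import re

def remap_state_dict(pretrained_dict, model_dict, pattern, template):
    # Keep keys the model already has; rename keys matching `pattern`
    # according to `template`; silently drop the rest.
    new_dict = {}
    for k, v in pretrained_dict.items():
        if k in model_dict:
            new_dict[k] = v
            continue
        m = re.match(pattern, k)
        if m:
            new_dict[template.format(*m.groups())] = v
    return new_dict

# usage mirroring the loop above:
# new_dict = remap_state_dict(
#     pretrained_dict, model_dict,
#     r'module\.features\.(1|4|8|11|15|18|22)\.(running_mean|num_batches_tracked|running_var)',
#     'module.features.{}.bn.{}')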
Example #26
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.',
                        default="csv")
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)',
                        default="binary_class.csv")
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=18)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=500)
    parser.add_argument('--epochs_only_det',
                        help='Number of epochs to train detection part',
                        type=int,
                        default=1)
    parser.add_argument('--max_epochs_no_improvement',
                        help='Max epochs without improvement',
                        type=int,
                        default=100)
    parser.add_argument('--pretrained_model',
                        help='Path of .pt file with pretrained model',
                        default='esposallescsv_retinanet_0.pt')
    parser.add_argument('--model_out',
                        help='Path of .pt file with trained model to save',
                        default='trained')

    parser.add_argument('--score_threshold',
                        help='Score above which boxes are kept',
                        type=float,
                        default=0.5)
    parser.add_argument('--nms_threshold',
                        help='IoU threshold for non-maximum suppression',
                        type=float,
                        default=0.2)
    parser.add_argument('--max_boxes',
                        help='Max boxes to be fed to recognition',
                        default=95)
    parser.add_argument('--seg_level',
                        help='[line, word], to choose anchor aspect ratio',
                        default='word')
    parser.add_argument(
        '--early_stop_crit',
        help='Early stop criterion, detection (map) or transcription (cer)',
        default='cer')
    parser.add_argument('--max_iters_epoch',
                        help='Max steps per epoch (for debugging)',
                        default=1000000)
    parser.add_argument('--train_htr',
                        help='Train recognition or not',
                        default='True')
    parser.add_argument('--train_det',
                        help='Train detection or not',
                        default='True')
    parser.add_argument(
        '--binary_classifier',
        help=
        'Whether to use the classification branch as binary (otherwise multiclass).',
        default='False')
    parser.add_argument(
        '--htr_gt_box',
        help='Train recognition branch with box gt (for debugging)',
        default='False')
    parser.add_argument(
        '--ner_branch',
        help='Train named entity recognition with separate branch',
        default='False')

    parser = parser.parse_args(args)

    if parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError('Must provide --csv_train')

        dataset_name = parser.csv_train.split("/")[-2]

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    # Files for training log

    experiment_id = str(time.time()).split('.')[0]
    if not os.path.exists('trained_models'):
        os.mkdir('trained_models')
    valid_cer_f = open('trained_models/' + parser.model_out + 'log.txt', 'w')
    for arg in vars(parser):
        if getattr(parser, arg) is not None:
            valid_cer_f.write(
                str(arg) + ' ' + str(getattr(parser, arg)) + '\n')

    current_commit = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
    valid_cer_f.write(str(current_commit))

    valid_cer_f.write(
        "epoch_num   cer     best cer     mAP    best mAP     text_mAP\n")

    valid_cer_f.close()

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=1,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=0,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)


    # Create the model

    train_htr = parser.train_htr == 'True'
    htr_gt_box = parser.htr_gt_box == 'True'
    ner_branch = parser.ner_branch == 'True'
    binary_classifier = parser.binary_classifier == 'True'
    torch.backends.cudnn.benchmark = False

    alphabet = dataset_train.alphabet
    if os.path.exists(parser.pretrained_model):
        retinanet = torch.load(parser.pretrained_model)
        retinanet.classificationModel = ClassificationModel(
            num_features_in=256,
            num_anchors=retinanet.anchors.num_anchors,
            num_classes=dataset_train.num_classes())
        if ner_branch:
            retinanet.nerModel = NERModel(
                feature_size=256,
                pool_h=retinanet.pool_h,
                n_classes=dataset_train.num_classes(),
                pool_w=retinanet.pool_w)
    else:
        if parser.depth == 18:
            retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                       pretrained=True,
                                       max_boxes=int(parser.max_boxes),
                                       score_threshold=float(
                                           parser.score_threshold),
                                       seg_level=parser.seg_level,
                                       alphabet=alphabet,
                                       train_htr=train_htr,
                                       htr_gt_box=htr_gt_box,
                                       ner_branch=ner_branch,
                                       binary_classifier=binary_classifier)

        elif parser.depth == 34:

            retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                       pretrained=True,
                                       max_boxes=int(parser.max_boxes),
                                       score_threshold=float(
                                           parser.score_threshold),
                                       seg_level=parser.seg_level,
                                       alphabet=alphabet,
                                       train_htr=train_htr,
                                       htr_gt_box=htr_gt_box)

        elif parser.depth == 50:
            retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                       pretrained=True)
        elif parser.depth == 101:
            retinanet = model.resnet101(
                num_classes=dataset_train.num_classes(), pretrained=True)
        elif parser.depth == 152:
            retinanet = model.resnet152(
                num_classes=dataset_train.num_classes(), pretrained=True)
        else:
            raise ValueError(
                'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True
    train_htr = parser.train_htr == 'True'
    train_det = parser.train_det == 'True'
    retinanet.htr_gt_box = parser.htr_gt_box == 'True'

    retinanet.train_htr = train_htr
    retinanet.epochs_only_det = parser.epochs_only_det

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-4)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=50,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)
    ctc = CTCLoss()
    retinanet.train()
    retinanet.module.freeze_bn()

    best_cer = 1000
    best_map = 0
    epochs_no_improvement = 0
    verbose_each = 20
    optimize_each = 1
    objective = 100
    best_objective = 10000

    print(('Num training images: {}'.format(len(dataset_train))))

    for epoch_num in range(parser.epochs):
        cers = []

        retinanet.training = True

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            if iter_num > int(parser.max_iters_epoch): break
            try:
                if iter_num % optimize_each == 0:
                    optimizer.zero_grad()
                (classification_loss, regression_loss, ctc_loss,
                 ner_loss) = retinanet([
                     data['img'].cuda().float(), data['annot'], ctc, epoch_num
                 ])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                if train_det:

                    if train_htr:
                        loss = ctc_loss + classification_loss + regression_loss + ner_loss

                    else:
                        loss = classification_loss + regression_loss + ner_loss

                elif train_htr:
                    loss = ctc_loss

                else:
                    continue
                if bool(loss == 0):
                    continue
                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
                if iter_num % verbose_each == 0:
                    print((
                        'Epoch: {} | Step: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | CTC loss: {:1.5f} | NER loss: {:1.5f} | Running loss: {:1.5f} | Total loss: {:1.5f}\r'
                        .format(epoch_num, iter_num,
                                float(classification_loss),
                                float(regression_loss), float(ctc_loss),
                                float(ner_loss), np.mean(loss_hist),
                                float(loss))))

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))
                torch.cuda.empty_cache()

            except Exception as e:
                print(e)
                continue
        if parser.dataset == 'csv' and parser.csv_val is not None and train_det:

            print('Evaluating dataset')

            mAP, text_mAP, current_cer = csv_eval.evaluate(
                dataset_val, retinanet, score_threshold=parser.score_threshold)
            #text_mAP,_ = csv_eval_binary_map.evaluate(dataset_val, retinanet,score_threshold=parser.score_threshold)
            objective = current_cer * (1 - mAP)

        retinanet.eval()
        retinanet.training = False
        retinanet.score_threshold = float(parser.score_threshold)
        '''for idx,data in enumerate(dataloader_val):
            if idx>int(parser.max_iters_epoch): break
            print("Eval CER on validation set:",idx,"/",len(dataset_val),"\r")
            image_name = dataset_val.image_names[idx].split('/')[-1].split('.')[-2]

            #generate_pagexml(image_name,data,retinanet,parser.score_threshold,parser.nms_threshold,dataset_val)
            text_gt =".".join(dataset_val.image_names[idx].split('.')[:-1])+'.txt'
            f =open(text_gt,'r')
            text_gt_lines=f.readlines()[0]
            transcript_pred = get_transcript(image_name,data,retinanet,float(parser.score_threshold),float(parser.nms_threshold),dataset_val,alphabet)
            cers.append(float(editdistance.eval(transcript_pred,text_gt_lines))/len(text_gt_lines))'''

        t = str(time.time()).split('.')[0]

        valid_cer_f.close()
        #print("GT",text_gt_lines)
        #print("PREDS SAMPLE:",transcript_pred)

        if parser.early_stop_crit == 'cer':

            if float(objective) < float(
                    best_objective):  #float(current_cer)<float(best_cer):
                best_cer = current_cer
                best_objective = objective

                epochs_no_improvement = 0
                torch.save(
                    retinanet.module, 'trained_models/' + parser.model_out +
                    '{}_retinanet.pt'.format(parser.dataset))

            else:
                epochs_no_improvement += 1
            if mAP > best_map:
                best_map = mAP
        elif parser.early_stop_crit == 'map':
            if mAP > best_map:
                best_map = mAP
                epochs_no_improvement = 0
                torch.save(
                    retinanet.module, 'trained_models/' + parser.model_out +
                    '{}_retinanet.pt'.format(parser.dataset))

            else:
                epochs_no_improvement += 1
            if float(current_cer) < float(best_cer):
                best_cer = current_cer
        if train_det:
            print(epoch_num, "mAP: ", mAP, " best mAP", best_map)
        if train_htr:
            print("VALID CER:", current_cer, "best CER", best_cer)
        print("Epochs no improvement:", epochs_no_improvement)
        valid_cer_f = open('trained_models/' + parser.model_out + 'log.txt',
                           'a')
        valid_cer_f.write(
            str(epoch_num) + " " + str(current_cer) + " " + str(best_cer) +
            ' ' + str(mAP) + ' ' + str(best_map) + ' ' + str(text_mAP) + '\n')
        if epochs_no_improvement > 3:
            for param_group in optimizer.param_groups:
                if param_group['lr'] > 1e-4:
                    param_group['lr'] *= 0.1

        if epochs_no_improvement >= parser.max_epochs_no_improvement:
            print("TRAINING FINISHED AT EPOCH", epoch_num, ".")
            sys.exit()

        scheduler.step(np.mean(epoch_loss))
        torch.cuda.empty_cache()

    retinanet.eval()
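
For reference, the early-stop objective used above couples the two validation metrics multiplicatively, so that both a low CER and a high mAP are needed for a new best. A minimal sketch:

def combined_objective(cer, mean_ap):
    # cer >= 0, mean_ap in [0, 1]; smaller is better
    return cer * (1.0 - mean_ap)

# e.g. combined_objective(0.10, 0.80) is roughly 0.02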
Example #27
def main(args=None):

    parser = argparse.ArgumentParser(
        description="Simple training script for training a RetinaNet network.")

    parser.add_argument(
        "--dataset", help="Dataset type, must be one of csv or coco or ycb.")
    parser.add_argument("--path", help="Path to dataset directory")
    parser.add_argument(
        "--csv_train",
        help="Path to file containing training annotations (see readme)")
    parser.add_argument("--csv_classes",
                        help="Path to file containing class list (see readme)")
    parser.add_argument("--csv_val",
                        help="Path to file containing validation annotations "
                        "(optional, see readme)")

    parser.add_argument(
        "--depth",
        help="Resnet depth, must be one of 18, 34, 50, 101, 152",
        type=int,
        default=50)
    parser.add_argument("--epochs",
                        help="Number of epochs",
                        type=int,
                        default=100)
    parser.add_argument("--evaluate_every", default=20, type=int)
    parser.add_argument("--print_every", default=20, type=int)
    parser.add_argument('--distributed',
                        action="store_true",
                        help='Run model in distributed mode with DataParallel')

    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == "coco":

        if parser.path is None:
            raise ValueError(
                "Must provide --path when training on non-CSV datasets")

        dataset_train = CocoDataset(parser.path,
                                    ann_file="instances_train2014.json",
                                    set_name="train2014",
                                    transform=transforms.Compose([
                                        Normalizer(),
                                        Augmenter(),
                                        Resizer(min_side=512, max_side=512)
                                    ]))
        dataset_val = CocoDataset(parser.path,
                                  ann_file="instances_val2014.cars.json",
                                  set_name="val2014",
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == "ycb":

        dataset_train = YCBDataset(parser.path,
                                   "image_sets/train.txt",
                                   transform=transforms.Compose([
                                       Normalizer(),
                                       Augmenter(),
                                       Resizer(min_side=512, max_side=512)
                                   ]),
                                   train=True)
        dataset_val = YCBDataset(parser.path,
                                 "image_sets/val.txt",
                                 transform=transforms.Compose(
                                     [Normalizer(), Resizer()]),
                                 train=False)

    elif parser.dataset == "csv":

        if parser.csv_train is None:
            raise ValueError(
                "Must provide --csv_train when training on a CSV dataset")

        if parser.csv_classes is None:
            raise ValueError(
                "Must provide --csv_classes when training on a CSV dataset")

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print("No validation annotations provided.")
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            "Dataset type not understood (must be csv, coco or ycb), exiting.")

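    # AspectRatioBasedSampler groups images with similar aspect ratios into the
    # same batch, which keeps the padding added by collater small (repo helper,
    # not shown here).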
    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=12,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=8,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=4,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            "Unsupported model depth, must be one of 18, 34, 50, 101, 152")

    print("CUDA available: {}".format(torch.cuda.is_available()))
    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    retinanet = retinanet.to(device)

    if parser.distributed:
        retinanet = torch.nn.DataParallel(retinanet)

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    print("Num training images: {}".format(len(dataset_train)))

    best_mean_avg_prec = 0.0

    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                classification_loss, regression_loss = retinanet(
                    [data["img"].to(device).float(), data["annot"]])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss.item()))
                epoch_loss.append(float(loss.item()))

                if iter_num % parser.print_every == 0:
                    print("Epoch: {} | Iteration: {}/{} | "
                          "Classification loss: {:1.5f} | "
                          "Regression loss: {:1.5f} | "
                          "Running loss: {:1.5f}".format(
                              epoch_num, iter_num, len(dataloader_train),
                              float(classification_loss),
                              float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if ((epoch_num + 1) % parser.evaluate_every
                == 0) or epoch_num + 1 == parser.epochs:

            mAP = 0.0

            if parser.dataset == "coco":

                print("Evaluating dataset")
                mAP = coco_eval.evaluate_coco(dataset_val, retinanet)

            else:
                print("Evaluating dataset")
                AP = eval.evaluate(dataset_val, retinanet)
                mAP = np.asarray([x[0] for x in AP.values()]).mean()
                print("Val set mAP: ", mAP)

            if mAP > best_mean_avg_prec:
                best_mean_avg_prec = mAP
                torch.save(
                    retinanet.state_dict(),
                    "{}_retinanet_best_mean_ap_{}.pt".format(
                        parser.dataset, epoch_num))

        scheduler.step(np.mean(epoch_loss))

    retinanet.eval()

    torch.save(retinanet.state_dict(), "retinanet_model_final.pt")
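
To reuse the weights saved above, rebuild the network and load the state dict. A sketch, assuming the depth-50 variant; if training used --distributed, the keys carry a 'module.' prefix that must be stripped first:

retinanet = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=False)
state = torch.load("retinanet_model_final.pt", map_location="cpu")
retinanet.load_state_dict(state)
retinanet.eval()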
Example #28
def main():
    global args, best_sa
    args = parser.parse_args()
    print(args)

    torch.cuda.set_device(int(args.gpu))
    os.makedirs(args.save_dir, exist_ok=True)
    if args.seed:
        setup_seed(args.seed)

    if args.task == 'rotation':
        print('train for rotation classification')
        class_number = 4
    else:
        print('train for supervised classification')
        if args.dataset == 'cifar10':
            class_number = 10
        elif args.dataset == 'fmnist':
            class_number = 10
        elif args.dataset == 'cifar100':
            class_number = 100
        else:
            raise ValueError('unsupported dataset: ' + str(args.dataset))

    # prepare dataset 
    if args.dataset == 'cifar10':
        print('training on cifar10 dataset')

        model = resnet18(num_classes=class_number)
        model.normalize = NormalizeByChannelMeanStd(
            mean=[0.4914, 0.4822, 0.4465], std=[0.2470, 0.2435, 0.2616])
        train_loader, val_loader, test_loader = cifar10_dataloaders(batch_size=args.batch_size, data_dir=args.data)
    
    elif args.dataset == 'cifar_10_10':
        print('training on cifar10 subset')

        model = resnet18(num_classes=class_number)
        model.normalize = NormalizeByChannelMeanStd(
            mean=[0.4914, 0.4822, 0.4465], std=[0.2470, 0.2435, 0.2616])
        train_loader, val_loader, test_loader = cifar10_subset_dataloaders(batch_size=args.batch_size, data_dir=args.data)


    elif args.dataset == 'cifar100':

        model = resnet18(num_classes=class_number)
        model.normalize = NormalizeByChannelMeanStd(
            mean=[0.5071, 0.4865, 0.4409], std=[0.2673, 0.2564, 0.2762])
        train_loader, val_loader, test_loader = cifar100_dataloaders(batch_size=args.batch_size, data_dir=args.data)

    elif args.dataset == 'fmnist':

        model = resnet18(num_classes=class_number)
        model.normalize = NormalizeByChannelMeanStd(
            mean=[0.2860], std=[0.3530])
        model.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1, bias=False)
        train_loader, val_loader, test_loader = fashionmnist_dataloaders(batch_size=args.batch_size, data_dir=args.data)

    else:
        raise ValueError('dataset not supported: ' + str(args.dataset))

    model.cuda()
    criterion = nn.CrossEntropyLoss()
    decreasing_lr = list(map(int, args.decreasing_lr.split(',')))

    if args.prune_type == 'lt':
        print('report lottery tickets setting')
        initialization = deepcopy(model.state_dict())
    else:
        initialization = None

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decreasing_lr, gamma=0.1)


    if args.resume:
        print('resume from checkpoint')
        checkpoint = torch.load(args.resume, map_location = torch.device('cuda:'+str(args.gpu)))
        best_sa = checkpoint['best_sa']
        start_epoch = checkpoint['epoch']
        all_result = checkpoint['result']
        start_state = checkpoint['state']

        if start_state>0:
            current_mask = extract_mask(checkpoint['state_dict'])
            prune_model_custom(model, current_mask)
            check_sparsity(model)

        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])
        initialization = checkpoint['init_weight']
        print('loading state:', start_state)
        print('loading from epoch: ',start_epoch, 'best_sa=', best_sa)

    else:
        all_result = {}
        all_result['train'] = []
        all_result['test_ta'] = []
        all_result['ta'] = []

        start_epoch = 0
        start_state = 0

    print('######################################## Start Standard Training Iterative Pruning ########################################')
    print(model.normalize)  


    for state in range(start_state, args.pruning_times):

        print('******************************************')
        print('pruning state', state)
        print('******************************************')
        
        for epoch in range(start_epoch, args.epochs):

            print(optimizer.state_dict()['param_groups'][0]['lr'])
            check_sparsity(model)
            acc = train(train_loader, model, criterion, optimizer, epoch)

            # evaluate on validation set
            tacc = validate(val_loader, model, criterion)
            # evaluate on test set
            test_tacc = validate(test_loader, model, criterion)

            scheduler.step()

            all_result['train'].append(acc)
            all_result['ta'].append(tacc)
            all_result['test_ta'].append(test_tacc)

            # remember best prec@1 and save checkpoint
            is_best_sa = tacc  > best_sa
            best_sa = max(tacc, best_sa)

            save_checkpoint({
                'state': state,
                'result': all_result,
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_sa': best_sa,
                'optimizer': optimizer.state_dict(),
                'scheduler': scheduler.state_dict(),
                'init_weight': initialization
            }, is_SA_best=is_best_sa, pruning=state, save_path=args.save_dir)
        
            plt.plot(all_result['train'], label='train_acc')
            plt.plot(all_result['ta'], label='val_acc')
            plt.plot(all_result['test_ta'], label='test_acc')
            plt.legend()
            plt.savefig(os.path.join(args.save_dir, str(state)+'net_train.png'))
            plt.close()

        #report result
        check_sparsity(model, True)
        print('report best SA={}'.format(best_sa))
        
        all_result = {}
        all_result['train'] = []
        all_result['test_ta'] = []
        all_result['ta'] = []

        best_sa = 0
        start_epoch = 0

        if args.prune_type == 'pt':
            print('report loading pretrained weight')
            initialization = torch.load(os.path.join(args.save_dir, '0model_SA_best.pth.tar'), map_location=torch.device('cuda:' + str(args.gpu)))['state_dict']


        # Alternative: prune in place and extract the mask from the pruned
        # model, instead of loading a saved one:
        #   pruning_model(model, args.rate)
        #   current_mask = extract_mask(model.state_dict())
        #   remove_prune(model)

        pruning_model(model, args.rate)
        check_sparsity(model)
        current_mask = torch.load(os.path.join(args.mask_path, '{}checkpoint.pth.tar'.format(state+1)))['state_dict']
        remove_prune(model)

        #rewind weight to init
        model.load_state_dict(initialization)
        prune_model_custom(model, current_mask)
        check_sparsity(model)

        optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                    momentum=args.momentum,
                                    weight_decay=args.weight_decay)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=decreasing_lr, gamma=0.1)
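
pruning_model, extract_mask, prune_model_custom, remove_prune and check_sparsity are repo utilities not shown here. A sketch of the custom-mask step using torch.nn.utils.prune, as an illustration of the idea rather than the repo's implementation:

import torch.nn.utils.prune as prune

def apply_masks(model, masks):
    # masks: dict mapping '<module path>.weight_mask' -> binary mask tensor,
    # e.g. taken from the state_dict of a previously pruned model
    for name, module in model.named_modules():
        key = name + '.weight_mask'
        if key in masks:
            prune.custom_from_mask(module, name='weight', mask=masks[key])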
Example #29
def main(args=None):

    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )

    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)

    parser = parser.parse_args(args)

    # Create the data loaders
    if parser.dataset == 'coco':

        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2017',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2017',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':

        if parser.csv_train is None:
            raise ValueError(
                'Must provide --csv_train when training on a CSV dataset')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on a CSV dataset')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=2,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=3,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=1,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=3,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    use_gpu = True

    if use_gpu:
        retinanet = retinanet.cuda()

    retinanet = torch.nn.DataParallel(retinanet).cuda()

    retinanet.training = True

    optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)

    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()

    print('Num training images: {}'.format(len(dataset_train)))

    for epoch_num in range(parser.epochs):

        retinanet.train()
        retinanet.module.freeze_bn()

        epoch_loss = []

        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()

                classification_loss, regression_loss = retinanet(
                    [data['img'].cuda().float(), data['annot']])

                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()

                loss = classification_loss + regression_loss

                if bool(loss == 0):
                    continue

                loss.backward()

                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

                optimizer.step()

                loss_hist.append(float(loss))

                epoch_loss.append(float(loss))

                print(
                    'Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num, iter_num, float(classification_loss),
                            float(regression_loss), np.mean(loss_hist)))

                del classification_loss
                del regression_loss
            except Exception as e:
                print(e)
                continue

        if parser.dataset == 'coco':

            print('Evaluating dataset')

            coco_eval.evaluate_coco(dataset_val, retinanet)

        elif parser.dataset == 'csv' and parser.csv_val is not None:

            print('Evaluating dataset')

            mAP = csv_eval.evaluate(dataset_val, retinanet)

        scheduler.step(np.mean(epoch_loss))

        torch.save(
            retinanet.module,
            '{}_retinanet_dilation_{}.pt'.format(parser.dataset, epoch_num))

    retinanet.eval()

    torch.save(retinanet, 'model_final_dilation.pt')
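
Note that this script pickles entire modules (retinanet.module and retinanet). Saving only the weights is the more portable alternative; a sketch:

torch.save(retinanet.module.state_dict(), 'model_final_dilation_state.pt')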
Example #30
def main(args=None):
    parser = argparse.ArgumentParser(
        description='Simple training script for training a RetinaNet network.')

    parser.add_argument('--dataset',
                        help='Dataset type, must be one of csv or coco.')
    parser.add_argument('--coco_path', help='Path to COCO directory')
    parser.add_argument(
        '--csv_train',
        help='Path to file containing training annotations (see readme)')
    parser.add_argument('--csv_classes',
                        help='Path to file containing class list (see readme)')
    parser.add_argument(
        '--csv_val',
        help=
        'Path to file containing validation annotations (optional, see readme)'
    )
    parser.add_argument(
        '--depth',
        help='Resnet depth, must be one of 18, 34, 50, 101, 152',
        type=int,
        default=50)
    parser.add_argument('--epochs',
                        help='Number of epochs',
                        type=int,
                        default=100)
    parser.add_argument('--optimizer',
                        help='[SGD | Adam]',
                        type=str,
                        default='SGD')
    parser.add_argument('--model', help='Path to model (.pt) file.')
    parser = parser.parse_args(args)

    # Create the data loaders
    print("\n[Phase 1]: Creating DataLoader for {} dataset".format(
        parser.dataset))
    if parser.dataset == 'coco':
        if parser.coco_path is None:
            raise ValueError('Must provide --coco_path when training on COCO')

        dataset_train = CocoDataset(parser.coco_path,
                                    set_name='train2014',
                                    transform=transforms.Compose(
                                        [Normalizer(),
                                         Augmenter(),
                                         Resizer()]))
        dataset_val = CocoDataset(parser.coco_path,
                                  set_name='val2014',
                                  transform=transforms.Compose(
                                      [Normalizer(), Resizer()]))

    elif parser.dataset == 'csv':
        if parser.csv_train is None:
            raise ValueError(
                'Must provide --csv_train when training on a CSV dataset')

        if parser.csv_classes is None:
            raise ValueError(
                'Must provide --csv_classes when training on a CSV dataset')

        dataset_train = CSVDataset(train_file=parser.csv_train,
                                   class_list=parser.csv_classes,
                                   transform=transforms.Compose(
                                       [Normalizer(),
                                        Augmenter(),
                                        Resizer()]))

        if parser.csv_val is None:
            dataset_val = None
            print('No validation annotations provided.')
        else:
            dataset_val = CSVDataset(train_file=parser.csv_val,
                                     class_list=parser.csv_classes,
                                     transform=transforms.Compose(
                                         [Normalizer(),
                                          Resizer()]))

    else:
        raise ValueError(
            'Dataset type not understood (must be csv or coco), exiting.')

    sampler = AspectRatioBasedSampler(dataset_train,
                                      batch_size=8,
                                      drop_last=False)
    dataloader_train = DataLoader(dataset_train,
                                  num_workers=8,
                                  collate_fn=collater,
                                  batch_sampler=sampler)

    if dataset_val is not None:
        sampler_val = AspectRatioBasedSampler(dataset_val,
                                              batch_size=16,
                                              drop_last=False)
        dataloader_val = DataLoader(dataset_val,
                                    num_workers=8,
                                    collate_fn=collater,
                                    batch_sampler=sampler_val)

    # Create the model
    if parser.depth == 18:
        retinanet = model.resnet18(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 34:
        retinanet = model.resnet34(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 50:
        retinanet = model.resnet50(num_classes=dataset_train.num_classes(),
                                   pretrained=True)
    elif parser.depth == 101:
        retinanet = model.resnet101(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    elif parser.depth == 152:
        retinanet = model.resnet152(num_classes=dataset_train.num_classes(),
                                    pretrained=True)
    else:
        raise ValueError(
            'Unsupported model depth, must be one of 18, 34, 50, 101, 152')

    print('| Num training images: {}'.format(len(dataset_train)))
    if dataset_val is not None:
        print('| Num test images : {}'.format(len(dataset_val)))

    print("\n[Phase 2]: Preparing RetinaNet Detection Model...")
    use_gpu = torch.cuda.is_available()
    device = torch.device('cuda' if use_gpu else 'cpu')
    retinanet = retinanet.to(device)

    retinanet = torch.nn.DataParallel(retinanet,
                                      device_ids=range(
                                          torch.cuda.device_count()))
    print("| Using %d GPUs for Train/Validation!" % torch.cuda.device_count())
    retinanet.training = True

    if parser.optimizer == 'Adam':
        optimizer = optim.Adam(retinanet.parameters(),
                               lr=1e-5)  # not mentioned
        print("| Adam Optimizer with Learning Rate = {}".format(1e-5))
    elif parser.optimizer == 'SGD':
        optimizer = optim.SGD(retinanet.parameters(),
                              lr=1e-2,
                              momentum=0.9,
                              weight_decay=1e-4)
        print("| SGD Optimizer with Learning Rate = {}".format(1e-2))
    else:
        raise ValueError('Unsupported Optimizer, must be one of [SGD | Adam]')

    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                     patience=3,
                                                     verbose=True)
    loss_hist = collections.deque(maxlen=500)

    retinanet.train()
    retinanet.module.freeze_bn()  # Freeze the BN parameters to ImageNet configuration

    # Check if there is a 'checkpoints' path
    if not osp.exists('./checkpoints/'):
        os.makedirs('./checkpoints/')

    print("\n[Phase 3]: Training Model on {} dataset...".format(
        parser.dataset))
    for epoch_num in range(parser.epochs):
        epoch_loss = []
        for iter_num, data in enumerate(dataloader_train):
            try:
                optimizer.zero_grad()
                classification_loss, regression_loss = retinanet(
                    [data['img'].to(device), data['annot']])
                classification_loss = classification_loss.mean()
                regression_loss = regression_loss.mean()
                loss = classification_loss + regression_loss
                if bool(loss == 0):
                    continue

                loss.backward()
                torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.001)
                optimizer.step()
                loss_hist.append(float(loss))
                epoch_loss.append(float(loss))

                sys.stdout.write('\r')
                sys.stdout.write(
                    '| Epoch: {} | Iteration: {}/{} | Classification loss: {:1.5f} | Regression loss: {:1.5f} | Running loss: {:1.5f}'
                    .format(epoch_num + 1, iter_num + 1, len(dataloader_train),
                            float(classification_loss), float(regression_loss),
                            np.mean(loss_hist)))
                sys.stdout.flush()

                del classification_loss
                del regression_loss

            except Exception as e:
                print(e)
                continue

        print("\n| Saving current best model at epoch {}...".format(epoch_num +
                                                                    1))
        torch.save(
            retinanet.state_dict(),
            './checkpoints/{}_retinanet_{}.pt'.format(parser.dataset,
                                                      epoch_num + 1))

        if parser.dataset == 'coco':
            #print('Evaluating dataset')
            coco_eval.evaluate_coco(dataset_val, retinanet, device)

        elif parser.dataset == 'csv' and parser.csv_val is not None:
            #print('Evaluating dataset')
            mAP = csv_eval.evaluate(dataset_val, retinanet, device)

        scheduler.step(np.mean(epoch_loss))

    retinanet.eval()
    torch.save(retinanet.state_dict(), './checkpoints/model_final.pt')
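
Because retinanet is wrapped in DataParallel here, every key in the saved state dict carries a 'module.' prefix. A sketch for loading the final checkpoint into an unwrapped model (the depth-50 constructor is an assumption):

state = torch.load('./checkpoints/model_final.pt', map_location='cpu')
state = {k.replace('module.', '', 1): v for k, v in state.items()}
net = model.resnet50(num_classes=dataset_train.num_classes(), pretrained=False)
net.load_state_dict(state)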