Example #1
0
def _densenet(branches, arch, growth_rate, block_config, num_init_features,
              pretrained, progress, **kwargs):
    model_mtl = MTLDenseNet(branches, growth_rate, block_config,
                            num_init_features, **kwargs)
    model = DenseNet(growth_rate, block_config, num_init_features, **kwargs)

    if pretrained:
        _load_state_dict(model, model_urls[arch], progress)
        state_dict = model.state_dict()

        own_state = model_mtl.state_dict()
        pretrained_dict = {
            k: v
            for k, v in state_dict.items() if k in own_state
        }
        # 2. overwrite entries in the existing state dict
        own_state.update(pretrained_dict)
        # 3. load the new state dict
        model_mtl.load_state_dict(own_state)
    return model_mtl
Example #2
0
            nothot_acc = evaluate(model, test_nothot_dl, args)['accuracytop1']
            hot_acc = 0.0
            for i in range(len(test_hot_dl)):
                hot_acc += evaluate(model, test_hot_dl[i], args)['accuracytop1']
            hot_acc = hot_acc/float(len(test_hot_dl))
            print(nothot_acc, hot_acc)

            val_acc = 0.5*nothot_acc + 0.5*hot_acc

            is_best = (val_acc > best_val_acc)

            if is_best:
                best_val_acc = val_acc
            print(is_best)
            state = {'epoch': epoch,
                     'model_state_dict': model.state_dict(),
                     'optimizer_state_dict': optimizer.state_dict(),
                     'acc': val_acc}
            save_checkpoint(state, epoch, is_best, args.save_dir)




    if args.mode == 'test':

        load_checkpoint(args.load_dir + '/best.pth.tar', model)

        # mean_metrics = evaluate(model, train_dl, args)
        # mean_metrics = evaluate(model, test_dl, args)
        # mean_metrics = evaluate(model, test_sensitive_dl, args)
        # mean_metrics = evaluate(model, test_insensitive_dl, args)
Example #3
0
def main():
    global best_top1, best_top5

    start_epoch = args.start_epoch  # start from epoch 0 or last checkpoint epoch

    if not os.path.isdir(args.checkpoint):
        mkdir_p(args.checkpoint)

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    validdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])

    val_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ])

    train_data = imagenet_lmdb_dataset(traindir, transform=train_transform)
    valid_data = imagenet_lmdb_dataset(validdir, transform=val_transform)

    train_sampler = torch.utils.data.distributed.DistributedSampler(train_data)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.train_batch,
                                               shuffle=(train_sampler is None),
                                               pin_memory=True,
                                               num_workers=8,
                                               sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(valid_data,
                                             batch_size=args.test_batch,
                                             shuffle=False,
                                             pin_memory=True,
                                             num_workers=8)

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    elif args.arch.startswith('resnext'):
        model = models.__dict__[args.arch](
            baseWidth=args.base_width,
            cardinality=args.cardinality,
        )
    elif args.arch == 'densenet264':
        model = DenseNet(growth_rate=32,
                         block_config=(6, 12, 64, 48),
                         num_init_features=64,
                         bn_size=4,
                         drop_rate=0,
                         num_classes=1000,
                         memory_efficient=False)
    elif args.arch == 'resnet200':
        model = ResNet(block=Bottleneck,
                       layers=[3, 24, 36, 3],
                       num_classes=1000,
                       zero_init_residual=False,
                       groups=1,
                       width_per_group=64,
                       replace_stride_with_dilation=None,
                       norm_layer=None)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
        model.features = DDP(model.features)
        model.cuda()
    else:
        model = model.cuda()
        model = DDP(model, delay_allreduce=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    if args.optimizer.lower() == 'sgd':
        optimizer = optim.SGD(model.parameters(),
                              lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay)
    elif args.optimizer.lower() == 'adamw':
        optimizer = AdamW(model.parameters(),
                          lr=args.lr,
                          betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay,
                          warmup=0)
    elif args.optimizer.lower() == 'radam':
        optimizer = RAdam(model.parameters(),
                          lr=args.lr,
                          betas=(args.beta1, args.beta2),
                          weight_decay=args.weight_decay)
    elif args.optimizer.lower() == 'lsadam':
        optimizer = LSAdamW(model.parameters(),
                            lr=args.lr * ((1. + 4. * args.sigma)**(0.25)),
                            betas=(args.beta1, args.beta2),
                            weight_decay=args.weight_decay,
                            sigma=args.sigma)
    elif args.optimizer.lower() == 'lsradam':
        sigma = 0.1
        optimizer = LSRAdam(model.parameters(),
                            lr=args.lr * ((1. + 4. * args.sigma)**(0.25)),
                            betas=(args.beta1, args.beta2),
                            weight_decay=args.weight_decay,
                            sigma=args.sigma)
    elif args.optimizer.lower() == 'srsgd':
        iter_count = 1
        optimizer = SGD_Adaptive(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay,
                                 iter_count=iter_count,
                                 restarting_iter=args.restart_schedule[0])
    elif args.optimizer.lower() == 'sradam':
        iter_count = 1
        optimizer = SRNAdam(model.parameters(),
                            lr=args.lr,
                            betas=(args.beta1, args.beta2),
                            iter_count=iter_count,
                            weight_decay=args.weight_decay,
                            restarting_iter=args.restart_schedule[0])
    elif args.optimizer.lower() == 'sradamw':
        iter_count = 1
        optimizer = SRAdamW(model.parameters(),
                            lr=args.lr,
                            betas=(args.beta1, args.beta2),
                            iter_count=iter_count,
                            weight_decay=args.weight_decay,
                            warmup=0,
                            restarting_iter=args.restart_schedule[0])
    elif args.optimizer.lower() == 'srradam':
        #NOTE: need to double-check this
        iter_count = 1
        optimizer = SRRAdam(model.parameters(),
                            lr=args.lr,
                            betas=(args.beta1, args.beta2),
                            iter_count=iter_count,
                            weight_decay=args.weight_decay,
                            warmup=0,
                            restarting_iter=args.restart_schedule[0])

    schedule_index = 1
    # Resume
    title = 'ImageNet-' + args.arch
    if args.resume:
        # Load checkpoint.
        print('==> Resuming from checkpoint..')
        assert os.path.isfile(
            args.resume), 'Error: no checkpoint directory found!'
        # args.checkpoint = os.path.dirname(args.resume)
        # checkpoint = torch.load(args.resume, map_location = lambda storage, loc: storage.cuda(args.local_rank))
        checkpoint = torch.load(args.resume, map_location=torch.device('cpu'))
        best_top1 = checkpoint['best_top1']
        best_top5 = checkpoint['best_top5']
        start_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        if args.optimizer.lower() == 'srsgd' or args.optimizer.lower(
        ) == 'sradam' or args.optimizer.lower(
        ) == 'sradamw' or args.optimizer.lower() == 'srradam':
            iter_count = optimizer.param_groups[0]['iter_count']
        schedule_index = checkpoint['schedule_index']
        state['lr'] = optimizer.param_groups[0]['lr']
        if args.checkpoint == args.resume:
            logger = LoggerDistributed(os.path.join(args.checkpoint,
                                                    'log.txt'),
                                       rank=args.local_rank,
                                       title=title,
                                       resume=True)
        else:
            logger = LoggerDistributed(os.path.join(args.checkpoint,
                                                    'log.txt'),
                                       rank=args.local_rank,
                                       title=title)
            if args.local_rank == 0:
                logger.set_names([
                    'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Top1',
                    'Valid Top1', 'Train Top5', 'Valid Top5'
                ])
    else:
        logger = LoggerDistributed(os.path.join(args.checkpoint, 'log.txt'),
                                   rank=args.local_rank,
                                   title=title)
        if args.local_rank == 0:
            logger.set_names([
                'Learning Rate', 'Train Loss', 'Valid Loss', 'Train Top1',
                'Valid Top1', 'Train Top5', 'Valid Top5'
            ])

    if args.local_rank == 0:
        logger.file.write('    Total params: %.2fM' %
                          (sum(p.numel()
                               for p in model.parameters()) / 1000000.0))

    if args.evaluate:
        if args.local_rank == 0:
            logger.file.write('\nEvaluation only')
        test_loss, test_top1, test_top5 = test(val_loader, model, criterion,
                                               start_epoch, use_cuda, logger)
        if args.local_rank == 0:
            logger.file.write(
                ' Test Loss:  %.8f, Test Top1:  %.2f, Test Top5: %.2f' %
                (test_loss, test_top1, test_top5))
        return

    # Train and val
    for epoch in range(start_epoch, args.epochs):
        # Shuffle the sampler.
        train_loader.sampler.set_epoch(epoch + args.manualSeed)

        if args.optimizer.lower() == 'srsgd':
            if epoch in args.schedule:
                optimizer = SGD_Adaptive(
                    model.parameters(),
                    lr=args.lr * (args.gamma**schedule_index),
                    weight_decay=args.weight_decay,
                    iter_count=iter_count,
                    restarting_iter=args.restart_schedule[schedule_index])
                schedule_index += 1

        elif args.optimizer.lower() == 'sradam':
            if epoch in args.schedule:
                optimizer = SRNAdam(
                    model.parameters(),
                    lr=args.lr * (args.gamma**schedule_index),
                    betas=(args.beta1, args.beta2),
                    iter_count=iter_count,
                    weight_decay=args.weight_decay,
                    restarting_iter=args.restart_schedule[schedule_index])
                schedule_index += 1

        elif args.optimizer.lower() == 'sradamw':
            if epoch in args.schedule:
                optimizer = SRAdamW(
                    model.parameters(),
                    lr=args.lr * (args.gamma**schedule_index),
                    betas=(args.beta1, args.beta2),
                    iter_count=iter_count,
                    weight_decay=args.weight_decay,
                    warmup=0,
                    restarting_iter=args.restart_schedule[schedule_index])
                schedule_index += 1

        elif args.optimizer.lower() == 'srradam':
            if epoch in args.schedule:
                optimizer = SRRAdam(
                    model.parameters(),
                    lr=args.lr * (args.gamma**schedule_index),
                    betas=(args.beta1, args.beta2),
                    iter_count=iter_count,
                    weight_decay=args.weight_decay,
                    warmup=0,
                    restarting_iter=args.restart_schedule[schedule_index])
                schedule_index += 1

        else:
            adjust_learning_rate(optimizer, epoch)

        if args.local_rank == 0:
            logger.file.write('\nEpoch: [%d | %d] LR: %f' %
                              (epoch + 1, args.epochs, state['lr']))

        if args.optimizer.lower() == 'srsgd' or args.optimizer.lower(
        ) == 'sradam' or args.optimizer.lower(
        ) == 'sradamw' or args.optimizer.lower() == 'srradam':
            train_loss, train_top1, train_top5, iter_count = train(
                train_loader, model, criterion, optimizer, epoch, use_cuda,
                logger)
        else:
            train_loss, train_top1, train_top5 = train(train_loader, model,
                                                       criterion, optimizer,
                                                       epoch, use_cuda, logger)

        test_loss, test_top1, test_top5 = test(val_loader, model, criterion,
                                               epoch, use_cuda, logger)

        # append logger file
        if args.local_rank == 0:
            logger.append([
                state['lr'], train_loss, test_loss, train_top1, test_top1,
                train_top5, test_top5
            ])
            writer.add_scalars('train_loss', {args.model_name: train_loss},
                               epoch)
            writer.add_scalars('test_loss', {args.model_name: test_loss},
                               epoch)
            writer.add_scalars('train_top1', {args.model_name: train_top1},
                               epoch)
            writer.add_scalars('test_top1', {args.model_name: test_top1},
                               epoch)
            writer.add_scalars('train_top5', {args.model_name: train_top5},
                               epoch)
            writer.add_scalars('test_top5', {args.model_name: test_top5},
                               epoch)

        # save model
        is_best = test_top1 > best_top1
        best_top1 = max(test_top1, best_top1)
        best_top5 = max(test_top5, best_top5)
        if args.local_rank == 0:
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'schedule_index': schedule_index,
                    'state_dict': model.state_dict(),
                    'top1': test_top1,
                    'top5': test_top5,
                    'best_top1': best_top1,
                    'best_top5': best_top5,
                    'optimizer': optimizer.state_dict(),
                },
                is_best,
                epoch,
                checkpoint=args.checkpoint)

            if epoch == args.schedule[-1]:
                logger.file.write('Best top1: %f at epoch %i' %
                                  (best_top1, epoch))
                logger.file.write('Best top5: %f at epoch %i' %
                                  (best_top5, epoch))
                print('Best top1: %f at epoch %i' % (best_top1, epoch))
                print('Best top5: %f at epoch %i' % (best_top5, epoch))
                with open("./all_results_imagenet.txt", "a") as f:
                    fcntl.flock(f, fcntl.LOCK_EX)
                    f.write("%s\n" % args.checkpoint)
                    f.write("best_top1 %f, best_top5 %f at epoch %i\n\n" %
                            (best_top1, best_top5, epoch))
                    fcntl.flock(f, fcntl.LOCK_UN)

    if args.local_rank == 0:
        logger.file.write('Best top1: %f' % best_top1)
        logger.file.write('Best top5: %f' % best_top5)
        logger.close()
        logger.plot()
        savefig(os.path.join(args.checkpoint, 'log.eps'))
        print('Best top1: %f' % best_top1)
        print('Best top5: %f' % best_top5)
        with open("./all_results_imagenet.txt", "a") as f:
            fcntl.flock(f, fcntl.LOCK_EX)
            f.write("%s\n" % args.checkpoint)
            f.write("best_top1 %f, best_top5 %f\n\n" % (best_top1, best_top5))
            fcntl.flock(f, fcntl.LOCK_UN)