Exemple #1
0
def main():
    """Train VGG-19 (batch norm) on CIFAR-10 with xRDA and save the model.

    Downloads CIFAR-10 into the current directory if needed, trains on the
    GPU for ``epoch_count`` epochs using ``CosineSpecs`` iteration settings
    and an l1 proximal operator, prints training accuracy, test accuracy and
    the number of non-zero parameters after every epoch, and finally saves
    the whole network object to ``output_file``.
    """
    output_file = 'vgg19_sparse_model.dat'
    batch_size = 128
    epoch_count = 600

    # Per-channel normalization constants shared by both pipelines.
    norm_mean = (0.4914, 0.4822, 0.4465)
    norm_std = (0.2023, 0.1994, 0.2010)

    # Training images are augmented with random crops and horizontal flips.
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])

    # Evaluation images are only converted and normalized.
    transform_val = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])

    trainset = torchvision.datasets.CIFAR10(root='./',
                                            train=True,
                                            download=True,
                                            transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=4)

    testset = torchvision.datasets.CIFAR10(root='./',
                                           train=False,
                                           download=True,
                                           transform=transform_val)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=32,
                                             shuffle=False,
                                             num_workers=2)

    conv_net = vgg19_bn(num_classes=10).cuda()
    criterion = nn.CrossEntropyLoss()

    init_lr = 1.0
    lam = 1e-6
    # Total optimizer steps = batches per epoch * epochs. len(trainloader)
    # is ceil(len(trainset) / batch_size), so the dataset size is no longer
    # hard-coded.
    training_specs = CosineSpecs(max_iter=len(trainloader) * epoch_count,
                                 init_step_size=init_lr,
                                 mom_ts=10.0,
                                 b_mom_ts=10.0,
                                 weight_decay=5e-4)
    optimizer = xRDA(conv_net.parameters(),
                     it_specs=training_specs,
                     prox=l1_prox(lam=lam, maximum_factor=500))

    for _ in range(epoch_count):
        # test_accuracy() is defined elsewhere and may leave the network in
        # eval mode; make sure every epoch really trains in training mode.
        conv_net.train()
        total = 0
        correct = 0
        for inputs, labels in trainloader:
            inputs = inputs.cuda()
            labels = labels.cuda()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = conv_net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Track training accuracy with plain Python ints so the
            # arithmetic below is exact integer math on every PyTorch version.
            predicted = outputs.detach().argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Printed under the label "Sparsity", but this is the number of
        # NON-zero parameter entries.
        sparsity = sum(
            torch.nonzero(param).size(0) for param in conv_net.parameters())
        # Accuracy in hundredths of a percent (0..10000).
        accuracy = 10000 * correct // total
        # NOTE(review): assumes test_accuracy returns the same
        # hundredths-of-a-percent scale -- confirm against its definition.
        t_accuracy = test_accuracy(testloader, conv_net, cuda=True)
        print(
            'Training Accuracy: %d.%02d %% Test Accuracy: %d.%02d %% Sparsity: %d'
            % (accuracy // 100, accuracy % 100, t_accuracy / 100,
               t_accuracy % 100, sparsity))

    # Calculate accuracy and save output.
    final_accuracy = test_accuracy(testloader, conv_net, cuda=True)
    print('Accuracy of the network on the 10000 test images: %d.%02d %%' %
          (final_accuracy / 100, final_accuracy % 100))
    torch.save(conv_net, output_file)
def main():
    """Train VGG-19 (batch norm) on CIFAR-100 with xRDA and save the model.

    Downloads CIFAR-100 into the current directory if needed and trains on
    the GPU for 500 epochs with an l1 proximal operator. At a fixed set of
    epochs the step size is halved and the averaging parameter is moved
    halfway towards 1. After every epoch the training accuracy, test
    accuracy and number of non-zero parameters are printed; at the end the
    whole network object is saved to ``output_file``.
    """
    # NOTE(review): 'ciafr100' looks like a typo for 'cifar100'; the name is
    # kept unchanged so anything that already reads this file keeps working.
    output_file = 'vgg19_sparse_model_ciafr100.dat'
    epoch_count = 500

    # Per-channel normalization constants shared by both pipelines.
    norm_mean = (0.4914, 0.4822, 0.4465)
    norm_std = (0.2023, 0.1994, 0.2010)

    # Training images are augmented with random crops and horizontal flips.
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])

    # Evaluation images are only converted and normalized.
    transform_val = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])

    trainset = torchvision.datasets.CIFAR100(root='./',
                                             train=True,
                                             download=True,
                                             transform=transform_train)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=128,
                                              shuffle=True,
                                              num_workers=6)

    testset = torchvision.datasets.CIFAR100(root='./',
                                            train=False,
                                            download=True,
                                            transform=transform_val)
    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=32,
                                             shuffle=False,
                                             num_workers=2)

    conv_net = vgg19_bn(num_classes=100).cuda()
    criterion = nn.CrossEntropyLoss()

    init_lr = 1.0
    lam = 1e-6
    av_param = 0.0
    training_specs = IterationSpecs(step_size=init_lr,
                                    mom_ts=9.5,
                                    b_mom_ts=9.5,
                                    weight_decay=5e-4,
                                    av_param=av_param)
    optimizer = xRDA(conv_net.parameters(),
                     it_specs=training_specs,
                     prox=l1_prox(lam=lam, maximum_factor=500))

    # 0-based epoch indices after which the schedule is adjusted:
    # 60, 100, 140, ..., 420.
    decay_epochs = range(60, 421, 40)

    lr = init_lr
    for epoch in range(epoch_count):
        # test_accuracy() is defined elsewhere and may leave the network in
        # eval mode; make sure every epoch really trains in training mode.
        conv_net.train()
        total = 0
        correct = 0
        for inputs, labels in trainloader:
            inputs = inputs.cuda()
            labels = labels.cuda()

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = conv_net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Track training accuracy with plain Python ints so the
            # arithmetic below is exact integer math on every PyTorch version.
            predicted = outputs.detach().argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # Printed under the label "Sparsity", but this is the number of
        # NON-zero parameter entries.
        sparsity = sum(
            torch.nonzero(param).size(0) for param in conv_net.parameters())
        # Accuracy in hundredths of a percent (0..10000).
        accuracy = 10000 * correct // total
        # NOTE(review): assumes test_accuracy returns the same
        # hundredths-of-a-percent scale -- confirm against its definition.
        t_accuracy = test_accuracy(testloader, conv_net, cuda=True)
        # NOTE(review): the '%%' right after 'Epoch:%d' prints a stray '%';
        # the format string is left as-is in case logs are parsed downstream.
        print(
            'Epoch:%d %% Training Accuracy: %d.%02d %% Test Accuracy: %d.%02d %% Sparsity: %d'
            % (epoch + 1, accuracy // 100, accuracy % 100, t_accuracy / 100,
               t_accuracy % 100, sparsity))

        # At about every 40 epochs, halve step size and double averaging
        # (av_param moves halfway from its current value towards 1).
        if epoch in decay_epochs:
            lr /= 2
            training_specs.set_step_size(lr)
            av_param = 1.0 - (1.0 - av_param) / 2.0
            training_specs.set_av_param(av_param)

    # Calculate accuracy and save output.
    final_accuracy = test_accuracy(testloader, conv_net, cuda=True)
    print('Accuracy of the network on the 10000 test images: %d.%02d %%' %
          (final_accuracy / 100, final_accuracy % 100))
    torch.save(conv_net, output_file)
Exemple #3
0
def main():
    """Train (or only evaluate) a CIFAR-100 model with xRDA, driven by CLI args.

    Parses the module-level ``parser``, builds the model selected by
    ``args.arch``, optionally loads weights from ``args.resume``, and trains
    with the xRDA optimizer using a channel-mode l1 proximal operator. After
    each epoch it validates, saves a checkpoint under ``args.save`` and
    appends one line (epoch, train loss, train prec@1, val prec@1, zero conv
    parameters counted before that epoch) to a results text file in the same
    directory. With ``args.evaluate`` set, it runs one validation pass and
    returns.

    Side effects: rebinds the module globals ``args`` and ``best_prec1``.
    """
    global args, best_prec1
    args = parser.parse_args()

    if args.seed is not None:
        random.seed(args.seed)
        torch.manual_seed(args.seed)
        cudnn.deterministic = True
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    # exist_ok avoids the check-then-create race when several processes
    # start at the same time.
    os.makedirs(args.save, exist_ok=True)

    args.distributed = args.world_size > 1

    if args.distributed:
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_url,
                                world_size=args.world_size)

    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch](depth=40, dataset='cifar100')

    if args.gpu is not None:
        model = model.cuda(args.gpu)
    elif args.distributed:
        model.cuda()
        model = torch.nn.parallel.DistributedDataParallel(model)
    else:
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            # For these architectures only the feature extractor is wrapped.
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    # optionally resume from a checkpoint
    # Only the model weights are restored here; epoch counter, best_prec1
    # and optimizer state in the checkpoint are not read.
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            # SECURITY: torch.load unpickles the file -- only resume from
            # checkpoints you trust.
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    norm_mean = (0.4914, 0.4822, 0.4465)
    norm_std = (0.2023, 0.1994, 0.2010)

    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])

    transform_val = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(norm_mean, norm_std),
    ])

    trainset = torchvision.datasets.CIFAR100(root='./',
                                             train=True,
                                             download=True,
                                             transform=transform_train)

    # The epoch loop calls train_sampler.set_epoch() in distributed mode, so
    # the sampler must exist; previously it was never created (NameError).
    if args.distributed:
        # from-import keeps `torch` a module-level name inside this function.
        from torch.utils.data.distributed import DistributedSampler
        train_sampler = DistributedSampler(trainset)
    else:
        train_sampler = None

    # DataLoader forbids shuffle=True together with a sampler; the
    # distributed sampler does its own shuffling.
    train_loader = torch.utils.data.DataLoader(
        trainset,
        batch_size=args.batch_size,
        shuffle=(train_sampler is None),
        sampler=train_sampler,
        num_workers=4)

    valset = torchvision.datasets.CIFAR100(root='./',
                                           train=False,
                                           download=True,
                                           transform=transform_val)
    val_loader = torch.utils.data.DataLoader(valset,
                                             batch_size=128,
                                             shuffle=False,
                                             num_workers=4)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda(args.gpu)

    # len(train_loader) is ceil(len(trainset) / batch_size) without a
    # sampler, and the per-process batch count when the data is sharded.
    training_specs = CosineSpecs(
        max_iter=len(train_loader) * args.epochs,
        init_step_size=args.lr,
        mom_ts=args.momentum,
        b_mom_ts=args.momentum,
        weight_decay=args.weight_decay)
    optimizer = xRDA(model.parameters(),
                     it_specs=training_specs,
                     prox=l1_prox(lam=args.lam,
                                  maximum_factor=500,
                                  mode='channel'))

    if args.evaluate:
        validate(val_loader, model, criterion)
        return

    # The results file name depends only on the CLI arguments, so build the
    # path once instead of once per epoch.
    results_path = os.path.join(
        args.save,
        'densenet_cifar100_results_lr%.4f_lam%.8f_mom%.6f.txt' %
        (args.lr, args.lam, args.momentum))

    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            # Gives each epoch a different shuffling across processes.
            train_sampler.set_epoch(epoch)

        # Report zeroed conv parameters and total parameter count as they
        # stand BEFORE this epoch's training.
        num_zero_parameters = get_conv_zero_param(model)
        print('Zero parameters: {}'.format(num_zero_parameters))
        num_parameters = sum(param.nelement() for param in model.parameters())
        print('Parameters: {}'.format(num_parameters))

        # train for one epoch
        loss, prec1_train = train(train_loader, model, criterion, optimizer,
                                  epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            },
            is_best,
            checkpoint=args.save,
            args=args)

        # One line per epoch:
        # "<epoch> <train loss> <train prec@1> <val prec@1> <zero params>".
        with open(results_path, "a+") as text_file:
            text_file.write(
                '%d %.3f %.2f %.2f %d\n' %
                (epoch + 1,
                 loss.detach().cpu().numpy(),
                 prec1_train.detach().cpu().numpy(),
                 prec1.detach().cpu().numpy(),
                 num_zero_parameters))