Example 1
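All four examples assume the same module-level preamble, which the snippets omit. A typical set of imports would look like the following; the project-local modules (the model definitions, load_dataset, and utils) are assumptions inferred from the call sites and are left commented:

import sys                              # Example 1 logs sys.argv
from os.path import expanduser          # Example 3 builds the data path

import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
from tqdm import tqdm

# Project-local modules (names assumed from the call sites below):
# from dataset import load_dataset
# from models import ShakeResNet, ShakeResNeXt
# import utils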
def main(args):
    train_loader, test_loader, channels = load_dataset(args.label, args.batch_size, args.mnist)
    model = ShakeResNet(args.depth, args.w_base, args.label, args.use_shakeshake, args.act_type, channels)
    
    params = sum(torch.numel(p) for p in model.parameters() if p.requires_grad)
    print("Parameters: %d" % params)
    
    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True

    opt = optim.SGD(model.parameters(),
                    lr=args.lr,
                    momentum=0.9,
                    weight_decay=args.weight_decay,
                    nesterov=args.nesterov)
    loss_func = nn.CrossEntropyLoss().cuda()

    headers = ["Epoch", "LearningRate", "TrainLoss", "TestLoss", "TrainAcc.", "TestAcc."]
    logger = utils.Logger(args.checkpoint, headers)
    for e in range(args.epochs):
        lr = utils.cosine_lr(opt, args.lr, e, args.epochs)
        model.train()
        train_loss, train_acc, train_n = 0, 0, 0
        bar = tqdm(total=len(train_loader), leave=False)
        for x, t in train_loader:
            x, t = x.cuda(), t.cuda()
            y = model(x)
            loss = loss_func(y, t)
            opt.zero_grad()
            loss.backward()
            opt.step()

            train_acc += utils.accuracy(y, t).item()
            train_loss += loss.item() * t.size(0)
            train_n += t.size(0)
            bar.set_description("Loss: {:.4f}, Accuracy: {:.2f}".format(
                train_loss / train_n, train_acc / train_n * 100), refresh=True)
            bar.update()
        bar.close()

        model.eval()
        test_loss, test_acc, test_n = 0, 0, 0
        for x, t in tqdm(test_loader, total=len(test_loader), leave=False):
            with torch.no_grad():
                x, t = x.cuda(), t.cuda()
                y = model(x)
                loss = loss_func(y, t)
                test_loss += loss.item() * t.size(0)
                test_acc += utils.accuracy(y, t).item()
                test_n += t.size(0)
        logger.write(e+1, lr, train_loss / train_n, test_loss / test_n,
                     train_acc / train_n * 100, test_acc / test_n * 100)
    
    final_score = test_acc / test_n * 100
    
    with open("history.txt", "a") as f:
        command = " ".join(sys.argv)
        f.write("%s\nParameters: %d\nFinal test accuracy: %.5f\n" % (command, params, final_score))
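utils.cosine_lr is project-local and not shown. A minimal sketch consistent with its call sites (it updates the optimizer's learning rate for the current epoch and returns the value used), assuming a half-cosine annealing schedule:

import math

def cosine_lr(opt, base_lr, epoch, total_epochs):
    # Anneal from base_lr down to 0 along a half cosine over the full run.
    lr = 0.5 * base_lr * (1 + math.cos(math.pi * epoch / total_epochs))
    for group in opt.param_groups:
        group["lr"] = lr
    return lr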
Example 2
def main(args):
    train_loader, test_loader = load_dataset(
        args.label, args.batch_size, root="/home/ubuntu/data1.5TB/cifar")
    if args.label == 10:
        model = ShakeResNet(args.depth, args.w_base, args.label)
    else:
        model = ShakeResNeXt(args.depth, args.w_base, args.cardinary,
                             args.label)
    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True

    opt = optim.SGD(model.parameters(),
                    lr=args.lr,
                    momentum=0.9,
                    weight_decay=args.weight_decay,
                    nesterov=args.nesterov)
    loss_func = nn.CrossEntropyLoss().cuda()

    headers = [
        "Epoch", "LearningRate", "TrainLoss", "TestLoss", "TrainAcc.",
        "TestAcc."
    ]
    logger = utils.Logger(args.checkpoint, headers)
    for e in range(args.epochs):
        lr = utils.cosine_lr(opt, args.lr, e, args.epochs)
        model.train()
        train_loss, train_acc, train_n = 0, 0, 0
        bar = tqdm(total=len(train_loader), leave=False)
        for x, t in train_loader:
            x, t = x.cuda(), t.cuda()
            y = model(x)
            loss = loss_func(y, t)
            opt.zero_grad()
            loss.backward()
            opt.step()

            train_acc += utils.accuracy(y, t).item()
            train_loss += loss.item() * t.size(0)
            train_n += t.size(0)
            bar.set_description("Loss: {:.4f}, Accuracy: {:.2f}".format(
                train_loss / train_n, train_acc / train_n * 100),
                                refresh=True)
            bar.update()
        bar.close()

        model.eval()
        test_loss, test_acc, test_n = 0, 0, 0
        for x, t in tqdm(test_loader, total=len(test_loader), leave=False):
            with torch.no_grad():
                x, t = x.cuda(), t.cuda()
                y = model(x)
                loss = loss_func(y, t)
                test_loss += loss.item() * t.size(0)
                test_acc += utils.accuracy(y, t).item()
                test_n += t.size(0)
        logger.write(e + 1, lr, train_loss / train_n, test_loss / test_n,
                     train_acc / train_n * 100, test_acc / test_n * 100)
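utils.accuracy is likewise project-local. Since its result is summed per batch, divided by the sample count, and multiplied by 100, it must return the number of correct top-1 predictions as a tensor (hence the .item() at the call sites). A minimal sketch under that assumption:

import torch

def accuracy(output, target):
    # Count of correct top-1 predictions in the batch, as a 0-dim tensor.
    pred = output.argmax(dim=1)
    return (pred == target).sum().float()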
Example 3
def main(args):
    # Make sure CIFAR-10, CIFAR-100, STL-10, and ImageNet 32x32 are already
    # downloaded to this path; otherwise, pass download=True to load_dataset.
    path = expanduser("~") + "/project/data"  # root directory of the data
    train_loader, test_loader = load_dataset(args.label, args.batch_size, path, args.dataset, download=False)
    cifarlike = (args.dataset == 'cifar')
    if args.model == "shake":
        model = ShakeResNet(args.convBN, args.depth, args.factor, args.label, cifar=cifarlike)
    elif args.model == "senet56":
        model = se_resnet56(num_classes=args.label, reduction=8, convBN=args.convBN, cifar=cifarlike)
    elif args.model == 'resnet20':
        model = resnet20(num_classes=args.label, convBN=args.convBN, cifar=cifarlike)
    elif args.model == 'resnet50':
        model = resnet50(num_classes=args.label, convBN=args.convBN, cifar=cifarlike)
    elif args.model == 'mobilenet':
        if cifarlike and args.convBN.__name__ == 'conv3x3_bn':
            print("MobileNetV2 does not support small (CIFAR-like) inputs with the standard conv3x3_bn convolution!")
            return
        model = MobileNetV2(num_classes=args.label, convBN=args.convBN, large=False)
    else:
        print("Invalid model!")
        return

    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True
    print('Number of model parameters: {}'.format(
            sum([p.data.nelement() for p in model.parameters()])))

    opt = optim.SGD(model.parameters(),
                    lr=args.lr,
                    momentum=0.9,
                    weight_decay=args.weight_decay,
                    nesterov=args.nesterov)
    loss_func = nn.CrossEntropyLoss().cuda()

    headers = ["Epoch", "LearningRate", "TrainLoss", "TestLoss", "TrainAcc.", "TestAcc."]
    logger = utils.Logger(args.log_path, args.log_file, headers)

    # Train and evaluate the model.
    for e in range(args.epochs):
        lr = utils.cosine_lr(opt, args.lr, e, args.epochs)
        model.train()
        train_loss, train_acc, train_n = 0, 0, 0
        for x, t in train_loader:
            x, t = x.cuda(), t.cuda()
            y = model(x)
            loss = loss_func(y, t)
            opt.zero_grad()
            loss.backward()
            opt.step()

            train_acc += utils.accuracy(y, t).item()
            train_loss += loss.item() * t.size(0)
            train_n += t.size(0)

        model.eval()
        test_loss, test_acc, test_n = 0, 0, 0
        for x, t in tqdm(test_loader, total=len(test_loader), leave=False):
            with torch.no_grad():
                x, t = x.cuda(), t.cuda()
                y = model(x)
                loss = loss_func(y, t)
                test_loss += loss.item() * t.size(0)
                test_acc += utils.accuracy(y, t).item()
                test_n += t.size(0)
        logger.write(e+1, lr, train_loss / train_n, test_loss / test_n,
                     train_acc / train_n * 100, test_acc / test_n * 100)
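utils.Logger is also not shown; its constructor takes (checkpoint, headers) in Examples 1, 2, and 4 but (log_path, log_file, headers) here, so the signature varies between projects. A minimal sketch of the two-argument variant, assuming it writes one tab-separated row per epoch in header order:

class Logger:
    def __init__(self, path, headers):
        self.path = path
        with open(self.path, "w") as f:
            f.write("\t".join(headers) + "\n")

    def write(self, *values):
        # One row per epoch, in the same order as the headers.
        with open(self.path, "a") as f:
            f.write("\t".join(str(v) for v in values) + "\n")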
Example 4
def main(args):

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")

    train_loader, test_loader = load_dataset(args.label, args.batch_size,
                                             args.half_length, args.nholes)

    if args.label == 10:
        model = ShakeResNet(args.depth, args.w_base, args.label)
    else:
        model = ShakeResNeXt(args.depth, args.w_base, args.cardinary,
                             args.label)

    model = torch.nn.DataParallel(model).cuda()

    cudnn.benchmark = True

    if args.optimizer == 'sgd':
        print("using sgd")
        opt = optim.SGD(model.parameters(),
                        lr=args.lr,
                        momentum=args.momentum,
                        weight_decay=args.weight_decay,
                        nesterov=args.nesterov)

    elif args.optimizer == 'abd':
        print("using adabound")
        opt = abd.AdaBound(model.parameters(),
                           lr=args.lr,
                           gamma=args.gamma,
                           weight_decay=args.weight_decay,
                           final_lr=args.final_lr)

    elif args.optimizer == 'swa':
        print("using swa")
        opt = optim.SGD(model.parameters(),
                        lr=args.lr,
                        momentum=args.momentum,
                        weight_decay=args.weight_decay)
        steps_per_epoch = len(train_loader.dataset) // args.batch_size
        opt = swa(opt,
                  swa_start=args.swa_start * steps_per_epoch,
                  swa_freq=steps_per_epoch,
                  swa_lr=args.swa_lr)
    else:
        print("Invalid optimizer!")
        return

    loss_func = nn.CrossEntropyLoss().cuda()

    headers = [
        "Epoch", "LearningRate", "TrainLoss", "TestLoss", "TrainAcc.",
        "TestAcc."
    ]

    #if args.optimizer=='swa':
    #   headers = headers[:-1] + ['swa_te_loss', 'swa_te_acc'] + headers[-1:]
    #  swa_res = {'loss': None, 'accuracy': None}

    logger = utils.Logger(args.checkpoint, headers, mod=args.optimizer)

    for e in range(args.epochs):

        if args.optimizer == 'swa':
            lr = utils.schedule(e, args.optimizer, args.epochs, args.swa_start,
                                args.swa_lr, args.lr)
            utils.adjust_learning_rate(opt, lr)
        elif args.optimizer == 'sgd':
            lr = utils.cosine_lr(opt, args.lr, e, args.epochs)
        else:
            # AdaBound manages its own effective step size; keep the base lr for logging.
            lr = args.lr

        #train
        train_loss, train_acc, train_n = utils.train_epoch(
            train_loader, model, opt)
        #eval
        test_loss, test_acc, test_n = utils.eval_epoch(test_loader, model)

        logger.write(e + 1, lr, train_loss / train_n, test_loss / test_n,
                     train_acc / train_n * 100, test_acc / test_n * 100)

        if args.optimizer == 'swa' and (
                e + 1) >= args.swa_start and args.eval_freq > 1:
            if e == 0 or e % args.eval_freq == args.eval_freq - 1 or e == args.epochs - 1:
                opt.swap_swa_sgd()
                opt.bn_update(train_loader, model, device='cuda')
                #swa_res = utils.eval_epoch(test_loaders['test'], model)
                opt.swap_swa_sgd()
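Example 4 factors the inline loops of Examples 1-3 into utils.train_epoch and utils.eval_epoch; the swa wrapper with swap_swa_sgd and bn_update matches the torchcontrib.optim.SWA API. Sketches of the two helpers consistent with the earlier inline loops and with the (loss_sum, correct_count, n) accumulators expected at the call sites (the real utils module may differ, e.g. it presumably reuses utils.accuracy and a shared loss function):

import torch
import torch.nn as nn

loss_func = nn.CrossEntropyLoss().cuda()

def train_epoch(loader, model, opt):
    # One optimization pass; returns (summed loss, correct count, sample count).
    model.train()
    loss_sum, correct, n = 0.0, 0.0, 0
    for x, t in loader:
        x, t = x.cuda(), t.cuda()
        y = model(x)
        loss = loss_func(y, t)
        opt.zero_grad()
        loss.backward()
        opt.step()
        loss_sum += loss.item() * t.size(0)
        correct += (y.argmax(dim=1) == t).sum().item()
        n += t.size(0)
    return loss_sum, correct, n

def eval_epoch(loader, model):
    # Gradient-free evaluation pass with the same accumulators.
    model.eval()
    loss_sum, correct, n = 0.0, 0.0, 0
    with torch.no_grad():
        for x, t in loader:
            x, t = x.cuda(), t.cuda()
            y = model(x)
            loss = loss_func(y, t)
            loss_sum += loss.item() * t.size(0)
            correct += (y.argmax(dim=1) == t).sum().item()
            n += t.size(0)
    return loss_sum, correct, n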