# Shared imports for the training variants below (each main() originally came
# from its own script). load_dataset, the model classes, and the abd/swa
# optimizer wrappers come from the repo's own modules.
import sys
from os.path import expanduser

import torch
import torch.nn as nn
import torch.optim as optim
import torch.backends.cudnn as cudnn
from tqdm import tqdm

import utils


# Variant 1: configurable ShakeResNet with a parameter count, tqdm progress
# bar, and a final run summary appended to history.txt.
def main(args):
    train_loader, test_loader, channels = load_dataset(args.label, args.batch_size, args.mnist)
    model = ShakeResNet(args.depth, args.w_base, args.label,
                        args.use_shakeshake, args.act_type, channels)
    params = sum(torch.numel(p) for p in model.parameters() if p.requires_grad)
    print("Parameters: %d" % params)

    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True  # fixed typo: "benckmark" silently set a dead attribute

    opt = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9,
                    weight_decay=args.weight_decay, nesterov=args.nesterov)
    loss_func = nn.CrossEntropyLoss().cuda()

    headers = ["Epoch", "LearningRate", "TrainLoss", "TestLoss", "TrainAcc.", "TestAcc."]
    logger = utils.Logger(args.checkpoint, headers)

    for e in range(args.epochs):
        lr = utils.cosine_lr(opt, args.lr, e, args.epochs)
        model.train()
        train_loss, train_acc, train_n = 0, 0, 0
        bar = tqdm(total=len(train_loader), leave=False)
        for x, t in train_loader:
            x, t = x.cuda(), t.cuda()  # Variable() is deprecated; plain tensors suffice
            y = model(x)
            loss = loss_func(y, t)
            opt.zero_grad()
            loss.backward()
            opt.step()
            train_acc += utils.accuracy(y, t).item()
            train_loss += loss.item() * t.size(0)
            train_n += t.size(0)
            bar.set_description("Loss: {:.4f}, Accuracy: {:.2f}".format(
                train_loss / train_n, train_acc / train_n * 100), refresh=True)
            bar.update()
        bar.close()

        model.eval()
        test_loss, test_acc, test_n = 0, 0, 0
        with torch.no_grad():
            for x, t in tqdm(test_loader, total=len(test_loader), leave=False):
                x, t = x.cuda(), t.cuda()
                y = model(x)
                loss = loss_func(y, t)
                test_loss += loss.item() * t.size(0)
                test_acc += utils.accuracy(y, t).item()
                test_n += t.size(0)
        logger.write(e + 1, lr, train_loss / train_n, test_loss / test_n,
                     train_acc / train_n * 100, test_acc / test_n * 100)

    # Append the command line, model size, and final score to a run history.
    final_score = test_acc / test_n * 100
    with open("history.txt", "a") as f:
        command = " ".join(sys.argv)
        f.write("%s\nParameters: %d\nFinal test accuracy: %.5f\n"
                % (command, params, final_score))
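# All variants in this file call utils.cosine_lr to anneal the learning rate,
# but its implementation is not shown. A minimal sketch, assuming it applies
# standard per-epoch cosine annealing and returns the new rate for logging
# (consistent with the call sites above):
import math

def cosine_lr(opt, base_lr, epoch, total_epochs):
    """Set every param group's lr to a cosine-annealed value and return it."""
    lr = 0.5 * base_lr * (1 + math.cos(math.pi * epoch / total_epochs))
    for param_group in opt.param_groups:
        param_group["lr"] = lr
    return lr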
# Variant 2: CIFAR-only training with a hard-coded data root; picks
# ShakeResNet for CIFAR-10 and ShakeResNeXt otherwise (CIFAR-100).
def main(args):
    train_loader, test_loader = load_dataset(
        args.label, args.batch_size, root="/home/ubuntu/data1.5TB/cifar")
    if args.label == 10:
        model = ShakeResNet(args.depth, args.w_base, args.label)
    else:
        model = ShakeResNeXt(args.depth, args.w_base, args.cardinary, args.label)
    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True  # fixed typo: was "benckmark"

    opt = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9,
                    weight_decay=args.weight_decay, nesterov=args.nesterov)
    loss_func = nn.CrossEntropyLoss().cuda()

    headers = ["Epoch", "LearningRate", "TrainLoss", "TestLoss", "TrainAcc.", "TestAcc."]
    logger = utils.Logger(args.checkpoint, headers)

    for e in range(args.epochs):
        lr = utils.cosine_lr(opt, args.lr, e, args.epochs)
        model.train()
        train_loss, train_acc, train_n = 0, 0, 0
        bar = tqdm(total=len(train_loader), leave=False)
        for x, t in train_loader:
            x, t = x.cuda(), t.cuda()
            y = model(x)
            loss = loss_func(y, t)
            opt.zero_grad()
            loss.backward()
            opt.step()
            train_acc += utils.accuracy(y, t).item()
            train_loss += loss.item() * t.size(0)
            train_n += t.size(0)
            bar.set_description("Loss: {:.4f}, Accuracy: {:.2f}".format(
                train_loss / train_n, train_acc / train_n * 100), refresh=True)
            bar.update()
        bar.close()

        model.eval()
        test_loss, test_acc, test_n = 0, 0, 0
        with torch.no_grad():
            for x, t in tqdm(test_loader, total=len(test_loader), leave=False):
                x, t = x.cuda(), t.cuda()
                y = model(x)
                loss = loss_func(y, t)
                test_loss += loss.item() * t.size(0)
                test_acc += utils.accuracy(y, t).item()
                test_n += t.size(0)
        logger.write(e + 1, lr, train_loss / train_n, test_loss / test_n,
                     train_acc / train_n * 100, test_acc / test_n * 100)
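# The per-batch results of utils.accuracy are summed and later divided by the
# total sample count, so it presumably returns the *number* of correct top-1
# predictions in the batch (as a 0-dim tensor), not a ratio. A sketch under
# that assumption:
def accuracy(output, target):
    """Return the count of correct top-1 predictions as a 0-dim float tensor."""
    pred = output.argmax(dim=1)
    return (pred == target).sum().float()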
# Variant 3: multi-dataset, multi-architecture training with a pluggable
# conv+BN block factory (args.convBN).
def main(args):
    # Make sure CIFAR-10, CIFAR-100, STL-10, and ImageNet 32x32 are already
    # downloaded under this path; otherwise pass download=True to load_dataset.
    path = expanduser("~") + "/project/data"  # root directory of the datasets
    train_loader, test_loader = load_dataset(args.label, args.batch_size, path,
                                             args.dataset, download=False)
    cifarlike = (args.dataset == 'cifar')
    if args.model == "shake":
        model = ShakeResNet(args.convBN, args.depth, args.factor, args.label,
                            cifar=cifarlike)
    elif args.model == "senet56":
        model = se_resnet56(num_classes=args.label, reduction=8,
                            convBN=args.convBN, cifar=cifarlike)
    elif args.model == 'resnet20':
        model = resnet20(num_classes=args.label, convBN=args.convBN, cifar=cifarlike)
    elif args.model == 'resnet50':
        model = resnet50(num_classes=args.label, convBN=args.convBN, cifar=cifarlike)
    elif args.model == 'mobilenet':
        if cifarlike and args.convBN.__name__ == 'conv3x3_bn':
            print("Small image input or standard convolution is not supported in MobileNet!")
            return
        model = MobileNetV2(num_classes=args.label, convBN=args.convBN, large=False)
    else:
        print("Invalid model!")
        return

    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True  # fixed typo: was "benckmark"
    print('Number of model parameters: {}'.format(
        sum(p.data.nelement() for p in model.parameters())))

    opt = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9,
                    weight_decay=args.weight_decay, nesterov=args.nesterov)
    loss_func = nn.CrossEntropyLoss().cuda()

    headers = ["Epoch", "LearningRate", "TrainLoss", "TestLoss", "TrainAcc.", "TestAcc."]
    logger = utils.Logger(args.log_path, args.log_file, headers)

    # Train and evaluate the model.
    for e in range(args.epochs):
        lr = utils.cosine_lr(opt, args.lr, e, args.epochs)
        model.train()
        train_loss, train_acc, train_n = 0, 0, 0
        for x, t in train_loader:
            x, t = x.cuda(), t.cuda()
            y = model(x)
            loss = loss_func(y, t)
            opt.zero_grad()
            loss.backward()
            opt.step()
            train_acc += utils.accuracy(y, t).item()
            train_loss += loss.item() * t.size(0)
            train_n += t.size(0)

        model.eval()
        test_loss, test_acc, test_n = 0, 0, 0
        with torch.no_grad():
            for x, t in tqdm(test_loader, total=len(test_loader), leave=False):
                x, t = x.cuda(), t.cuda()
                y = model(x)
                loss = loss_func(y, t)
                test_loss += loss.item() * t.size(0)
                test_acc += utils.accuracy(y, t).item()
                test_n += t.size(0)
        logger.write(e + 1, lr, train_loss / train_n, test_loss / test_n,
                     train_acc / train_n * 100, test_acc / test_n * 100)
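# In Variant 3, args.convBN is a factory callable that builds a model's
# conv+BN sub-block (the code inspects its __name__, e.g. 'conv3x3_bn').
# Its definition is not shown anywhere in this section; the following is a
# purely hypothetical sketch of what a factory with that name might look
# like, assuming a plain 3x3 convolution followed by BatchNorm:
def conv3x3_bn(in_planes, out_planes, stride=1):
    """Hypothetical conv+BN factory matching the __name__ checked above."""
    return nn.Sequential(
        nn.Conv2d(in_planes, out_planes, kernel_size=3,
                  stride=stride, padding=1, bias=False),
        nn.BatchNorm2d(out_planes),
    )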
# Variant 4: selectable optimizer (SGD, AdaBound, or SWA) with Cutout-style
# augmentation parameters; the train/eval loops are factored into utils.
def main(args):
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
    train_loader, test_loader = load_dataset(args.label, args.batch_size,
                                             args.half_length, args.nholes)
    if args.label == 10:
        model = ShakeResNet(args.depth, args.w_base, args.label)
    else:
        model = ShakeResNeXt(args.depth, args.w_base, args.cardinary, args.label)
    model = torch.nn.DataParallel(model).cuda()
    cudnn.benchmark = True  # fixed typo: was "benckmark"

    if args.optimizer == 'sgd':
        print("using sgd")
        opt = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum,
                        weight_decay=args.weight_decay, nesterov=args.nesterov)
    elif args.optimizer == 'abd':
        print("using adabound")
        opt = abd.AdaBound(model.parameters(), lr=args.lr, gamma=args.gamma,
                           weight_decay=args.weight_decay, final_lr=args.final_lr)
    elif args.optimizer == 'swa':
        print("using swa")
        opt = optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum,
                        weight_decay=args.weight_decay)
        steps_per_epoch = len(train_loader.dataset) // args.batch_size
        opt = swa(opt, swa_start=args.swa_start * steps_per_epoch,
                  swa_freq=steps_per_epoch, swa_lr=args.swa_lr)
    else:
        print("not a valid optimizer")
        return  # the original bare "exit" was a no-op and never stopped here

    loss_func = nn.CrossEntropyLoss().cuda()

    headers = ["Epoch", "LearningRate", "TrainLoss", "TestLoss", "TrainAcc.", "TestAcc."]
    #if args.optimizer == 'swa':
    #    headers = headers[:-1] + ['swa_te_loss', 'swa_te_acc'] + headers[-1:]
    #    swa_res = {'loss': None, 'accuracy': None}
    logger = utils.Logger(args.checkpoint, headers, mod=args.optimizer)

    for e in range(args.epochs):
        if args.optimizer == 'swa':
            lr = utils.schedule(e, args.optimizer, args.epochs, args.swa_start,
                                args.swa_lr, args.lr)
            utils.adjust_learning_rate(opt, lr)
        elif args.optimizer == 'sgd':
            lr = utils.cosine_lr(opt, args.lr, e, args.epochs)
        else:
            # AdaBound has no schedule branch; the original bare "exit" was a
            # no-op and lr stayed undefined, crashing logger.write below.
            # Assumption: log the base lr unchanged and let AdaBound adapt.
            lr = args.lr

        # train
        train_loss, train_acc, train_n = utils.train_epoch(train_loader, model, opt)
        # eval
        test_loss, test_acc, test_n = utils.eval_epoch(test_loader, model)
        logger.write(e + 1, lr, train_loss / train_n, test_loss / test_n,
                     train_acc / train_n * 100, test_acc / test_n * 100)

        # Periodically swap in the SWA averaged weights and refresh BatchNorm
        # statistics, then swap back to continue training on the SGD weights.
        if args.optimizer == 'swa' and (e + 1) >= args.swa_start and args.eval_freq > 1:
            if e == 0 or e % args.eval_freq == args.eval_freq - 1 or e == args.epochs - 1:
                opt.swap_swa_sgd()
                opt.bn_update(train_loader, model, device='cuda')  # fixed: "train_loaders" was undefined
                #swa_res = utils.eval_epoch(test_loader, model)
                opt.swap_swa_sgd()
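# The swa wrapper in Variant 4 matches the API of torchcontrib's SWA optimizer
# (swa_start/swa_freq/swa_lr kwargs plus swap_swa_sgd and bn_update), so it
# was presumably imported along the lines of
#     from torchcontrib.optim import SWA as swa
# A minimal usage sketch under that assumption, reusing the model, opt, and
# train_loader names from the variant above: after swa_start steps the wrapper
# maintains a running weight average; swapping it in and recomputing BatchNorm
# statistics yields the model that should be evaluated.
from torchcontrib.optim import SWA

base_opt = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
opt = SWA(base_opt, swa_start=100, swa_freq=10, swa_lr=0.01)
# ... run the usual training loop, calling opt.step() each batch ...
opt.swap_swa_sgd()                                  # load the averaged weights
opt.bn_update(train_loader, model, device='cuda')   # refresh BN running stats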