def main():
    """Certify a pre-built ensemble of trained base classifiers.

    Loads each base model from its checkpoint, wraps it with a softmax layer,
    assembles a (possibly adaptive) weighted ensemble from saved weights, and
    runs randomized-smoothing certification on the test set.
    """
    # Resolve optional directories ('none' disables them); create on demand.
    ckptdir = args.ckptdir if args.ckptdir != 'none' else None
    outdir = args.outdir if args.outdir != 'none' else None
    for directory in (ckptdir, outdir):
        if directory is not None and not os.path.isdir(directory):
            os.makedirs(directory)

    ########## models to train ##########
    if args.ens_comp == '3-model-ensemble':
        model_list = ['resnet20', 'resnet26', 'resnet32']
    elif args.ens_comp == '7-model-ensemble':
        model_list = [
            'lenet', 'alexnet', 'resnet20', 'resnet110', 'densenet', 'vgg16',
            'vgg19'
        ]
    # Checkpoint names default to the architecture names.
    name = args.name if args.name != 'none' else model_list
    assert isinstance(name, list), 'name must be a list'
    assert len(name) == len(model_list), 'the lengths of name and model_list must be equal'

    ########## dataset ##########
    trainset, testset, transform_test = get_dataset(args.dataset)
    num_classes = 10
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    ########## load trained models ##########
    trained_models = []
    for arch, ckpt_name in zip(model_list, name):
        state = torch.load('{}/{}.pth'.format(ckptdir, ckpt_name))
        base = get_architecture(arch)
        base.load_state_dict(state['net'])
        # Softmaxize so the ensemble averages probabilities, then freeze.
        base = Softmaxize(base).to(device)
        base.eval()
        trained_models.append(base)

    # Restore the learned ensemble weights; `adp` selects the adaptive variant.
    ens_ckpt = torch.load('{}/{}.pth'.format(ckptdir, args.ens_ckpt))
    if args.adp:
        ens_model = AEnsemble(trained_models, device, w=ens_ckpt['w'], rt_num=True)
    else:
        ens_model = Ensemble(trained_models, device, w=ens_ckpt['w'])
    ens_model.set_training_paras(weight=False)

    # Certify test
    print('===test(model=ensemble)===')
    t1 = time.time()
    ens_model.eval()
    certify(ens_model, device, testset, transform_test, num_classes,
            '{}/{}'.format(outdir, args.ens_name),
            start_img=args.start_img, num_img=args.num_img, skip=args.skip,
            sigma=args.sigma, adp=args.adp)
    t2 = time.time()
    print('Elapsed time: {}'.format(t2 - t1))
def main():
    """Learn ensemble combination weights, certify, and save them.

    Base classifiers are loaded from pre-trained checkpoints and kept frozen
    (``eval`` mode); only the ensemble weight vector ``w`` is optimized on a
    held-out validation split of the training set.
    """
    ckptdir = None if args.ckptdir == 'none' else args.ckptdir
    if ckptdir is not None and not os.path.isdir(ckptdir):
        os.makedirs(ckptdir)
    outdir = None if args.outdir == 'none' else args.outdir
    if outdir is not None and not os.path.isdir(outdir):
        os.makedirs(outdir)

    ########## models to train ##########
    if args.ens_comp == '3-model-ensemble':
        model_list = ['resnet20', 'resnet26', 'resnet32']
    elif args.ens_comp == '7-model-ensemble':
        model_list = ['lenet', 'alexnet', 'resnet20', 'resnet110',
                      'densenet', 'vgg16', 'vgg19']
    name = model_list if args.name == 'none' else args.name
    assert isinstance(name, list), 'name must be a list'
    assert len(name) == len(model_list), 'the lengths of name and model_list must be equal'

    ########## local args ##########
    start_epoch = 0
    train_epochs = 150
    milestones = [60, 90]

    ########## dataset ##########
    trainset, testset, transform_test = get_dataset(args.dataset)
    # The last `val_num` training examples form the validation split used to
    # fit the ensemble weights.
    val_set = torch.utils.data.Subset(
        trainset, [i for i in range(len(trainset) - args.val_num, len(trainset))])
    valloader = torch.utils.data.DataLoader(val_set,
                                            batch_size=args.batch_size,
                                            shuffle=True,
                                            num_workers=args.workers)
    num_classes = 10
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    ########## load trained models ##########
    trained_models = []
    for model_id in range(len(model_list)):
        checkpoint = torch.load('{}/{}.pth'.format(ckptdir, name[model_id]))
        model = get_architecture(model_list[model_id])
        model.load_state_dict(checkpoint['net'])
        model = Softmaxize(model)
        model = model.to(device)
        model.eval()  # base models stay frozen; only the weight vector trains
        trained_models.append(model)

    ############### Training Ensemble Paras ###############
    ens_model = Ensemble(trained_models, device)
    ens_model.set_training_paras()
    # Only the ensemble weights require gradients.
    optimizer = optim.SGD(filter(lambda p: p.requires_grad, ens_model.parameters()),
                          lr=0.01, momentum=0.9, weight_decay=5e-4)
    scheduler = MultiStepLR(optimizer, milestones=milestones, gamma=0.1)
    T0 = time.time()
    for epoch in range(start_epoch + 1, train_epochs + 1):
        print('===train(epoch={}, model=ensemble)==='.format(epoch))
        t1 = time.time()
        ens_model.eval()  # keep BN/dropout of base models frozen while fitting w
        ens_train(args.sigma, 1, num_classes, ens_model, valloader, optimizer, device)
        # BUGFIX: step the LR scheduler *after* the epoch's optimizer updates
        # (PyTorch >= 1.1 contract). The original stepped it at the top of the
        # loop, which decayed the LR one epoch early at each milestone.
        scheduler.step()
        t2 = time.time()
        print('Elapsed time: {}'.format(t2 - t1))
        print('current w:', ens_model.w.data)
        print('current alpha:', (ens_model.w.exp() / ens_model.w.exp().sum()).data)
    T1 = time.time()
    print('Total elapsed time for weight solving: {}'.format(T1 - T0))

    # Certify test
    print('===test(model=ensemble)===')
    t1 = time.time()
    ens_model.eval()
    certify(ens_model, device, testset, transform_test, num_classes,
            '{}/{}'.format(outdir, args.ens_name),
            start_img=args.start_img, num_img=args.num_img, skip=args.skip,
            sigma=args.sigma)
    t2 = time.time()
    print('Elapsed time: {}'.format(t2 - t1))

    if ckptdir is not None:
        # Save checkpoint (only the learned weight vector is needed).
        print('==> Saving Ens model.pth..')
        try:
            state = {
                'w': ens_model.w.data,
            }
            torch.save(state, '{}/{}.pth'.format(ckptdir, args.ens_name))
        except OSError:
            print('OSError while saving model {}.pth'.format(args.ens_name))
            print('Ignoring...')
device) t2 = time.time() print('Elapsed time: {}'.format(t2 - t1)) if epoch % 20 == 0 and epoch >= 200: # Certify test print('===test(epoch={})==='.format(epoch)) t1 = time.time() model.eval() certify(model, device, testset, transform_test, num_classes, mode='hard', start_img=args.start_img, num_img=args.num_img, sigma=args.sigma, beta=args.beta, matfile=(None if matdir is None else os.path.join( matdir, '{}.mat'.format(epoch)))) t2 = time.time() print('Elapsed time: {}'.format(t2 - t1)) if ckptdir is not None: # Save checkpoint print('==> Saving {}.pth..'.format(epoch)) try: state = { 'net': base_model.state_dict(), 'epoch': epoch,
def _build_transforms(dataset):
    """Return ``(transform_train, transform_test)`` for *dataset*.

    No normalization is applied: randomized smoothing adds Gaussian noise in
    the raw [0, 1] input space.
    """
    if dataset in ('cifar10', 'cifar100', 'svhn'):
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
        ])
    elif dataset == 'imagenet':
        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ])
        transform_test = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
        ])
    elif dataset == 'mnist':
        transform_train = transforms.Compose([
            transforms.RandomCrop(28, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
        ])
    else:
        raise ValueError('No such dataset')
    return transform_train, transform_test


def _build_loaders(args, transform_train, transform_test):
    """Return ``(trainset, testset, trainloader, testloader)`` for args.dataset."""
    if args.dataset == 'cifar10':
        trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                                download=True, transform=transform_train)
        testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                               download=True, transform=transform_test)
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size,
                                                  shuffle=True, num_workers=1)
        testloader = torch.utils.data.DataLoader(testset, batch_size=100,
                                                 shuffle=False, num_workers=1)
    elif args.dataset == 'cifar100':
        trainset = torchvision.datasets.CIFAR100(root='./data', train=True,
                                                 download=True, transform=transform_train)
        testset = torchvision.datasets.CIFAR100(root='./data', train=False,
                                                download=True, transform=transform_test)
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size,
                                                  shuffle=True, num_workers=1)
        testloader = torch.utils.data.DataLoader(testset, batch_size=100,
                                                 shuffle=False, num_workers=1)
    elif args.dataset == 'svhn':
        trainset = torchvision.datasets.SVHN(root='./data', split='train',
                                             download=True, transform=transform_train)
        testset = torchvision.datasets.SVHN(root='./data', split='test',
                                            download=True, transform=transform_test)
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size,
                                                  shuffle=True, num_workers=1)
        testloader = torch.utils.data.DataLoader(testset, batch_size=100,
                                                 shuffle=False, num_workers=1)
    elif args.dataset == 'mnist':
        trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                              download=True, transform=transform_train)
        testset = torchvision.datasets.MNIST(root='./data', train=False,
                                             download=True, transform=transform_test)
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size,
                                                  shuffle=True, num_workers=1)
        testloader = torch.utils.data.DataLoader(testset, batch_size=args.batch_size,
                                                 shuffle=False, num_workers=1)
    elif args.dataset == 'imagenet':
        print('Loading data from zip file')
        train_dir = os.path.join(args.data_dir, 'train.zip')
        valid_dir = os.path.join(args.data_dir, 'validation.zip')
        print('Loading data into memory')
        trainset = InMemoryZipDataset(train_dir, transform_train, 32)
        testset = InMemoryZipDataset(valid_dir, transform_test, 32)
        print('Found {} in training data'.format(len(trainset)))
        print('Found {} in validation data'.format(len(testset)))
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size,
                                                  shuffle=True, pin_memory=True,
                                                  num_workers=16)
        testloader = torch.utils.data.DataLoader(testset, batch_size=1, shuffle=False,
                                                 pin_memory=True, num_workers=1)
    else:
        raise ValueError('There is no such dataset')
    return trainset, testset, trainloader, testloader


def _test_and_certify(model, device, testloader, testset, num_classes,
                      save_path, args, radius_file):
    """Evaluate clean/smoothed accuracy, then certify robustness.

    Results are written under *save_path*; *radius_file* names the
    certification output file.
    """
    model.eval()  # BN/dropout must be in inference mode for evaluation
    test(model, device, testloader, num_classes, mode='both', sigma=args.sigma,
         beta=args.beta,
         file_path=(None if save_path is None else os.path.join(
             save_path, 'test_accuracy.txt')))
    certify(model, device, testset, num_classes, mode='hard', start_img=500,
            num_img=500, skip=1, sigma=args.sigma, beta=args.beta,
            matfile=(None if save_path is None else os.path.join(
                save_path, radius_file)))


def main():
    """Train (or evaluate) a randomized-smoothing model with MACER.

    Builds dataset/transforms, optimizer and LR schedule from ``args``,
    optionally resumes from a checkpoint, then either runs the training loop
    (with periodic certification and checkpointing) or a one-shot
    test-and-certify pass.
    """
    global args
    args = parser.parse_args()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # Run directory name encodes the hyper-parameter configuration.
    args.save = args.optimizer + '_' + args.model + '_' + args.dataset + '_' + args.training_method + '_' + \
        str(args.lr) + '_' + str(args.sigma) + '_' + str(args.lam) + '_' + str(args.gamma) + '_' + str(args.beta)
    save_path = os.path.join(args.save_path, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    logging.info("creating model %s", args.model)
    if args.dataset == 'imagenet':
        model = resnet50()
    else:
        model = resnet110()
    if device == 'cuda':
        model = model.to(device)
        cudnn.benchmark = True

    # BUGFIX: print() does not do %-interpolation; the original
    # print("run arguments: %s", args) printed a 2-tuple.
    print("run arguments: %s" % args)
    with open(save_path + '/log.txt', 'a') as f:
        f.writelines(str(args) + '\n')

    start_epoch = 0  # start from epoch 0 or last checkpoint epoch
    num_parameters = sum(l.nelement() for l in model.parameters())
    print("number of parameters: {}".format(num_parameters))

    # Data
    print('==> Preparing data..')
    transform_train, transform_test = _build_transforms(args.dataset)

    if args.optimizer == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=args.lr,
                               weight_decay=args.weight_decay)
        scheduler = MultiStepLR(optimizer, milestones=[200, 400],
                                gamma=args.lr_decay_ratio)
    else:
        optimizer = optim.SGD(model.parameters(), lr=args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay)
        if args.epochs <= 200:
            scheduler = MultiStepLR(optimizer, milestones=[60, 120],
                                    gamma=args.lr_decay_ratio)
        else:
            scheduler = MultiStepLR(optimizer, milestones=[200, 400],
                                    gamma=args.lr_decay_ratio)

    trainset, testset, trainloader, testloader = _build_loaders(
        args, transform_train, transform_test)

    if args.resume == 'True':
        # Load checkpoint. Silently skips when no checkpoint file exists.
        print('==> Resuming from checkpoint..')
        if os.path.exists(save_path + '/ckpt.t7'):
            checkpoint = torch.load(save_path + '/ckpt.t7')
            model.load_state_dict(checkpoint['model'])
            start_epoch = checkpoint['epoch'] + 1
            scheduler.step(start_epoch)

    # NOTE(review): cifar100 falls into the 10-class branch here, which looks
    # wrong for a 100-class dataset — but resnet110() is built with no class
    # count, so the model's output width cannot be confirmed from this file.
    # Behavior kept; verify against the model definition.
    if args.dataset == 'imagenet':
        num_classes = 1000
    else:
        num_classes = 10

    train_vector = []
    if args.task == 'train':
        for epoch in range(start_epoch, args.epochs + 1):
            lr = optimizer.param_groups[0]['lr']
            print('create an optimizer with learning rate as:', lr)
            model.train()
            start_time = time.time()

            # Schedule for the robust-loss weight `lam` (MACER warm-up).
            if args.dataset != 'imagenet':
                if args.sigma == 1.0:
                    lam = args.lam if epoch >= 200 else 0
                else:
                    lam = args.lam
            else:
                lam = 0 if epoch > 90 else args.lam

            c_loss, r_loss, acc = macer_train(args.training_method, args.sigma,
                                              lam, args.gauss_num, args.beta,
                                              args.gamma, num_classes, model,
                                              trainloader, optimizer, device,
                                              args.label_smoothing)
            # BUGFIX: step the LR scheduler *after* the epoch's optimizer
            # updates (PyTorch >= 1.1 contract). The original stepped it at
            # the top of the loop, decaying the LR one epoch early.
            scheduler.step()
            print('Training time for each epoch is %g, optimizer is %s, model is %s'
                  % (time.time() - start_time, args.optimizer,
                     args.model + str(args.depth)))

            # Periodic certification: long runs certify every 50 epochs past
            # 400; short runs every 30 epochs past 90.
            if args.epochs >= 200:
                do_certify = epoch % 50 == 0 and epoch >= 400
            else:
                do_certify = epoch % 30 == 0 and epoch >= 90
            if do_certify:
                print('===test(epoch={})==='.format(epoch))
                t1 = time.time()
                _test_and_certify(model, device, testloader, testset,
                                  num_classes, save_path, args,
                                  'certify_radius{}.txt'.format(epoch))
                t2 = time.time()
                print('Elapsed time: {}'.format(t2 - t1))
                model.train()  # restore training mode after evaluation

            print('\n Epoch: {0}\t'
                  'Cross Entropy Loss {c_loss:.4f} \t'
                  'Robust Loss {r_loss:.3f} \t'
                  'Total Loss {loss:.4f} \t'
                  'Accuracy {acc:.4f} \n'.format(epoch + 1, c_loss=c_loss,
                                                 r_loss=r_loss,
                                                 loss=c_loss - r_loss,
                                                 acc=acc))
            with open(save_path + '/log.txt', 'a') as f:
                f.write(str('\n Epoch: {0}\t'
                            'Cross Entropy Loss {c_loss:.4f} \t'
                            'Robust Loss {r_loss:.3f} \t'
                            'Accuracy {acc:.4f} \t'
                            'Total Loss {loss:.4f} \n'.format(
                                epoch + 1, c_loss=c_loss, r_loss=r_loss,
                                acc=acc, loss=c_loss - r_loss)) + '\n')

            # Always keep a rolling checkpoint; snapshot every 10 epochs.
            state = {
                'model': model.state_dict(),
                'epoch': epoch,
            }
            if not os.path.isdir(save_path):
                os.mkdir(save_path)
            torch.save(state, save_path + '/ckpt.t7')
            if epoch % 10 == 0:
                torch.save(state, save_path + '/{}.t7'.format(epoch))

            # Append this epoch's metrics to the persisted training history.
            if os.path.exists(save_path + '/train_vector'):
                with open(save_path + '/train_vector', 'rb') as fp:
                    train_vector = pickle.load(fp)
            train_vector.append([epoch, c_loss, r_loss, acc])
            with open(save_path + '/train_vector', 'wb') as fp:
                pickle.dump(train_vector, fp)
    else:
        # Evaluation-only task. (The helper also fixes a latent bug: the
        # original never switched the model to eval mode here.)
        _test_and_certify(model, device, testloader, testset, num_classes,
                          save_path, args, 'certify_radius.txt')
def main():
    """Train a single base classifier (standard Gaussian-augmentation or
    MACER scheme), certify it on the test set, and save a checkpoint."""
    ckptdir = None if args.ckptdir == 'none' else args.ckptdir
    if ckptdir is not None and not os.path.isdir(ckptdir):
        os.makedirs(ckptdir)
    outdir = None if args.outdir == 'none' else args.outdir
    if outdir is not None and not os.path.isdir(outdir):
        os.makedirs(outdir)
    checkpoint = None if args.resume_ckpt == 'none' else args.resume_ckpt

    ########## models to train ##########
    model = get_architecture(args.arch)
    name = args.arch if args.name == 'none' else args.name

    ########## local args ##########
    start_epoch = 0
    # BUGFIX: validate the training scheme up front with if/elif/else. The
    # original used two independent `if`s and only raised ValueError inside
    # the epoch loop, so an unknown scheme left `train_epochs`/`milestones`
    # undefined and crashed with a confusing NameError at scheduler creation.
    if args.train_scheme == 'std':
        train_epochs = 400
        milestones = [150, 300]
    elif args.train_scheme == 'macer':
        train_epochs = 440
        milestones = [200, 400]
    else:
        raise ValueError('train_scheme must be either std or macer')

    ########## dataset ##########
    trainset, testset, transform_test = get_dataset(args.dataset)
    # Hold out the last `val_num` training examples; train on the rest.
    train_set = torch.utils.data.Subset(
        trainset, [i for i in range(len(trainset) - args.val_num)])
    trainloader = torch.utils.data.DataLoader(train_set,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              num_workers=args.workers)
    num_classes = 10
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    T0 = time.time()
    model = model.to(device)
    optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9,
                          weight_decay=5e-4)
    scheduler = MultiStepLR(optimizer, milestones=milestones, gamma=0.1)
    filename = '{}/{}'.format(outdir, name)

    # Resume from checkpoint if required
    if checkpoint is not None:
        print('==> Resuming from checkpoint..')
        print(checkpoint)
        checkpoint = torch.load(checkpoint)
        model.load_state_dict(checkpoint['net'])
        start_epoch = checkpoint['epoch']
        scheduler.step(start_epoch)  # fast-forward the LR schedule

    for epoch in range(start_epoch + 1, train_epochs + 1):
        print('===train(epoch={}, model={})==='.format(epoch, name))
        t1 = time.time()
        model.train()
        if args.train_scheme == 'std':
            std_train(args.sigma, 1, num_classes, model, trainloader,
                      optimizer, device)
        else:  # 'macer' — the only other scheme; validated above
            macer_train(args.sigma, args.lbd, 16, 16.0, 8.0, num_classes,
                        model, trainloader, optimizer, device)
        scheduler.step()
        t2 = time.time()
        print('Elapsed time: {}'.format(t2 - t1))
    T1 = time.time()
    print('Total elapsed time for training: {}'.format(T1 - T0))

    # Certify test
    print('===test(model={})==='.format(name))
    t1 = time.time()
    model.eval()
    certify(model, device, testset, transform_test, num_classes, filename,
            start_img=args.start_img, num_img=args.num_img, skip=args.skip,
            sigma=args.sigma)
    t2 = time.time()
    print('Elapsed time: {}'.format(t2 - t1))

    if ckptdir is not None:
        # Save checkpoint
        print('==> Saving model {}.pth..'.format(name))
        try:
            state = {'net': model.state_dict(), 'epoch': epoch}
            torch.save(state, '{}/{}.pth'.format(ckptdir, name))
        except OSError:
            print('OSError while saving {}.pth'.format(name))
            print('Ignoring...')