def main():
    global lr, best_prec1
    model_path = 'pre_trained/resnet_cifar10.pth'
    epochs = 10

    model = resnet.ResNet34(10)
    model.load_state_dict(torch.load(model_path, map_location=device))
    test_model = resnet.ResNet34(10)
    test_model.load_state_dict(torch.load(model_path, map_location=device))
    # model = nn.Sequential(nn.BatchNorm2d(num_features=3, affine=False), pretrained)
    # num_ftrs = model.fc.in_features
    # model.fc = nn.Linear(num_ftrs, 10)
    model.cuda()
    test_model.cuda()

    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), lr,
                                momentum=momentum, weight_decay=weight_decay)
    # lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
    #     optimizer, milestones=[25, 50, 75], gamma=0.2)

    # train_loader, val_loader, class_names = datasets.get_train_test_dataset(dataset, batch_size, True)
    # dataiter = iter(train_loader)
    # images, labels = dataiter.next()
    train_loader, val_loader, _ = get_train_test_dataset('cifar10', batch_size, True)

    for epoch in range(epochs):
        adjust_learning_rate(optimizer, epoch, lr)

        # train for one epoch
        train_with_adv_exs(train_loader, model, test_model, criterion,
                           optimizer, epoch, batch_size, 10)
        # lr_scheduler.step()

        # evaluate on validation set
        prec1 = validate_epoch(val_loader, model, criterion)

        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'arch': arch,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
                'optimizer': optimizer.state_dict(),
            }, is_best)
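# The loop above calls adjust_learning_rate() and save_checkpoint(), which are defined
# elsewhere in this repository. A minimal sketch of what such helpers commonly look like,
# assuming a step-decay schedule and a torch.save-based checkpoint; the decay factor,
# milestones, and file names below are illustrative assumptions, not the repository's
# actual values.
import shutil

import torch


def adjust_learning_rate(optimizer, epoch, base_lr, milestones=(25, 50, 75), gamma=0.2):
    """Hypothetical step decay: scale base_lr by gamma once per milestone already passed."""
    new_lr = base_lr * (gamma ** sum(epoch >= m for m in milestones))
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr


def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    """Hypothetical checkpointing: save the latest state, copy it when it is the best so far."""
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')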
def __init__(self, n_classes, init_lr=0.1, momentum=0.9, weight_decay=5e-4,
             device='cuda', log_dir='', ckpt_file='', model='resnet-10',
             multi_gpu=True):
    super().__init__(n_classes, init_lr, momentum, weight_decay, device)
    self.n_planes = [64, 128, 256, 512]

    if model == 'resnet-10':
        self.net = resnet.ResNet10(n_classes=self.n_classes, n_output_planes=self.n_planes)
    elif model == 'resnet-18':
        self.net = resnet.ResNet18(n_classes=self.n_classes, n_output_planes=self.n_planes)
    elif model == 'resnet-34':
        self.net = resnet.ResNet34(n_classes=self.n_classes, n_output_planes=self.n_planes)
    elif model == 'resnet-50':
        self.net = resnet.ResNet50(n_classes=self.n_classes, n_output_planes=self.n_planes)
    self.net.to(self.device)

    if ckpt_file:
        print('loading pretrained classifier checkpoint')
        if device == 'cpu':
            ckpt = torch.load(ckpt_file, map_location=lambda storage, loc: storage)
        else:
            ckpt = torch.load(ckpt_file)
        self.net.load_state_dict(ckpt['clf'])

    if multi_gpu and self.device == 'cuda':
        print('replicating model on multiple gpus ... ')
        self.net = torch.nn.DataParallel(self.net)

    self.optim = torch.optim.SGD(self.net.parameters(), self.init_lr,
                                 momentum=self.momentum, weight_decay=self.weight_decay)
    self.criterion = torch.nn.CrossEntropyLoss().to(self.device)

    print('Number of dnn parameters: {}'.format(
        sum([p.data.nelement() for p in self.net.parameters()])))
    if log_dir:
        utils.save_model_desc(self.net, join(log_dir, 'classifier_desc.txt'))
def get_model(model_name, parameters):
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    if model_name == 'resnet18':
        net = resnet.ResNet18(parameters, num_classes=10).to(device)
    elif model_name == 'resnet34':
        net = resnet.ResNet34(parameters, num_classes=10).to(device)
    elif model_name == 'resnet50':
        net = resnet.ResNet50(parameters, num_classes=10).to(device)
    elif model_name == 'resnet101':
        net = resnet.ResNet101(parameters, num_classes=10).to(device)
    elif model_name == 'resnet152':
        net = resnet.ResNet152(parameters, num_classes=10).to(device)
    elif model_name == 'vgg16':
        net = 0
    else:
        print("The requested student model is not currently supported!\n")
        net = -1
    return net
def get_model(device):
    """
    :param device: instance of torch.device
    :return: An instance of torch.nn.Module
    """
    num_classes = 2
    if config["dataset"] == "Cifar100":
        num_classes = 100
    elif config["dataset"] == "Cifar10":
        num_classes = 10
    elif config["dataset"] == "15-Scene":
        num_classes = 15
    elif config["dataset"] == "MNIST":
        num_classes = 10

    model = {
        "resnet10": lambda: resnet.ResNet10(num_classes=num_classes),
        "resnet18": lambda: resnet.ResNet18(num_classes=num_classes),
        "resnet34": lambda: resnet.ResNet34(num_classes=num_classes),
        "resnet50": lambda: resnet.ResNet50(num_classes=num_classes),
        "resnet101": lambda: resnet.ResNet101(num_classes=num_classes),
        "resnet152": lambda: resnet.ResNet152(num_classes=num_classes),
        "bert": lambda: modeling_bert_appendix.BertImage(config, num_classes=num_classes),
    }[config["model"]]()

    model.to(device)
    if device == "cuda":
        # model = torch.nn.DataParallel(model)  # multiple GPUs not available
        # for free on Google Colab -EU
        torch.backends.cudnn.benchmark = True

    return model
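# A minimal usage sketch for the factory above, assuming the module-level `config`
# dict it reads from; the keys ("dataset", "model") come from the function body,
# but the concrete values here are illustrative assumptions.
config = {"dataset": "Cifar10", "model": "resnet34"}
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = get_model(device)  # builds resnet.ResNet34(num_classes=10) and moves it to `device`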
def __init__(self, model_type='resnet34', num_output=256, input_size=224,
             pretrained=False, gray_scale=False):
    """
    Args:
        model_type (string): type of model to be used.
        num_output (int): number of neurons in the last feature layer
        pretrained (boolean): whether to use a pre-trained model from ImageNet
    """
    super(FeatureLayer, self).__init__()
    self.model_type = model_type
    self.num_output = num_output
    if self.model_type == 'resnet34':
        resnet34 = models.resnet34()
        if pretrained:
            # load pre-trained model
            resnet34.load_state_dict(torch.load("/home/nicholas/.torch/models/resnet34-333f7ec4.pth"))
        # replace the last layer
        resnet34.fc = None
        resnet34.fc = nn.Linear(512, self.num_output)
        self.add_module('resnet34', resnet34)
    elif self.model_type == 'resnet50':
        resnet50 = models.resnet50()
        if pretrained:
            # load pre-trained model
            resnet50.load_state_dict(torch.load("/home/nicholas/.torch/models/resnet50-19c8e357.pth"))
        # replace the last layer
        resnet50.fc = None
        resnet50.fc = nn.Linear(2048, self.num_output)
        self.add_module('resnet50', resnet50)
    elif self.model_type == 'hier_res':
        model = resnet.ResNet34(self.num_output, pretrained)
        self.add_module('hier_res34', model)
    elif self.model_type == 'hybrid':
        model = resnet.Hybridmodel(self.num_output)
        self.add_module('hybrid_model', model)
    else:
        raise NotImplementedError
def main():
    dataloaders, classes = get_train_test_dataset('cifar10', 64, False)
    print("Data loaded successfully")

    # Loading base model
    base_res = resnet.ResNet34(10)
    base_res.load_state_dict(torch.load('pre_trained/resnet_cifar10.pth'))
    base_res.cuda()

    # Loading FGSM trained model
    # fgsm_res = resnet.ResNet34(10)
    # chkpoint = torch.load('saved_models/resnet34_checkpoint_cifar10_fgsm_317.pth.tar')
    # fgsm_res.load_state_dict(chkpoint['state_dict'])
    # fgsm_res.cuda()

    # Loading ILLC trained model
    # illc_res = resnet.ResNet34(10)
    # chkpoint = torch.load('saved_models/resnet34_checkpoint_cifar10_illc_406.pth.tar')
    # illc_res.load_state_dict(chkpoint['state_dict'])
    # illc_res.cuda()

    # Loading BIM trained model
    bim_res = resnet.ResNet34(10)
    chkpoint = torch.load('saved_models/resnet34_checkpoint_cifar10_trial.pth.tar')
    bim_res.load_state_dict(chkpoint['state_dict'])
    bim_res.cuda()

    # Loading DeepFool trained model
    df_res = resnet.ResNet34(10)
    chkpoint = torch.load('saved_models/resnet34_checkpoint_cifar10_df.pth.tar')
    df_res.load_state_dict(chkpoint['state_dict'])
    df_res.cuda()

    # Loading CWL2 trained model
    cw_res = resnet.ResNet34(10)
    chkpoint = torch.load('saved_models/resnet34_checkpoint_cifar10_cw_317.pth.tar')
    cw_res.load_state_dict(chkpoint['state_dict'])
    cw_res.cuda()

    # Loading CWLinf trained model
    cwinf_res = resnet.ResNet34(10)
    chkpoint = torch.load('saved_models/resnet34_checkpoint_cifar10_cwinf_406.pth.tar')
    cwinf_res.load_state_dict(chkpoint['state_dict'])
    cwinf_res.cuda()

    a = run_ensemble_test(base_res, cwinf_res, bim_res, df_res, cw_res, dataloaders)

    # print()
    # print("Evaluating FGSM Attack")
    # a = run_ens_fgsm_attack(dataloaders, base_res, cwinf_res, bim_res, df_res, cw_res)
    # # illc_res, df_res, cw_res, cwinf_res
    # print()
    # print("Evaluating BIM Attack")
    # a = run_ens_bim_attack(dataloaders, base_res, cwinf_res, bim_res, df_res, cw_res, 10)
    # print()
    # print("Evaluating ILLC Attack")
    # a = run_ens_illc_attack(dataloaders, base_res, cwinf_res, bim_res, df_res, cw_res, 10)
    # print()
    # print("Evaluating CWL2 Attack")
    # a = run_ens_cw_attack1(dataloaders, base_res, cwinf_res, bim_res, df_res, cw_res, 10)
    # print()
    # print("Evaluating CWLinf Attack")
    # a = run_ens_cw_attack(dataloaders, base_res, cwinf_res, bim_res, df_res, cw_res, 10, 'linf')
    # print()
    # print("Evaluating DeepFool Attack")
    # a = run_ens_deepfool_attack(dataloaders, base_res, cwinf_res, bim_res, df_res, cw_res, 10)

    # print(true_label[10][0])
    # print(model1_prob[10][0])
    # print(model2_prob[10][0])
    # print(model3_prob[10][0])
    # print(model4_prob[10][0])
    # print(total_prob[10][0])

    true_label_np = np.array(true_label)
    model1_prob_np = np.array(model1_prob)
    model2_prob_np = np.array(model2_prob)
    model3_prob_np = np.array(model3_prob)
    model4_prob_np = np.array(model4_prob)
    total_prob_np = np.array(total_prob)
    # print(true_label_np.shape, model1_prob_np.shape, model2_prob_np.shape, model3_prob_np.shape, model4_prob_np.shape)

    np.savetxt('data/true_label.csv', true_label_np, delimiter=',')
    np.savetxt('data/model1_prob.csv', model1_prob_np, delimiter=',')
    np.savetxt('data/model2_prob.csv', model2_prob_np, delimiter=',')
    np.savetxt('data/model3_prob.csv', model3_prob_np, delimiter=',')
    np.savetxt('data/model4_prob_np.csv', model4_prob_np, delimiter=',')
    np.savetxt('data/total_prob_np.csv', total_prob_np, delimiter=',')
def main():
    # Data loader (input pipeline)
    print('loading dataset...')
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size,
                                               num_workers=args.num_workers,
                                               drop_last=False, shuffle=False)
    val_loader = torch.utils.data.DataLoader(dataset=val_dataset, batch_size=batch_size,
                                             num_workers=args.num_workers,
                                             drop_last=False, shuffle=False)
    test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size,
                                              num_workers=args.num_workers,
                                              drop_last=False, shuffle=False)

    # Define models
    print('building model...')
    if args.dataset == 'mnist':
        clf1 = LeNet()
    if args.dataset == 'fashionmnist':
        clf1 = resnet.ResNet18_F(10)
    if args.dataset == 'cifar10':
        clf1 = resnet.ResNet34(10)
    if args.dataset == 'svhn':
        clf1 = resnet.ResNet34(10)
    clf1.cuda()
    optimizer = torch.optim.SGD(clf1.parameters(), lr=args.lr, weight_decay=args.weight_decay)

    with open(txtfile, "a") as myfile:
        myfile.write('epoch train_acc val_acc test_acc\n')

    epoch = 0
    train_acc = 0
    val_acc = 0
    # evaluate the model with random weights
    test_acc = evaluate(test_loader, clf1)
    print('Epoch [%d/%d] Test Accuracy on the %s test data: Model1 %.4f %%' %
          (epoch + 1, args.n_epoch_1, len(test_dataset), test_acc))
    # save results
    with open(txtfile, "a") as myfile:
        myfile.write(str(int(epoch)) + ' ' + str(train_acc) + ' ' +
                     str(val_acc) + ' ' + str(test_acc) + ' ' + "\n")

    best_acc = 0.0
    # training
    for epoch in range(1, args.n_epoch_1):
        # train the model
        clf1.train()
        train_acc = train(clf1, train_loader, epoch, optimizer, nn.CrossEntropyLoss())
        # validation
        val_acc = evaluate(val_loader, clf1)
        # evaluate the model
        test_acc = evaluate(test_loader, clf1)
        # save results
        print('Epoch [%d/%d] Train Accuracy on the %s train data: Model %.4f %%' %
              (epoch + 1, args.n_epoch_1, len(train_dataset), train_acc))
        print('Epoch [%d/%d] Val Accuracy on the %s val data: Model %.4f %%' %
              (epoch + 1, args.n_epoch_1, len(val_dataset), val_acc))
        print('Epoch [%d/%d] Test Accuracy on the %s test data: Model %.4f %%' %
              (epoch + 1, args.n_epoch_1, len(test_dataset), test_acc))
        with open(txtfile, "a") as myfile:
            myfile.write(str(int(epoch)) + ' ' + str(train_acc) + ' ' +
                         str(val_acc) + ' ' + str(test_acc) + ' ' + "\n")
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(clf1.state_dict(), model_save_dir + '/' + 'model.pth')

    print('Running matrix factorization...')
    clf1.load_state_dict(torch.load(model_save_dir + '/' + 'model.pth'))
    A = respresentations_extract(train_loader, clf1, len(train_dataset), args.dim, batch_size)
    A_val = respresentations_extract(val_loader, clf1, len(val_dataset), args.dim, batch_size)
    A_total = np.append(A, A_val, axis=0)
    W_total, H_total, error = train_m(A_total, args.basis, args.iteration_nmf, 1e-5)
    for i in range(W_total.shape[0]):
        for j in range(W_total.shape[1]):
            if W_total[i, j] < 1e-6:
                W_total[i, j] = 0.

    W = W_total[0:len(train_dataset), :]
    W_val = W_total[len(train_dataset):, :]

    print('Estimating transition matrix... Please wait...')
    logits_matrix = probability_extract(train_loader, clf1, len(train_dataset),
                                        args.num_classes, batch_size)
    idx_matrix_group, transition_matrix_group = estimate_matrix(logits_matrix, model_save_dir)
    logits_matrix_val = probability_extract(val_loader, clf1, len(val_dataset),
                                            args.num_classes, batch_size)
    idx_matrix_group_val, transition_matrix_group_val = estimate_matrix(logits_matrix_val, model_save_dir)

    func = nn.MSELoss()
    model = Matrix_optimize(args.basis, args.num_classes)
    optimizer_1 = torch.optim.Adam(model.parameters(), lr=0.001)
    basis_matrix_group = basis_matrix_optimize(model, optimizer_1, args.basis, args.num_classes,
                                               W, transition_matrix_group, idx_matrix_group,
                                               func, model_save_dir, args.n_epoch_4)
    basis_matrix_group_val = basis_matrix_optimize(model, optimizer_1, args.basis, args.num_classes,
                                                   W_val, transition_matrix_group_val,
                                                   idx_matrix_group_val, func, model_save_dir,
                                                   args.n_epoch_4)

    for i in range(basis_matrix_group.shape[0]):
        for j in range(basis_matrix_group.shape[1]):
            for k in range(basis_matrix_group.shape[2]):
                if basis_matrix_group[i, j, k] < 1e-6:
                    basis_matrix_group[i, j, k] = 0.

    optimizer_ = torch.optim.SGD(clf1.parameters(), lr=args.lr,
                                 weight_decay=args.weight_decay, momentum=args.momentum)
    best_acc = 0.0
    for epoch in range(1, args.n_epoch_2):
        # train the model with loss correction
        clf1.train()
        train_acc = train_correction(clf1, train_loader, epoch, optimizer_, W,
                                     basis_matrix_group, batch_size, args.num_classes, args.basis)
        # validation
        val_acc = val_correction(clf1, val_loader, epoch, W_val, basis_matrix_group_val,
                                 batch_size, args.num_classes, args.basis)
        # evaluate the model
        test_acc = evaluate(test_loader, clf1)
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(clf1.state_dict(), model_save_dir + '/' + 'model.pth')
        with open(txtfile, "a") as myfile:
            myfile.write(str(int(epoch)) + ' ' + str(train_acc) + ' ' +
                         str(val_acc) + ' ' + str(test_acc) + ' ' + "\n")
        # save results
        print('Epoch [%d/%d] Train Accuracy on the %s train data: Model %.4f %%' %
              (epoch + 1, args.n_epoch_2, len(train_dataset), train_acc))
        print('Epoch [%d/%d] Val Accuracy on the %s val data: Model %.4f %%' %
              (epoch + 1, args.n_epoch_2, len(val_dataset), val_acc))
        print('Epoch [%d/%d] Test Accuracy on the %s test data: Model %.4f %%' %
              (epoch + 1, args.n_epoch_2, len(test_dataset), test_acc))

    clf1.load_state_dict(torch.load(model_save_dir + '/' + 'model.pth'))
    optimizer_r = torch.optim.Adam(clf1.parameters(), lr=args.lr_revision,
                                   weight_decay=args.weight_decay)
    nn.init.constant_(clf1.T_revision.weight, 0.0)

    for epoch in range(1, args.n_epoch_3):
        # train the model with revision
        clf1.train()
        train_acc = train_revision(clf1, train_loader, epoch, optimizer_r, W,
                                   basis_matrix_group, batch_size, args.num_classes, args.basis)
        # validation
        val_acc = val_revision(clf1, val_loader, epoch, W_val, basis_matrix_group,
                               batch_size, args.num_classes, args.basis)
        # evaluate the model
        test_acc = evaluate(test_loader, clf1)
        with open(txtfile, "a") as myfile:
            myfile.write(str(int(epoch)) + ' ' + str(train_acc) + ' ' +
                         str(val_acc) + ' ' + str(test_acc) + ' ' + "\n")
        # save results
        print('Epoch [%d/%d] Train Accuracy on the %s train data: Model %.4f %%' %
              (epoch + 1, args.n_epoch_3, len(train_dataset), train_acc))
        print('Epoch [%d/%d] Val Accuracy on the %s val data: Model %.4f %%' %
              (epoch + 1, args.n_epoch_3, len(val_dataset), val_acc))
        print('Epoch [%d/%d] Test Accuracy on the %s test data: Model %.4f %%' %
              (epoch + 1, args.n_epoch_3, len(test_dataset), test_acc))
def main(args):
    if args.dataset == 'cifar10':
        test_dataloader = data.DataLoader(
            datasets.CIFAR10(args.data_path, download=True,
                             transform=cifar_transformer(), train=False),
            batch_size=args.batch_size, drop_last=False)
        # train_dataset = CIFAR10(args.data_path)
        querry_dataloader = data.DataLoader(CIFAR10(args.data_path),
                                            batch_size=args.batch_size, drop_last=True)
        args.num_images = 50000
        # args.num_val = 5000
        # args.budget = 2500
        # args.initial_budget = 5000
        args.num_classes = 10
    elif args.dataset == 'cifar100':
        test_dataloader = data.DataLoader(
            datasets.CIFAR100(args.data_path, download=True,
                              transform=cifar_transformer(), train=False),
            batch_size=args.batch_size, drop_last=False)
        train_dataset = CIFAR100(args.data_path)
        querry_dataloader = data.DataLoader(CIFAR100(args.data_path),
                                            batch_size=args.batch_size,
                                            shuffle=True, drop_last=True)
        args.num_val = 5000
        args.num_images = 50000
        args.budget = 2500
        args.initial_budget = 5000
        args.num_classes = 100
    elif args.dataset == 'tinyimagenet':
        test_dataloader = data.DataLoader(
            TinyImageNet(args.data_path, transform=tinyimagenet_transform(), train=False),
            batch_size=args.batch_size, drop_last=False)
        querry_dataloader = data.DataLoader(
            TinyImageNet(args.data_path, transform=tinyimagenet_transform(), train=True),
            shuffle=True, batch_size=args.batch_size, drop_last=True)
        args.num_classes = 200
        args.num_images = 100000
    elif args.dataset == 'imagenet':
        test_dataloader = data.DataLoader(
            datasets.ImageFolder(args.data_path, transform=imagenet_transformer()),
            drop_last=False, batch_size=args.batch_size)
        train_dataset = ImageNet(args.data_path)
        args.num_val = 128120
        args.num_images = 1281167
        args.budget = 64060
        args.initial_budget = 128120
        args.num_classes = 1000
    else:
        raise NotImplementedError

    args.cuda = torch.cuda.is_available()

    # all_indices = set(np.arange(args.num_images))
    # val_indices = random.sample(all_indices, args.num_val)
    # all_indices = np.setdiff1d(list(all_indices), val_indices)
    # initial_indices = random.sample(list(all_indices), args.initial_budget)
    # sampler = data.sampler.SubsetRandomSampler(initial_indices)
    # sampler = data.sampler.SubsetRandomSampler(list(all_indices))
    # val_sampler = data.sampler.SubsetRandomSampler(val_indices)

    # dataset with labels available
    # querry_dataloader = data.DataLoader(train_dataset, sampler=sampler,
    #                                     batch_size=args.batch_size, drop_last=True)
    # val_dataloader = data.DataLoader(train_dataset, sampler=val_sampler,
    #                                  batch_size=args.batch_size, drop_last=False)
    val_dataloader = None

    args.cuda = args.cuda and torch.cuda.is_available()
    solver = Solver(args, test_dataloader)
    # splits = [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4]
    splits = [1.]

    # current_indices = list(initial_indices)
    # accuracies = []

    print('==> Building models...')
    # task_model = vgg.vgg16_bn(num_classes=args.num_classes)
    task_model = resnet.ResNet34(num_classes=args.num_classes)
    # task_model = model.Approximator(args.latent_dim, args.num_classes)
    vae = model.VAE(args.latent_dim)
    discriminator = model.Discriminator(args.latent_dim)

    if args.cuda:
        vae = vae.cuda()
        discriminator = discriminator.cuda()
        task_model = task_model.cuda()
        vae = torch.nn.DataParallel(vae)
        discriminator = torch.nn.DataParallel(discriminator)
        task_model = torch.nn.DataParallel(task_model)

    for epoch in range(args.train_epochs):
        # unlabeled_indices = np.setdiff1d(list(all_indices), current_indices)
        # unlabeled_sampler = data.sampler.SubsetRandomSampler(unlabeled_indices)
        # unlabeled_dataloader = data.DataLoader(train_dataset,
        #                                        sampler=unlabeled_sampler,
        #                                        batch_size=args.batch_size, drop_last=False)
        unlabeled_dataloader = None

        print('\nEpoch: %d' % epoch)
        # train the models on the current data
        acc, vae, discriminator, task_model = solver.train(epoch, querry_dataloader,
                                                           val_dataloader, task_model,
                                                           vae, discriminator,
                                                           unlabeled_dataloader)
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
data_train = CIFAR10(args.data, transform=transform_train)
data_test = CIFAR10(args.data, train=False, transform=transform_test)

data_train_loader = DataLoader(data_train, batch_size=128, shuffle=True, num_workers=8)
data_test_loader = DataLoader(data_test, batch_size=100, num_workers=0)

net = resnet.ResNet34().cuda()
criterion = torch.nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)

if args.dataset == 'cifar100':
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
trainy = 2. * ((years - lb) / (ub - lb)) - 1
trainloader.dataset.years = trainy
trainloader.dataset.oldyears = years.copy()

# Other prototype loading.
stylepolars = np.load("dat/prototypes/sgd/prototypes-46d-46c.npy")
stylepolars = torch.from_numpy(stylepolars).float()
allpolars = [yearpolars, stylepolars]

# Network type.
if args.network == "std":
    model = convnet.Std(args.output_dims, None)
elif args.network == "resnet16":
    model = resnet.ResNet18(args.output_dims, None)
elif args.network == "resnet32":
    model = resnet.ResNet34(args.output_dims, None)
model = model.to(device)

# To CUDA.
if args.multigpu == 1:
    model = torch.nn.DataParallel(model.cuda())
else:
    model = model.to(device)

# Network parameters.
optimname = args.optimizer
lr = args.learning_rate
momentum = args.momentum
decay = args.decay
params = model.parameters()

# Set the optimizer.
def __init__(self, bn_statistics_group_size=1):
    self.name = 'resnet'
    self.cnn = resnet.ResNet34(
        bn_statistics_group_size=bn_statistics_group_size,
        name=self.name)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)

# Load data.
batch_size = args.batch_size
trainloader, testloader = helper.load_cub(args.datadir, batch_size, kwargs)
nr_classes = 200

# Load the polars and update the trainy labels.
classpolars = torch.from_numpy(np.load(args.hpnfile)).float()
args.output_dims = int(args.hpnfile.split("/")[-1].split("-")[1][:-1])

# Load the model.
if args.network == "resnet32":
    model = resnet.ResNet34(args.output_dims, classpolars)
model = model.to(device)

# Load the optimizer.
optimizer = helper.get_optimizer(args.optimizer, model.parameters(),
                                 args.learning_rate, args.momentum, args.decay)

# Initialize the loss functions.
f_loss = nn.CosineSimilarity(eps=1e-9).cuda()

# Main loop.
testscores = []
learning_rate = args.learning_rate
for i in range(args.epochs):
    print("---")
    # Learning rate decay.
testloader = torch.utils.data.DataLoader(testset, batch_size=100,
                                         shuffle=False, num_workers=2)
# Note: when submitting to the competition, shuffle must be set to False!

classes = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
           'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J',
           'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T',
           'U', 'V', 'W', 'X', 'Y', 'Z',
           'a', 'b', 'd', 'e', 'f', 'g', 'h', 'n', 'q', 'r', 't',)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

if layers == 18:
    net = resnet.ResNet18(num_class=num_class, channels=channels).to(device)
elif layers == 34:
    net = resnet.ResNet34(num_class=num_class, channels=channels).to(device)
elif layers == 50:
    net = resnet.ResNet50(num_class=num_class, channels=channels).to(device)
elif layers == 101:
    net = resnet.ResNet101(num_class=num_class, channels=channels).to(device)
else:
    net = resnet.ResNet152(num_class=num_class, channels=channels).to(device)
print(net)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
# scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[3, 7], gamma=0.1)

best_score = 0.0
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
data_train = CIFAR10(args.data, download=True, transform=transform_train)
data_test = CIFAR10(args.data, train=False, transform=transform_test)

data_train_loader = DataLoader(data_train, batch_size=128, shuffle=True, num_workers=8)
data_test_loader = DataLoader(data_test, batch_size=100, num_workers=0)

net = resnet.ResNet34().to(device)
criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)

if args.dataset == 'cifar100':
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
def main(opt):
    """ """
    print(f'image shape: {opt.channels} x {opt.img_size} x {opt.img_size}')

    if torch.cuda.device_count() == 0:
        device = torch.device('cpu')
    else:
        device = torch.device('cuda')

    accr = 0
    accr_best = 0
    generator = Generator(opt).to(device)

    if opt.dataset == 'imagenet':
        assert opt.teacher_model_name != 'none', 'DAFL does not support imagenet'
        teacher = eval(f'models.{opt.teacher_model_name}(pretrained = True)')
        teacher = teacher.to(device)
        # teacher.eval()

        assert opt.student_model_name != 'none', 'DAFL does not support imagenet'
        net = eval(f'models.{opt.student_model_name}(pretrained = False)')
        net = net.to(device)

        transform_train = transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])
        transform_test = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

        # for optimizing the teacher model
        if opt.train_teacher:
            data_train = torchvision.datasets.ImageNet(opt.data_dir, split='train',
                                                       transform=transform_train)
            data_train_loader = DataLoader(data_train, batch_size=opt.batch_size,
                                           shuffle=True, num_workers=4, pin_memory=True)
            optimizer = torch.optim.Adam(teacher.parameters(), lr=0.001)

        # for optimizing the student model
        data_test = torchvision.datasets.ImageNet(opt.data_dir, split='val',
                                                  transform=transform_test)
        data_test_loader = DataLoader(data_test, batch_size=opt.batch_size,
                                      num_workers=4, shuffle=False)
        optimizer_G = torch.optim.Adam(generator.parameters(), lr=opt.lr_G)
        optimizer_S = torch.optim.SGD(net.parameters(), lr=opt.lr_S,
                                      momentum=0.9, weight_decay=5e-4)
    else:
        if opt.dataset == 'MNIST':
            # use the original DAFL network
            if opt.teacher_model_name == 'none':
                teacher = LeNet5().to(device)
            # use torchvision models
            else:
                teacher = eval(f'models.{opt.teacher_model_name}(pretrained = False)')
                teacher.conv1 = nn.Conv2d(1, teacher.conv1.out_channels,
                                          teacher.conv1.kernel_size, teacher.conv1.stride,
                                          teacher.conv1.padding, teacher.conv1.dilation,
                                          teacher.conv1.groups, teacher.conv1.bias,
                                          teacher.conv1.padding_mode)
                teacher.fc = nn.Linear(teacher.fc.in_features, 10)
                teacher = teacher.to(device)

            # use the original DAFL network
            if opt.student_model_name == 'none':
                net = LeNet5Half().to(device)
            # use torchvision models
            else:
                net = eval(f'models.{opt.student_model_name}()')
                net.conv1 = nn.Conv2d(1, net.conv1.out_channels, net.conv1.kernel_size,
                                      net.conv1.stride, net.conv1.padding,
                                      net.conv1.dilation, net.conv1.groups,
                                      net.conv1.bias, net.conv1.padding_mode)
                net.fc = nn.Linear(net.fc.in_features, 10)
                net = net.to(device)

            # for optimizing the teacher model
            if opt.train_teacher:
                data_train = MNIST(opt.data_dir, download=True,
                                   transform=transforms.Compose([
                                       transforms.Resize((32, 32)),
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.1307,), (0.3081,))
                                   ]))
                data_train_loader = DataLoader(data_train, batch_size=256,
                                               shuffle=True, num_workers=4)
                optimizer = torch.optim.Adam(teacher.parameters(), lr=0.001)

            # for optimizing the student model
            data_test = MNIST(opt.data_dir, download=True, train=False,
                              transform=transforms.Compose([
                                  transforms.Resize((32, 32)),
                                  transforms.ToTensor(),
                                  transforms.Normalize((0.1307,), (0.3081,))
                              ]))
            data_test_loader = DataLoader(data_test, batch_size=64,
                                          num_workers=4, shuffle=False)
            optimizer_G = torch.optim.Adam(generator.parameters(), lr=opt.lr_G)
            optimizer_S = torch.optim.Adam(net.parameters(), lr=opt.lr_S)

        elif opt.dataset == 'cifar10':
            # use the original DAFL network
            if opt.teacher_model_name == 'none':
                teacher = resnet.ResNet34().to(device)
            # use torchvision models
            else:
                teacher = eval(f'models.{opt.teacher_model_name}(pretrained = False)')
                teacher.fc = nn.Linear(teacher.fc.in_features, 10)
                teacher = teacher.to(device)

            # use the original DAFL network
            if opt.student_model_name == 'none':
                net = resnet.ResNet18().to(device)
            # use torchvision models
            else:
                net = eval(f'models.{opt.student_model_name}()')
                net.fc = nn.Linear(net.fc.in_features, 10)
                net = net.to(device)

            transform_train = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
            ])
            transform_test = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
            ])

            # for optimizing the teacher model
            if opt.train_teacher:
                data_train = CIFAR10(opt.data_dir, download=True, transform=transform_train)
                data_train_loader = DataLoader(data_train, batch_size=128,
                                               shuffle=True, num_workers=4)
                optimizer = torch.optim.SGD(teacher.parameters(), lr=0.1,
                                            momentum=0.9, weight_decay=5e-4)

            # for optimizing the student model
            data_test = CIFAR10(opt.data_dir, download=True, train=False,
                                transform=transform_test)
            data_test_loader = DataLoader(data_test, batch_size=100, num_workers=4)
            optimizer_G = torch.optim.Adam(generator.parameters(), lr=opt.lr_G)
            optimizer_S = torch.optim.SGD(net.parameters(), lr=opt.lr_S,
                                          momentum=0.9, weight_decay=5e-4)

        elif opt.dataset == 'cifar100':
            # use the original DAFL network
            if opt.teacher_model_name == 'none':
                teacher = resnet.ResNet34(num_classes=100).to(device)
            # use torchvision models
            else:
                teacher = eval(f'models.{opt.teacher_model_name}(pretrained = False)')
                teacher.fc = nn.Linear(teacher.fc.in_features, 100)
                teacher = teacher.to(device)

            # use the original DAFL network
            if opt.student_model_name == 'none':
                net = resnet.ResNet18(num_classes=100).to(device)
            # use torchvision models
            else:
                net = eval(f'models.{opt.student_model_name}()')
                net.fc = nn.Linear(net.fc.in_features, 100)
                net = net.to(device)

            transform_train = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
            ])
            transform_test = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
            ])

            # for optimizing the teacher model
            if opt.train_teacher:
                data_train = CIFAR100(opt.data_dir, download=True, transform=transform_train)
                data_train_loader = DataLoader(data_train, batch_size=128,
                                               shuffle=True, num_workers=4)
                optimizer = torch.optim.SGD(teacher.parameters(), lr=0.1,
                                            momentum=0.9, weight_decay=5e-4)

            # for optimizing the student model
            data_test = CIFAR100(opt.data_dir, download=True, train=False,
                                 transform=transform_test)
            data_test_loader = DataLoader(data_test, batch_size=100, num_workers=4)
            optimizer_G = torch.optim.Adam(generator.parameters(), lr=opt.lr_G)
            optimizer_S = torch.optim.SGD(net.parameters(), lr=opt.lr_S,
                                          momentum=0.9, weight_decay=5e-4)

    # train the teacher model on the specified dataset
    if opt.train_teacher:
        train_teacher(teacher, data_train_loader, data_test_loader, optimizer,
                      opt.n_epochs_teacher)

    if torch.cuda.device_count() > 1:
        teacher = nn.DataParallel(teacher)
        generator = nn.DataParallel(generator)
        net = nn.DataParallel(net)

    criterion = torch.nn.CrossEntropyLoss().cuda()

    if opt.pretest:
        test(teacher, data_test_loader)

    # ----------
    #  Training
    # ----------
    batches_done = 0
    for epoch in range(opt.n_epochs):
        total_correct = 0
        avg_loss = 0.0
        if opt.dataset != 'MNIST':
            adjust_learning_rate(optimizer_S, epoch, opt.lr_S)

        for i in range(120):
            net.train()
            z = torch.randn(opt.batch_size, opt.latent_dim).cuda()
            optimizer_G.zero_grad()
            optimizer_S.zero_grad()
            gen_imgs = generator(z)

            # teacher inference should not calculate gradients
            if opt.dataset != 'imagenet' and opt.teacher_model_name == 'none':
                outputs_T, features_T = teacher(gen_imgs, out_feature=True)
            else:
                features = [torch.Tensor().cuda(0)]

                def hook_features(model, input, output):
                    features[0] = torch.cat((features[0], output.cuda(0)), 0)

                if torch.cuda.device_count() > 1:
                    teacher.module.avgpool.register_forward_hook(hook_features)
                else:
                    teacher.avgpool.register_forward_hook(hook_features)
                outputs_T = teacher(gen_imgs)
                features_T = features[0]

            pred = outputs_T.data.max(1)[1]
            loss_activation = -features_T.abs().mean()
            loss_one_hot = criterion(outputs_T, pred)
            softmax_o_T = torch.nn.functional.softmax(outputs_T, dim=1).mean(dim=0)
            loss_information_entropy = (softmax_o_T * torch.log10(softmax_o_T)).sum()
            loss = (loss_one_hot * opt.oh + loss_information_entropy * opt.ie +
                    loss_activation * opt.a)
            loss_kd = kdloss(net(gen_imgs.detach()), outputs_T.detach())
            loss += loss_kd
            loss.backward()
            optimizer_G.step()
            optimizer_S.step()

            if i == 1:
                print(
                    f'[Epoch {epoch}/{opt.n_epochs}] '
                    f'[loss_oh: {loss_one_hot.item()}] '
                    f'[loss_ie: {loss_information_entropy.item()}] '
                    f'[loss_a: {loss_activation.item()}] '
                    f'[loss_kd: {loss_kd.item()}]'
                )

        test(net, data_test_loader)
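# kdloss() above is defined elsewhere in this repository. In DAFL-style data-free
# distillation the student is usually trained with a KL-divergence loss between
# temperature-softened student and teacher logits; a minimal sketch under that
# assumption (the default temperature here is illustrative, not the repository's
# actual setting).
import torch.nn.functional as F


def kdloss(student_logits, teacher_logits, temperature=1.0):
    """Hypothetical knowledge-distillation loss: KL(teacher || student) on softened logits."""
    log_p_student = F.log_softmax(student_logits / temperature, dim=1)
    p_teacher = F.softmax(teacher_logits / temperature, dim=1)
    return F.kl_div(log_p_student, p_teacher, reduction='batchmean') * (temperature ** 2)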