def train(epoch, trainset, inputs, net, batch_size, trainloader, resize,
          num_epochs, use_cuda, vi, logfile):
    net.train()  # torch.nn.Module.train: sets the module in training mode
    train_loss = 0
    correct = 0
    total = 0
    m = math.ceil(len(trainset) / batch_size)  # number of minibatches per epoch
    optimizer = optim.Adam(net.parameters(),
                           lr=cf.learning_rate(args.lr, epoch),
                           weight_decay=args.weight_decay)
    print('\n=> Training Epoch #%d, LR=%.4f' % (epoch, cf.learning_rate(args.lr, epoch)))
    for batch_idx, (inputs_value, targets) in enumerate(trainloader):
        # Repeat each sample num_samples times so several weight draws are
        # evaluated per input; the first dimension of x grows by that factor.
        x = inputs_value.view(-1, inputs, resize, resize).repeat(args.num_samples, 1, 1, 1)
        y = targets.repeat(args.num_samples)
        if use_cuda:
            x, y = x.cuda(), y.cuda()  # GPU settings

        # KL re-weighting factor beta for the minibatch ELBO. Note `==`, not
        # `is`: string identity comparison only works by accident.
        if args.beta_type == "Blundell":
            # Bayes by Backprop (Blundell et al., 2015): exponentially more
            # KL weight on early minibatches; the weights sum to 1 per epoch.
            beta = 2 ** (m - (batch_idx + 1)) / (2 ** m - 1)
        elif args.beta_type == "Soenderby":
            # Linear KL warm-up over the first quarter of training.
            beta = min(epoch / (num_epochs // 4), 1)
        elif args.beta_type == "Standard":
            beta = 1 / m
        else:
            beta = 0

        # Forward propagation. probforward is this model's own method, not
        # torch.nn.Module.forward; it returns the logits and the KL term.
        # (Variable is a no-op since PyTorch 0.4, so tensors are used directly.)
        outputs, kl = net.probforward(x)
        loss = vi(outputs, y, kl, beta)  # ELBO loss; calls vi.forward(...)

        optimizer.zero_grad()  # clear the gradients of all optimized tensors
        loss.backward()        # backward propagation
        optimizer.step()       # optimizer update

        train_loss += loss.item()
        # torch.max over dim 1 returns (max values, argmax indices) per row.
        _, predicted = torch.max(outputs.data, dim=1)
        total += targets.size(0)
        correct += predicted.eq(y).cpu().sum().item()

        # `correct` counts over the repeated batch while `total` counts the
        # original samples, so divide by num_samples to compensate.
        acc = (100 * correct / float(total)) / args.num_samples
        sys.stdout.write('\r')
        sys.stdout.write('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f Acc@1: %.3f%%'
                         % (epoch, num_epochs, batch_idx + 1,
                            (len(trainset) // batch_size) + 1, loss.item(), acc))
        sys.stdout.flush()

    diagnostics_to_write = {'Epoch': epoch, 'Loss': loss.item(), 'Accuracy': acc}
    with open(logfile, 'a') as lf:
        lf.write(str(diagnostics_to_write))
    # The cross-validation drivers below collect this return value.
    return diagnostics_to_write
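# Illustrative sketch, not part of the pipeline: a sanity check of the
# "Blundell" schedule used in train(). The weights 2**(m-i) / (2**m - 1) for
# i = 1..m form a geometric series summing to exactly 1, so the full KL term
# is counted once per epoch, front-loaded onto the earliest minibatches.
# The minibatch count m below is hypothetical.
def _demo_blundell_beta(m=8):
    betas = [2 ** (m - i) / (2 ** m - 1) for i in range(1, m + 1)]
    assert abs(sum(betas) - 1.0) < 1e-9
    return betas  # betas[0] is the largest weight, betas[-1] the smallest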
def cross_validation_for_clustered_data(num_labels, num_cluster, args):
    print("cross validation for clustered data")
    best_acc = 0
    resize = cf.resize
    start_epoch, num_epochs, batch_size, optim_type = cf.start_epoch, cf.num_epochs, cf.batch_size, cf.optim_type
    results = {}
    for i in range(num_cluster):
        cv_idx = i
        # Hold out cluster i for testing; train/evaluate on the rest.
        test_list = [i]
        train_eval_list = [x for x in range(num_cluster) if x != i]
        print(test_list, train_eval_list)
        trainset, evalset, testset, inputs, outputs = prepare_data(args, train_eval_list, test_list, resize)

        # Hyper parameter settings; cf.use_cuda() overrides the plain
        # torch.cuda.is_available() check.
        use_cuda = cf.use_cuda()
        if use_cuda:
            torch.cuda.set_device(0)
        best_acc = 0

        # num_workers: how many subprocesses to use for data loading.
        # 0 means the data are loaded in the main process (default: 0).
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=4)
        evalloader = torch.utils.data.DataLoader(evalset, batch_size=batch_size, shuffle=False, num_workers=4)
        testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=4)

        # Model
        print('\n[Phase 2] : Model setup')
        if args.resume:
            # Load checkpoint
            print('| Resuming from checkpoint...')
            assert os.path.isdir('checkpoint'), 'Error: No checkpoint directory found!'
            _, file_name = getNetwork(args, inputs, outputs)
            checkpoint = torch.load('./checkpoint/' + args.dataset + os.sep
                                    + file_name + args.cv_type + str(cv_idx) + '.t7')
            net = checkpoint['net']
            best_acc = checkpoint['acc']
            start_epoch = checkpoint['epoch']
        else:
            print('| Building net type [' + args.net_type + ']...')
            net, file_name = getNetwork(args, inputs, outputs)
        if use_cuda:
            net.cuda()

        vi = GaussianVariationalInference(torch.nn.CrossEntropyLoss())

        logfile_train = os.path.join('diagnostics_Bayes{}_{}_cv{}_train_vgmm.txt'.format(args.net_type, args.dataset, i))
        logfile_test = os.path.join('diagnostics_Bayes{}_{}_cv{}_test_vgmm.txt'.format(args.net_type, args.dataset, i))
        logfile_eval = os.path.join('diagnostics_Bayes{}_{}_cv{}_val_vgmm.txt'.format(args.net_type, args.dataset, i))

        print('\n[Phase 3] : Training model with validation')
        print('| Training Epochs = ' + str(num_epochs))
        print('| Initial Learning Rate = ' + str(args.lr))
        print('| Optimizer = ' + str(optim_type))

        elapsed_time = 0
        train_return = []
        eval_return = []
        test_return = []
        for epoch in range(start_epoch, start_epoch + num_epochs):
            start_time = time.time()
            temp_train_return = train(epoch, trainset, inputs, net, batch_size, trainloader, resize, num_epochs, use_cuda, vi, logfile_train)
            temp_eval_return = test(epoch, evalset, inputs, batch_size, evalloader, net, use_cuda, num_epochs, resize, vi, logfile_eval, file_name)
            temp_test_return = test(epoch, testset, inputs, batch_size, testloader, net, use_cuda, num_epochs, resize, vi, logfile_test, "test")
            train_return = np.append(train_return, temp_train_return)
            eval_return = np.append(eval_return, temp_eval_return)
            test_return = np.append(test_return, temp_test_return)
            print(temp_train_return)
            print(temp_eval_return)
            print(temp_test_return)
            epoch_time = time.time() - start_time
            elapsed_time += epoch_time
            print('| Elapsed time : %d:%02d:%02d' % (cf.get_hms(elapsed_time)))

        print('\n[Phase 4] : Testing model')
        print('* Test results : Acc@1 = %.2f%%' % (best_acc))
        results[str(i)] = {"train": train_return, "test": test_return, "val": eval_return}
    print(results)
    return results
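# Illustrative only: one way to consume the dict returned by
# cross_validation_for_clustered_data. Each fold key maps to numpy object
# arrays of the per-epoch diagnostics dicts returned by train() and test();
# the fields follow the diagnostics written in train() above.
def summarize_results(results):
    for fold, rst in results.items():
        last_test = rst["test"][-1]  # diagnostics of the final epoch
        print("fold %s: final test diagnostics: %s" % (fold, last_test))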
            # Tail of the checkpoint-saving branch in test(): the opening of
            # the state dict is reconstructed from the keys the resume code
            # reads back ('net', 'acc', 'epoch').
            state = {
                'net': net,
                'acc': acc,
                'epoch': epoch,
            }
            if not os.path.isdir('checkpoint'):
                os.mkdir('checkpoint')
            save_point = './checkpoint/' + args.dataset + os.sep
            if not os.path.isdir(save_point):
                os.mkdir(save_point)
            torch.save(state, save_point + file_name + '.t7')
            best_acc = acc

# Legacy top-level driver: it predates the parameterized train()/test()
# signatures used by the cross-validation functions and relies on
# module-level globals.
print('\n[Phase 3] : Training model')
print('| Training Epochs = ' + str(num_epochs))
print('| Initial Learning Rate = ' + str(args.lr))
print('| Optimizer = ' + str(optim_type))

elapsed_time = 0
for epoch in range(start_epoch, start_epoch + num_epochs):
    start_time = time.time()
    train(epoch)
    test(epoch)
    epoch_time = time.time() - start_time
    elapsed_time += epoch_time
    print('| Elapsed time : %d:%02d:%02d' % (cf.get_hms(elapsed_time)))

print('\n[Phase 4] : Testing model')
print('* Test results : Acc@1 = %.2f%%' % (best_acc))
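# Illustrative only: reading back a checkpoint saved above. The keys match
# the `state` dict and the path mirrors the save_point construction;
# torch.load restores the pickled module object directly because the whole
# net (not just a state_dict) was saved.
def load_checkpoint(dataset, file_name):
    checkpoint = torch.load('./checkpoint/' + dataset + os.sep + file_name + '.t7')
    return checkpoint['net'], checkpoint['acc'], checkpoint['epoch']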
def cross_validation(num_labels, num_cluster, args):
    method = args.cv_type
    print("cross validation for random resampling")
    best_acc = 0
    resize = cf.resize
    start_epoch, num_epochs, batch_size, optim_type = cf.start_epoch, cf.num_epochs, cf.batch_size, cf.optim_type
    results = {}
    ds = mdataset_class.InputDataset("fashion-mnist", -1, 10)
    #X, y = utils_parent.load_mnist('fashion-mnist')
    X, y = ds.data_X, ds.data_y
    # Materialize the KFold splits once so that fold i can be indexed
    # directly inside the loop below.
    kf = KFold(n_splits=num_cluster, shuffle=True)
    mlist = list(kf.split(X, y))
    for i in range(num_cluster):
        cv_idx = i
        if method == "rand":
            train_eval_idx = list(mlist[i][0])
            test_idx = list(mlist[i][1])
            trainset, evalset, testset, inputs, outputs = prepare_data_for_normal_cv(args, train_eval_idx, test_idx, resize)
        elif method == "vgmm":
            # Hold out cluster i for testing; train/evaluate on the rest.
            test_list = [i]
            train_eval_list = [x for x in range(num_cluster) if x != i]
            print(test_list, train_eval_list)
            trainset, evalset, testset, inputs, outputs = prepare_data(args, train_eval_list, test_list, resize, method="vgmm")
        else:
            raise NotImplementedError

        # Hyper parameter settings; cf.use_cuda() overrides the plain
        # torch.cuda.is_available() check.
        use_cuda = cf.use_cuda()
        if use_cuda:
            torch.cuda.set_device(GPUIndex)
        best_acc = 0

        # num_workers: how many subprocesses to use for data loading.
        # 0 means the data are loaded in the main process (default: 0).
        trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=4)
        evalloader = torch.utils.data.DataLoader(evalset, batch_size=batch_size, shuffle=False, num_workers=4)
        testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=4)

        # Model
        print('\n[Phase 2] : Model setup')
        if args.resume:
            # Load checkpoint
            print('| Resuming from checkpoint...')
            assert os.path.isdir('checkpoint'), 'Error: No checkpoint directory found!'
            _, file_name = getNetwork(args, inputs, outputs)
            checkpoint = torch.load('./checkpoint/' + args.dataset + os.sep + file_name + args.cv_type + str(cv_idx) + '.t7')
            net = checkpoint['net']
            best_acc = checkpoint['acc']
            start_epoch = checkpoint['epoch']
        else:
            print('| Building net type [' + args.net_type + ']...')
            net, file_name = getNetwork(args, inputs, outputs)
        if use_cuda:
            net.cuda()

        vi = GaussianVariationalInference(torch.nn.CrossEntropyLoss())

        #logfile = os.path.join('diagnostics_Bayes{}_{}.txt'.format(args.net_type, args.dataset))
        rstfolder = args.rst_dir  # result directory (assumed, mirroring tr_val_te below)
        logfile_train = os.path.join(rstfolder, 'diagnostics_Bayes{}_{}_cv{}_train_rand.txt'.format(args.net_type, args.dataset, i))
        logfile_test = os.path.join(rstfolder, 'diagnostics_Bayes{}_{}_cv{}_test_rand.txt'.format(args.net_type, args.dataset, i))
        logfile_eval = os.path.join(rstfolder, 'diagnostics_Bayes{}_{}_cv{}_val_rand.txt'.format(args.net_type, args.dataset, i))

        print('\n[Phase 3] : Training model')
        print('| Training Epochs = ' + str(num_epochs))
        print('| Initial Learning Rate = ' + str(args.lr))
        print('| Optimizer = ' + str(optim_type))

        elapsed_time = 0
        train_return = []
        test_return = []
        eval_return = []
        for epoch in range(start_epoch, start_epoch + num_epochs):
            start_time = time.time()
            temp_train_return = train(epoch, trainset, inputs, net, batch_size, trainloader, resize, num_epochs, use_cuda, vi, logfile_train)
            temp_eval_return = test(epoch, evalset, inputs, batch_size, evalloader, net, use_cuda, num_epochs, resize, vi, logfile_eval, file_name)
            temp_test_return = test(epoch, testset, inputs, batch_size, testloader, net, use_cuda, num_epochs, resize, vi, logfile_test, "test")
            train_return = np.append(train_return, temp_train_return)
            eval_return = np.append(eval_return, temp_eval_return)
            test_return = np.append(test_return, temp_test_return)
            print(temp_train_return)
            print(temp_eval_return)
            print(temp_test_return)
            epoch_time = time.time() - start_time
            elapsed_time += epoch_time
            print('| Elapsed time : %d:%02d:%02d' % (cf.get_hms(elapsed_time)))

        print('\n[Phase 4] : Testing model')
        print('* Test results : Acc@1 = %.2f%%' % (best_acc))
        results[str(i)] = {"train": train_return, "test": test_return, "eval": eval_return}
    print(results)
    return results
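# Illustrative only, replacing the stray debug loop that sat in
# cross_validation(): what sklearn's KFold.split yields. Each iteration
# gives one (train_indices, test_indices) pair, and every sample appears on
# the test side exactly once across the folds.
def _demo_kfold(n_samples=10, n_splits=5):
    import numpy as np
    from sklearn.model_selection import KFold
    X = np.arange(n_samples).reshape(-1, 1)
    for train_idx, test_idx in KFold(n_splits=n_splits, shuffle=True).split(X):
        print(train_idx, test_idx)  # e.g. [0 1 2 3 5 6 8 9] [4 7]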
parser.add_argument('--lr', default=0.0001, type=float, help='learning_rate')
parser.add_argument('--net_type', default='3conv3fc', type=str, help='model')
#parser.add_argument('--depth', default=28, type=int, help='depth of model')
#parser.add_argument('--widen_factor', default=10, type=int, help='width of model')
parser.add_argument('--num_samples', default=10, type=int, help='Number of samples')
parser.add_argument('--beta_type', default="Blundell", type=str, help='Beta type')
parser.add_argument('--p_logvar_init', default=0, type=int, help='p_logvar_init')
parser.add_argument('--q_logvar_init', default=-10, type=int, help='q_logvar_init')
parser.add_argument('--weight_decay', default=0.0005, type=float, help='weight_decay')
parser.add_argument('--dataset', default='mnist', type=str, help='dataset = [mnist/cifar10/cifar100]')
parser.add_argument('--resume', '-r', action='store_true', help='resume from checkpoint')
parser.add_argument('--testOnly', '-t', action='store_true', help='Test mode with the saved model')
args = parser.parse_args()

# Hyper parameter settings; cf.use_cuda() overrides the plain
# torch.cuda.is_available() check.
use_cuda = cf.use_cuda()
if use_cuda:
    torch.cuda.set_device(0)
best_acc = 0
resize = cf.resize
start_epoch, num_epochs, batch_size, optim_type = cf.start_epoch, cf.num_epochs, cf.batch_size, cf.optim_type

# Data upload
print('\n[Phase 1] : Data Preparation')
transform_train = transforms.Compose([
    transforms.Resize((resize, resize)),
    transforms.ToTensor(),
    transforms.Normalize(cf.mean[args.dataset], cf.std[args.dataset]),
])  # mean/std transformation
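# Illustrative only: what the pipeline above produces. Resize scales a PIL
# image to (resize, resize), ToTensor converts it to a float tensor in
# [0, 1] with shape (C, H, W), and Normalize shifts each channel by the
# dataset mean/std from cf. The random grayscale image is hypothetical.
def _demo_transform(transform):
    from PIL import Image
    img = Image.fromarray(np.uint8(np.random.rand(28, 28) * 255))  # fake MNIST-like input
    print(transform(img).shape)  # torch.Size([1, resize, resize]) for single-channel input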
def tr_val_te(ds, num_labels, num_cluster, args, cv_idx, config_parent):
    print("cross validation for random resampling")
    best_acc = 0
    resize = cf.resize
    start_epoch, num_epochs, batch_size, optim_type = cf.start_epoch, cf.num_epochs, cf.batch_size, cf.optim_type

    # mean/std transformation
    transform_train = transforms.Compose([
        transforms.Resize((resize, resize)),
        transforms.ToTensor(),
        transforms.Normalize(cf.mean[args.dataset], cf.std[args.dataset]),
    ])
    transform_test = transforms.Compose([
        transforms.Resize((resize, resize)),
        transforms.ToTensor(),
        transforms.Normalize(cf.mean[args.dataset], cf.std[args.dataset]),
    ])

    print('\n[Phase 1] : Data Preparation')
    trainset, evalset, testset, inputs, outputs = ds.prepare_data(config_parent, args, transform_train, transform_test, cv_idx, num_cluster)

    # Hyper parameter settings; cf.use_cuda() overrides the plain
    # torch.cuda.is_available() check.
    use_cuda = cf.use_cuda()
    if use_cuda:
        torch.cuda.set_device(args.g)
        print("*** using gpu ind", args.g)
    best_acc = 0

    # num_workers: how many subprocesses to use for data loading.
    # 0 means the data are loaded in the main process (default: 0).
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=4)
    evalloader = torch.utils.data.DataLoader(evalset, batch_size=batch_size, shuffle=False, num_workers=4)
    testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=4)

    # Model
    print('\n[Phase 2] : Model setup')
    if args.resume:
        # Load checkpoint
        print('| Resuming from checkpoint...')
        assert os.path.isdir('checkpoint'), 'Error: No checkpoint directory found!'
        _, file_name = getNetwork(args, inputs, outputs)
        checkpoint = torch.load('./checkpoint/' + args.dataset + os.sep + file_name + args.cv_type + str(cv_idx) + '.t7')
        net = checkpoint['net']
        best_acc = checkpoint['acc']
        start_epoch = checkpoint['epoch']
    else:
        print('| Building net type [' + args.net_type + ']...')
        net, file_name = getNetwork(args, inputs, outputs)
    if use_cuda:
        net.cuda()

    vi = GaussianVariationalInference(torch.nn.CrossEntropyLoss())

    rstfolder = args.rst_dir
    logfile_train = os.path.join(rstfolder, 'diagnostics_Bayes{}_{}_cv{}_train_{}.txt'.format(args.net_type, args.dataset, cv_idx, args.cv_type))
    logfile_test = os.path.join(rstfolder, 'diagnostics_Bayes{}_{}_cv{}_test_{}.txt'.format(args.net_type, args.dataset, cv_idx, args.cv_type))
    logfile_eval = os.path.join(rstfolder, 'diagnostics_Bayes{}_{}_cv{}_val_{}.txt'.format(args.net_type, args.dataset, cv_idx, args.cv_type))

    print('\n[Phase 3] : Training model')
    print('| Training Epochs = ' + str(num_epochs))
    print('| Initial Learning Rate = ' + str(args.lr))
    print('| Optimizer = ' + str(optim_type))

    elapsed_time = 0
    train_return = []
    test_return = []
    eval_return = []
    for epoch in range(start_epoch, start_epoch + num_epochs):
        start_time = time.time()
        temp_train_return = train(epoch, trainset, inputs, net, batch_size, trainloader, resize, num_epochs, use_cuda, vi, logfile_train)
        temp_eval_return = test(epoch, evalset, inputs, batch_size, evalloader, net, use_cuda, num_epochs, resize, vi, logfile_eval, file_name)
        temp_test_return = test(epoch, testset, inputs, batch_size, testloader, net, use_cuda, num_epochs, resize, vi, logfile_test, "test")
        train_return = np.append(train_return, temp_train_return)
        eval_return = np.append(eval_return, temp_eval_return)
        test_return = np.append(test_return, temp_test_return)
        print(temp_train_return)
        print(temp_eval_return)
        print(temp_test_return)
        epoch_time = time.time() - start_time
        elapsed_time += epoch_time
        print('| Elapsed time : %d:%02d:%02d' % (cf.get_hms(elapsed_time)))

    print('\n[Phase 4] : Testing model')
    print('* Test results : Acc@1 = %.2f%%' % (best_acc))
    rst = {"train": train_return, "test": test_return, "eval": eval_return}
    return rst
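# Illustrative only: tr_val_te handles a single fold, so a driver might loop
# over cv_idx and collect the per-fold dicts, mirroring what
# cross_validation() builds internally. `ds`, `args`, and `config_parent`
# are assumed to be constructed as elsewhere in this module.
def run_all_folds(ds, num_labels, num_cluster, args, config_parent):
    results = {}
    for cv_idx in range(num_cluster):
        results[str(cv_idx)] = tr_val_te(ds, num_labels, num_cluster, args, cv_idx, config_parent)
    return results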