def evaluate_adv_files_df(files_df, denoise_model, device):
    """Run a denoising model over original/adversarial image pairs and
    collect per-image predictions.

    Args:
        files_df: DataFrame (or dict of DataFrames) consumed by
            make_generators_DF_cifar; must provide 'path' and 'adv_path'
            columns.
        denoise_model: model returning
            (orig_out, denoised_orig_pred, adv_out, denoised_adv_pred)
            logits when called with (orig, adv, eval_mode=True).
        device: torch.device to run inference on.

    Returns:
        DataFrame with one row per validation image: file paths, true
        label, and argmax predictions for the clean/adversarial inputs
        before and after denoising.
    """
    dataloaders = make_generators_DF_cifar(files_df, batch_size=5, num_workers=4,
                                           size=32, path_colname='path',
                                           adv_path_colname='adv_path',
                                           return_loc=True)
    denoise_model.eval()

    # Collect plain dicts and build the DataFrame once at the end:
    # DataFrame.append in a loop is quadratic and was removed in pandas 2.0.
    rows = []
    with torch.no_grad():
        for orig, adv, target, (path, adv_path) in dataloaders['val']:
            orig = orig.to(device)
            adv = adv.to(device)
            target = target.to(device)

            orig_out, denoised_orig_pred, adv_out, denoised_adv_pred = denoise_model(
                orig, adv, eval_mode=True)

            for i, true_label in enumerate(target):
                rows.append({
                    'path': path[i],
                    'adv_path': adv_path[i],
                    'true_label': int(true_label.cpu().numpy()),
                    'orig_pred': int(orig_out[i].argmax().cpu().numpy()),
                    'denoised_orig_pred': int(denoised_orig_pred[i].argmax().cpu().numpy()),
                    'adv_pred': int(adv_out[i].argmax().cpu().numpy()),
                    'denoised_adv_pred': int(denoised_adv_pred[i].argmax().cpu().numpy()),
                })
    return pd.DataFrame(rows)
def main(args):
    """Evolve to find the optimal top k-pooling for each layer / block.

    Uses CMA-ES: each candidate solution is 4 numbers drawn from N(0, 1),
    one per residual block, mapped to top-k fractions in [0, 1] via the
    normal CDF. Each candidate trains a fresh PResNetTopK-18 for
    ``args.epochs`` epochs; fitness is the negated best validation
    accuracy (CMA-ES minimizes). The per-generation best/favorite
    solutions are pickled to SAVE_PATH.
    """
    print('CUDA VERSION:', torch.version.cuda)

    batch_size, num_workers = int(args.batch_size), int(args.num_workers)
    IM_SIZE, epochs = int(args.IM_SIZE), int(args.epochs)
    device = torch.device(args.device)

    NPARAMS = 4  # there are 4 blocks for topk
    NPOPULATION = int(args.NPOPULATION)          # population size
    MAX_GENERATIONS = int(args.MAX_GENERATIONS)  # number of generations

    within_block_act = str(args.within_block_act)
    after_block_act = str(args.after_block_act)
    model_name = 'PResNetTopK-' + str(within_block_act) + '_' + str(after_block_act)

    SAVE_PATH = Path(args.SAVE_PATH)
    SAVE_PATH.mkdir(parents=True, exist_ok=True)

    with open(args.files_df_loc, 'rb') as f:
        files_df = pickle.load(f)

    group_list = [1, 1, 1, 1]

    # solutions generated from N(0, 1). later transformed [0, 1] with inv cdf
    es = cma.CMAEvolutionStrategy(NPARAMS * [0], 1)

    history = {
        'xbest': [],
        'fbest': [],
        'xfavorite': [],
        'NPOPULATION': NPOPULATION,
        'MAX_GENERATIONS': MAX_GENERATIONS,
    }

    for j in tqdm(range(MAX_GENERATIONS)):
        solutions = es.ask()
        fitness_list = np.zeros(es.popsize)

        # ??? Make generators for each generation. Does this matter? or just do it once???
        if args.dataset == 'CIFAR10':
            dataloaders = make_generators_DF_cifar(files_df, batch_size, num_workers,
                                                   size=IM_SIZE, path_colname='path',
                                                   adv_path_colname=None,
                                                   return_loc=False)
        elif args.dataset == 'MNIST':
            dataloaders = make_generators_DF_MNIST(files_df, batch_size, num_workers,
                                                   size=IM_SIZE, path_colname='path',
                                                   adv_path_colname=None,
                                                   return_loc=False, normalize=True)
        else:
            # Previously fell through and crashed later with NameError on
            # 'dataloaders'; fail fast with a clear message instead.
            raise ValueError(f'Unknown dataset: {args.dataset!r}')

        # evaluate each set of learning rates, using new model each time:
        for i in range(es.popsize):
            # convert the normal to a topk probability:
            topk_list = [norm.cdf(x) for x in solutions[i]]
            # Create a model with this topk and train it:
            model = get_PResNetTopK18(within_block_act=within_block_act,
                                      after_block_act=after_block_act,
                                      frac_list=topk_list, group_list=group_list,
                                      num_classes=10)
            model = model.to(device)
            metrics = train_net_evol(model, dataloaders, batch_size, epochs, device)
            # the fitness is the best validation accuracy *-1, because it tries to minimize
            fitness_list[i] = -1 * metrics['best_val_acc']

        es.tell(solutions, fitness_list)
        # es.logger.add()
        es.disp()
        result = es.result
        history['xbest'].append(result.xbest)
        history['fbest'].append(result.fbest)
        history['xfavorite'].append(result.xfavorite)  # this is a weird one, maybe try it out
        print("fitness at generation", (j + 1), result[1])
        print("local optimum discovered by solver:\n", result[0])
        print("fitness score at this local optimum:", result[1])

    print('es.result_pretty-------------------')
    es.result_pretty()

    # NOTE: the '_ep_' field previously embedded NPARAMS (always 4) instead of
    # the actual epoch count; the sibling LR-evolution script uses the epoch
    # count here, so this was almost certainly a copy-paste slip.
    history_loc = (f'{SAVE_PATH}/{model_name}_bs_{batch_size}'
                   f'_nGen_{MAX_GENERATIONS}_nPop_{NPOPULATION}'
                   f'_ep_{epochs}_history.pkl')
    with open(history_loc, 'wb') as f:
        pickle.dump(history, f)
def main(args): epochs, batch_size, lr, num_workers = int(args.epochs), int( args.batch_size), float(args.lr), int(args.num_workers) device = torch.device(args.device) MODEL_SAVE_PATH = Path(args.MODEL_SAVE_PATH) MODEL_SAVE_PATH.mkdir(parents=True, exist_ok=True) with open(args.files_df_loc, 'rb') as f: files_df = pickle.load(f) dataloaders = make_generators_DF_cifar(files_df, batch_size, num_workers, size=32, path_colname='path', adv_path_colname='adv_path', return_loc=False) # LOAD EVERYTHING: classifier = load_net_cifar(args.model_loc).to(device) for p in classifier.parameters(): p.requires_grad = False denoiser, model_name = denoise_from_args(args, IM_SIZE=32) denoiser = denoiser.to(device) loss = DenoiseLoss(n=1, hard_mining=0, norm=False) model = DenoiseNet(classifer=classifier, denoiser=denoiser, loss=loss).to(device) print('loaded classifier, denoiser, DenoiseNet') # their default optimizer (but they use batch_size of 60) optimizer = optim.Adam( model.parameters(), lr=lr, weight_decay=0.0001) # using paper init, not the code base_lr = lr best_val_loss = 1000000 metrics = {} metrics['train_adv_acc'] = [] metrics['train_loss'] = [] metrics['val_adv_acc'] = [] metrics['val_loss'] = [] def get_lr(curr_epoch, epochs, base_lr): if epoch <= epochs * 0.6: return base_lr elif epoch <= epochs * 0.9: return base_lr * 0.1 else: return base_lr * 0.01 for epoch in range(epochs): requires_control = epoch == 0 # set learning rate lr = get_lr(epoch, epochs, base_lr) for param_group in optimizer.param_groups: # why this way? 
param_group['lr'] = lr # train for one epoch train_adv_acc, train_loss = train_epoch_denoise( lr, dataloaders['train'], model, requires_control, optimizer, epoch, device) metrics['train_adv_acc'].append(train_adv_acc) metrics['train_loss'].append(train_loss) # evaluate on validation set val_adv_acc, val_loss = validate_epoc_denoise( dataloaders['val'], model, requires_control, device) metrics['val_adv_acc'].append(val_adv_acc) metrics['val_loss'].append(val_loss) # remember best loss and save checkpoint is_best = val_loss < best_val_loss best_val_loss = min(val_loss, best_val_loss) save_checkpoint( { 'model_name': model_name, 'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict(), 'epoch': epoch + 1, 'best_val_loss': best_val_loss, 'metrics': metrics, }, is_best, model_name, MODEL_SAVE_PATH)
def main(args):
    """Evolve a per-epoch learning-rate schedule with CMA-ES.

    Each candidate solution has one parameter per training epoch
    (NPARAMS = args.epochs), sampled around N(-2, 2) and mapped to a
    learning rate via 10**x. Each candidate trains a fresh network;
    fitness is the negated best validation accuracy (CMA-ES minimizes).
    History of best/favorite solutions is pickled to SAVE_PATH.
    """
    print('CUDA VERSION:', torch.version.cuda)

    batch_size, num_workers = int(args.batch_size), int(args.num_workers)
    IM_SIZE = int(args.IM_SIZE)
    device = torch.device(args.device)

    NPARAMS = int(args.epochs)                   # one learning rate (parameter) per epoch
    NPOPULATION = int(args.NPOPULATION)          # population size
    MAX_GENERATIONS = int(args.MAX_GENERATIONS)  # number of generations

    SAVE_PATH = Path(args.SAVE_PATH)
    SAVE_PATH.mkdir(parents=True, exist_ok=True)

    with open(args.files_df_loc, 'rb') as f:
        files_df = pickle.load(f)

    # is std of 2 the right way? or too much?. Normally use +13 std for sol,
    # but because using smaller pop use larger one.
    # solutions generated from N(-2, 1), but transformed to 10^sol
    es = cma.CMAEvolutionStrategy(NPARAMS * [-2], 2)

    history = {
        'xbest': [],
        'fbest': [],
        'xfavorite': [],
        'NPOPULATION': NPOPULATION,
        'MAX_GENERATIONS': MAX_GENERATIONS,
    }

    for j in tqdm(range(MAX_GENERATIONS)):
        solutions = es.ask()
        fitness_list = np.zeros(es.popsize)

        # ??? Make generators for each generation. Does this matter? or just do it once???
        if args.dataset == 'CIFAR10':
            dataloaders = make_generators_DF_cifar(files_df, batch_size, num_workers,
                                                   size=IM_SIZE, path_colname='path',
                                                   adv_path_colname=None,
                                                   return_loc=False)
        elif args.dataset == 'MNIST':
            dataloaders = make_generators_DF_MNIST(files_df, batch_size, num_workers,
                                                   size=IM_SIZE, path_colname='path',
                                                   adv_path_colname=None,
                                                   return_loc=False, normalize=True)
        else:
            # Previously fell through and crashed later with NameError on
            # 'dataloaders'; fail fast with a clear message instead.
            raise ValueError(f'Unknown dataset: {args.dataset!r}')

        # evaluate each set of learning rates, using new model each time:
        for i in range(es.popsize):
            model, model_name = net_from_args(args, num_classes=10, IM_SIZE=IM_SIZE)
            model = model.to(device)
            # convert the exponenet to a learning rate:
            lr_list = np.power(10, solutions[i]).tolist()
            # Train it for the given lr list:
            metrics = train_net(model, dataloaders, lr_list, batch_size, device)
            # the fitness is the best validation accuracy *-1, because it tries to minimize
            fitness_list[i] = -1 * metrics['best_val_acc']

        es.tell(solutions, fitness_list)
        # es.logger.add()
        es.disp()
        result = es.result
        history['xbest'].append(result.xbest)
        history['fbest'].append(result.fbest)
        history['xfavorite'].append(result.xfavorite)  # this is a weird one, maybe try it out
        print("fitness at generation", (j + 1), result[1])
        print("local optimum discovered by solver:\n", result[0])
        print("fitness score at this local optimum:", result[1])

    print('es.result_pretty-------------------')
    es.result_pretty()

    # NPARAMS == epochs here, so '_ep_' correctly records the epoch count.
    history_loc = (f'{SAVE_PATH}/{model_name}_bs_{batch_size}'
                   f'_nGen_{MAX_GENERATIONS}_nPop_{NPOPULATION}'
                   f'_ep_{NPARAMS}_history.pkl')
    with open(history_loc, 'wb') as f:
        pickle.dump(history, f)
def main(args):
    """Train one autoencoder/VAE per class label.

    For each label in range(args.num_labels), builds label-filtered
    dataloaders, constructs a model via vae_from_args, trains with Adam
    plus a StepLR schedule, checkpoints on best validation loss, and
    pickles the per-label metrics to SAVE_PATH.
    """
    # NOTE(review): GPU index 1 is hard-coded here (original had a
    # "??? Remove this" marker) — confirm before running on other machines.
    with torch.cuda.device(1):
        if args.layer_sizes:
            args.layer_sizes = [int(i) for i in args.layer_sizes]

        epochs, batch_size = int(args.epochs), int(args.batch_size)
        lr, num_workers = float(args.lr), int(args.num_workers)
        num_labels, IM_SIZE = int(args.num_labels), int(args.IM_SIZE)
        device = torch.device(args.device)

        SAVE_PATH = Path(args.SAVE_PATH)
        SAVE_PATH.mkdir(parents=True, exist_ok=True)

        with open(args.files_df_loc, 'rb') as f:
            files_df = pickle.load(f)

        # Train for each of the labels:
        for label in range(num_labels):
            if args.dataset == 'CIFAR10':
                dataloaders = make_generators_DF_cifar(files_df, batch_size, num_workers,
                                                       size=IM_SIZE, path_colname='path',
                                                       adv_path_colname=None,
                                                       label=label, return_loc=False)
            elif args.dataset == 'MNIST':
                # NOTE(review): other scripts in this file pass normalize=True
                # for MNIST — confirm the omission here is intentional.
                dataloaders = make_generators_DF_MNIST(files_df, batch_size, num_workers,
                                                       size=IM_SIZE, path_colname='path',
                                                       adv_path_colname=None,
                                                       label=label, return_loc=False)
            else:
                # Previously fell through and crashed later with NameError on
                # 'dataloaders'; fail fast with a clear message instead.
                raise ValueError(f'Unknown dataset: {args.dataset!r}')

            # get the network
            model, model_name = vae_from_args(args)
            model = model.to(device)
            print('next(model.parameters()).device', next(model.parameters()).device)
            print(f'--------- Training: {model_name} ---------')

            # get training parameters and train:
            optimizer = optim.Adam(model.parameters(), lr=lr)
            scheduler = lr_scheduler.StepLR(optimizer, step_size=int(epochs / 4),
                                            gamma=0.2)  # close enough

            metrics = {'train_losses': [], 'val_losses': []}
            best_val_loss = float('inf')
            criterion = model.loss

            for epoch in range(epochs):
                # train for one epoch
                train_losses = train_epoch_auto(epoch, lr, dataloaders['train'],
                                                model, optimizer, criterion, device)
                metrics['train_losses'].append(train_losses)

                # evaluate on validation set
                val_losses = validate_epoch_auto(dataloaders['val'], model,
                                                 criterion, device)
                metrics['val_losses'].append(val_losses)
                scheduler.step()

                # remember best validation accuracy and save checkpoint
                is_best = val_losses < best_val_loss
                best_val_loss = min(val_losses, best_val_loss)
                save_checkpoint(
                    {
                        'model_name': model_name,
                        'state_dict': model.state_dict(),
                        'optimizer': optimizer.state_dict(),
                        'epoch': epoch + 1,
                        'best_val_loss': best_val_loss,
                        'metrics': metrics,
                    }, is_best, model_name + '_label_' + str(label), SAVE_PATH)

            # dump this label's metrics once training for it is done
            RESULT_PATH = str(SAVE_PATH) + '/' + str(model_name) + '_label_' + str(label) + '_metrics.pkl'
            print('Saving results at:', RESULT_PATH)
            with open(RESULT_PATH, 'wb') as f:
                pickle.dump(metrics, f)
def main(args):
    """Train a classifier on CIFAR10 or MNIST.

    Builds dataloaders from a pickled files DataFrame, constructs the
    network via net_from_args, trains with SGD + momentum and a StepLR
    schedule, checkpoints on best validation accuracy, and pickles the
    metrics history to SAVE_PATH.
    """
    # NOTE(review): GPU index 1 is hard-coded — confirm before running
    # on other machines.
    with torch.cuda.device(1):
        epochs, batch_size = int(args.epochs), int(args.batch_size)
        lr, num_workers = float(args.lr), int(args.num_workers)
        IM_SIZE = int(args.IM_SIZE)
        device = torch.device(args.device)

        SAVE_PATH = Path(args.SAVE_PATH)
        SAVE_PATH.mkdir(parents=True, exist_ok=True)

        with open(args.files_df_loc, 'rb') as f:
            files_df = pickle.load(f)

        # Make generators:
        if args.dataset == 'CIFAR10':
            dataloaders = make_generators_DF_cifar(files_df, batch_size, num_workers,
                                                   size=IM_SIZE, path_colname='path',
                                                   adv_path_colname=None,
                                                   return_loc=False)
        elif args.dataset == 'MNIST':
            dataloaders = make_generators_DF_MNIST(files_df, batch_size, num_workers,
                                                   size=IM_SIZE, path_colname='path',
                                                   adv_path_colname=None,
                                                   return_loc=False, normalize=True)
        else:
            # Previously fell through and crashed later with NameError on
            # 'dataloaders'; fail fast with a clear message instead.
            raise ValueError(f'Unknown dataset: {args.dataset!r}')

        # get the network
        model, model_name = net_from_args(args, num_classes=10, IM_SIZE=IM_SIZE)
        model = model.to(device)
        print(f'--------- Training: {model_name} ---------')

        # get training parameters and train:
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9,
                              weight_decay=5e-4)
        scheduler = lr_scheduler.StepLR(optimizer, step_size=int(epochs / 3),
                                        gamma=0.2)  # close enough

        metrics = {
            'train_top1_acc': [],
            'train_losses': [],
            'val_top1_acc': [],
            'val_losses': [],
        }
        best_val_acc = 0

        for epoch in tqdm(range(epochs)):
            # train for one epoch
            train_top1_acc, train_losses = train_epoch(dataloaders['train'], model,
                                                       criterion, optimizer, epoch,
                                                       device)
            metrics['train_top1_acc'].append(train_top1_acc)
            metrics['train_losses'].append(train_losses)

            # evaluate on validation set
            # NOTE(review): criterion=None is passed here even though a
            # criterion exists — confirm validate_epoch handles None (the
            # returned val_losses are still recorded below).
            val_top1_acc, val_losses = validate_epoch(dataloaders['val'], model,
                                                      device, criterion=None)
            metrics['val_top1_acc'].append(val_top1_acc)
            metrics['val_losses'].append(val_losses)
            scheduler.step()

            # remember best validation accuracy and save checkpoint
            is_best = val_top1_acc > best_val_acc
            best_val_acc = max(val_top1_acc, best_val_acc)
            save_checkpoint(
                {
                    'model_name': model_name,
                    'state_dict': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'epoch': epoch + 1,
                    'best_val_acc': best_val_acc,
                    'metrics': metrics,
                }, is_best, model_name, SAVE_PATH)

        metrics_loc = str(SAVE_PATH) + '/' + str(model_name) + '_metrics.pkl'
        with open(metrics_loc, 'wb') as f:
            pickle.dump(metrics, f)