def make_data_loader(args, no_aug=False, **kwargs):
    if args.dataset == 'cifar10':
        mean = [0.4914, 0.4822, 0.4465]
        std = [0.2023, 0.1994, 0.2010]
    elif args.dataset == 'cifar100':
        mean = [0.5071, 0.4867, 0.4408]
        std = [0.2675, 0.2565, 0.2761]
    elif args.dataset == 'miniimagenet':
        mean = [0.4728, 0.4487, 0.4031]
        std = [0.2744, 0.2663, 0.2806]

    size = 32
    if args.dataset == 'miniimagenet':
        size = 84

    if no_aug:
        transform_train = torchvision.transforms.Compose([
            torchvision.transforms.Resize(size),
            torchvision.transforms.CenterCrop(size),
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean, std),
        ])
    else:
        # Some transformations to avoid fitting to the noise
        transform_train = torchvision.transforms.Compose([
            torchvision.transforms.RandomCrop(size, padding=4),
            torchvision.transforms.RandomHorizontalFlip(),
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize(mean, std),
        ])
    transform_test = torchvision.transforms.Compose([
        torchvision.transforms.Resize(size),
        torchvision.transforms.CenterCrop(size),
        torchvision.transforms.ToTensor(),
        torchvision.transforms.Normalize(mean, std),
    ])

    if args.dataset == "cifar10":
        trainset = datasets.cifar10(transform=transform_train, regim='train')
        testset = datasets.cifar10(transform=transform_test, regim='val')
    elif args.dataset == "cifar100":
        trainset = datasets.cifar100(transform=transform_train, regim='train')
        testset = datasets.cifar100(transform=transform_test, regim='val')
    elif args.dataset == "miniimagenet":
        trainset, testset = datasets.miniimagenet(transform=transform_train,
                                                  transform_test=transform_test)
    else:
        raise NotImplementedError

    train_loader = torch.utils.data.DataLoader(trainset, batch_size=args.batch_size,
                                               shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(testset, batch_size=args.batch_size,
                                              shuffle=False, **kwargs)
    return train_loader, test_loader
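# Usage sketch for make_data_loader (illustrative, not part of the original
# script): the Namespace below is a hypothetical stand-in for the argparse
# arguments the function reads, and extra keyword arguments (e.g. num_workers)
# are forwarded straight to DataLoader.
from argparse import Namespace

args = Namespace(dataset='cifar10', batch_size=128)
train_loader, test_loader = make_data_loader(args, num_workers=2)
images, targets = next(iter(train_loader))
assert images.shape == (128, 3, 32, 32)  # CIFAR-10 batches at size 32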
def adversarial_attack(args, model, inv_factors, results_path, fig_path):
    print("Loading data")
    if args.data == 'cifar10':
        test_loader = datasets.cifar10(args.torch_data, splits='test')
    elif args.data == 'gtsrb':
        test_loader = datasets.gtsrb(args.data_dir, batch_size=args.batch_size, splits='test')
    elif args.data == 'mnist':
        test_loader = datasets.mnist(args.torch_data, splits='test')
    elif args.data == 'tiny':
        test_loader = datasets.imagenet(args.data_dir, img_size=64, batch_size=args.batch_size,
                                        splits='test', tiny=True)
    elif args.data == 'imagenet':
        img_size = 224
        if args.model in ['googlenet', 'inception_v3']:
            img_size = 299
        test_loader = datasets.imagenet(args.data_dir, img_size, args.batch_size,
                                        workers=args.workers, splits='test')

    if args.epsilon > 0:
        print(eval_fgsm(model, test_loader, args.epsilon, args.device)[-1])
    else:
        stats_dict = {"eps": [], "acc": [], "ece1": [], "ece2": [], "nll": [], "ent": []}
        bnn_stats_dict = {"eps": [], "acc": [], "ece1": [], "ece2": [], "nll": [], "ent": []}
        steps = np.concatenate([np.linspace(0, 0.2, 11), np.linspace(0.3, 1, 8)])
        for step in steps:
            stats = eval_fgsm(model, test_loader, step, args.device, verbose=False)[-1]
            bnn_stats = eval_fgsm_bnn(model, test_loader, inv_factors, args.estimator,
                                      args.samples, step, device=args.device)[-1]
            for (k1, v1), (k2, v2) in zip(stats.items(), bnn_stats.items()):
                stats_dict[k1].append(v1)
                bnn_stats_dict[k2].append(v2)
        np.savez(results_path + "_fgsm.npz", stats=stats_dict, bnn_stats=bnn_stats_dict)
        print(tabulate.tabulate(stats_dict, headers="keys"))
        print(tabulate.tabulate(bnn_stats_dict, headers="keys"))
        plot.adversarial_results(steps, stats_dict, bnn_stats_dict, fig_path)
def compute_mean_and_std():
    # dataset = datasets.ssl_data(is_train=True, supervised=True,
    #                             data_transforms=data_transformations.tensor_transform)
    dataset = datasets.cifar10(is_train=True, supervised=True,
                               data_transforms=data_transformations.tensor_transform)
    loader = torch.utils.data.DataLoader(dataset, batch_size=64, shuffle=False, num_workers=0)

    mean = 0.
    std = 0.
    for batch_idx, (data, _) in enumerate(loader):
        images = data.to(device)  # data is already a tensor; no need to copy via torch.tensor()
        batch_samples = images.size(0)  # batch size (the last batch can have a smaller size!)
        images = images.view(batch_samples, images.size(1), -1)
        mean += images.mean(2).sum(0)
        std += images.std(2).sum(0)
    # Note: averaging per-image standard deviations only approximates the
    # standard deviation over all pixels.
    mean /= len(loader.dataset)
    std /= len(loader.dataset)
    print(mean)
    print(std)
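# The loop above averages per-image standard deviations, which only
# approximates the true per-channel std over all pixels. A sketch (not in the
# original) of an exact single-pass alternative that accumulates sums and
# squared sums, assuming `loader` yields (data, target) batches:
def compute_mean_and_std_exact(loader):
    n_pixels = 0
    channel_sum = 0.
    channel_sq_sum = 0.
    for data, _ in loader:
        images = data.view(data.size(0), data.size(1), -1)
        n_pixels += images.size(0) * images.size(2)
        channel_sum += images.sum(dim=(0, 2))
        channel_sq_sum += (images ** 2).sum(dim=(0, 2))
    mean = channel_sum / n_pixels
    # Var[X] = E[X^2] - E[X]^2
    std = (channel_sq_sum / n_pixels - mean ** 2).sqrt()
    return mean, std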
def main(args):
    tf.logging.set_verbosity(tf.logging.INFO)
    image_size = [args.resolution, args.resolution]
    if args.dataset == "cifar10":
        cifar_prepare, dataset_gen = cifar10(True), cifar10(False)
        train_meta, test_meta = cifar_prepare("train"), cifar_prepare("test")
    else:
        raise NotImplementedError
    params = vars(args)
    tf.logging.info("Training on %d samples, evaluation on %d samples"
                    % (train_meta["length"], test_meta["length"]))
    params["steps_per_epoch"] = train_meta["length"] // args.batch_size
    max_steps = args.num_epochs * params["steps_per_epoch"]
    params["num_label_classes"] = train_meta["num_classes"]
    if args.request_from_nni:
        exporters = [NNIExporter()]
    else:
        exporters = []
    run_config = tf.estimator.RunConfig(model_dir=args.log_dir,
                                        log_step_count_steps=10,
                                        save_checkpoints_secs=args.evaluation_interval,
                                        save_summary_steps=10)
    classifier = tf.estimator.Estimator(model_fn=model_fn, params=params, config=run_config)
    train_spec = tf.estimator.TrainSpec(
        input_fn=lambda: dataset_gen("train", image_size, True, args.batch_size),
        max_steps=max_steps)
    eval_spec = tf.estimator.EvalSpec(
        input_fn=lambda: dataset_gen("test", image_size, False, args.batch_size),
        throttle_secs=args.evaluation_interval,
        exporters=exporters)
    tf.estimator.train_and_evaluate(classifier, train_spec, eval_spec)
def test(args, model, fig_path=""):
    print("Loading data")
    if args.data == 'cifar10':
        test_loader = datasets.cifar10(args.torch_data, splits='test')
    elif args.data == 'gtsrb':
        test_loader = datasets.gtsrb(args.data_dir, batch_size=args.batch_size, splits='test')
    elif args.data == 'mnist':
        test_loader = datasets.mnist(args.torch_data, splits='test')
    elif args.data == 'tiny':
        test_loader = datasets.imagenet(args.data_dir, img_size=64, batch_size=args.batch_size,
                                        splits='test', tiny=True)
    elif args.data == 'imagenet':
        img_size = 224
        if args.model in ['googlenet', 'inception_v3']:
            img_size = 299
        test_loader = datasets.imagenet(args.data_dir, img_size, args.batch_size,
                                        workers=args.workers, splits='test')

    predictions, labels = eval_nn(model, test_loader, args.device, args.verbose)

    print("Plotting results")
    plot.reliability_diagram(predictions, labels, path=fig_path + "_reliability.pdf")
def main():
    args = setup()

    print("Preparing directories")
    os.makedirs(os.path.join(args.root_dir, "factors"), exist_ok=True)
    filename = f"{args.prefix}{args.model}_{args.data}_{args.estimator}{args.suffix}"
    factors_path = os.path.join(args.root_dir, "factors", filename)

    print("Loading model")
    if args.model == 'lenet5':
        model = lenet5.lenet5(pretrained=args.data, device=args.device)
    elif args.model == 'resnet18' and args.data != 'imagenet':
        model = resnet.resnet18(pretrained=os.path.join(args.root_dir, 'weights',
                                                        f"{args.model}_{args.data}.pth"),
                                num_classes=43 if args.data == 'gtsrb' else 10,
                                device=args.device)
    else:
        model_class = getattr(torchvision.models, args.model)
        if args.model in ['googlenet', 'inception_v3']:
            model = model_class(pretrained=True, aux_logits=False)
        else:
            model = model_class(pretrained=True)
    model.to(args.device).train()
    if args.parallel:
        model = torch.nn.parallel.DataParallel(model)

    if args.estimator != 'inf':
        print("Loading data")
        if args.data == 'cifar10':
            data = datasets.cifar10(args.torch_data, args.batch_size, args.workers,
                                    args.augment, splits='train')
        elif args.data == 'mnist':
            data = datasets.mnist(args.torch_data, args.batch_size, args.workers,
                                  args.augment, splits='train')
        elif args.data == 'gtsrb':
            data = datasets.gtsrb(args.data_dir, batch_size=args.batch_size,
                                  workers=args.workers, splits='train')
        elif args.data == 'tiny':
            img_size = 64
            data = datasets.imagenet(args.data_dir, img_size, args.batch_size,
                                     splits='train', tiny=True)
        elif args.data == 'imagenet':
            img_size = 224
            if args.model in ['googlenet', 'inception_v3']:
                img_size = 299
            data = datasets.imagenet(args.data_dir, img_size, args.batch_size,
                                     workers=args.workers, splits='train')

    torch.backends.cudnn.benchmark = True

    print("Computing factors")
    if args.estimator == 'inf':
        est = compute_inf(args)
    elif args.estimator == 'efb':
        factors = torch.load(factors_path.replace("efb", "kfac") + '.pth')
        est = compute_factors(args, model, data, factors)
    else:
        est = compute_factors(args, model, data)

    print("Saving factors")
    if args.estimator == "inf":
        torch.save(est.state, f"{factors_path}{args.rank}.pth")
    elif args.estimator == "efb":
        torch.save(list(est.state.values()), factors_path + '.pth')
        torch.save(list(est.diags.values()), factors_path.replace("efb", "diag") + '.pth')
    else:
        torch.save(list(est.state.values()), factors_path + '.pth')
    preprocesses_dataset = lambda dataset: dataset  # just a dummy function
elif dataset == 'norb_random':
    print "Using NORB dataset, size = 96"
    train_x, test_x = norb_random()
    print train_x.shape
    print test_x.shape
    preprocesses_dataset = lambda dataset: dataset  # just a dummy function
elif dataset == 'ocr_letter':
    print "Using ocr_letter dataset"
    train_x, valid_x, test_x = ocr_letter()
    preprocesses_dataset = lambda dataset: dataset  # just a dummy function
elif dataset == 'cifar10':
    print "Using CIFAR10 dataset"
    train_x, train_t, test_x, test_t = cifar10(num_val=None, normalized=True, centered=False)
    preprocesses_dataset = lambda dataset: dataset  # just a dummy function
    train_x = train_x.reshape((-1, num_features))
    test_x = test_x.reshape((-1, num_features))
else:
    print 'Wrong dataset', dataset
    exit()

if mode == 'train_full':
    if dataset in ['sample', 'fixed', 'caltech', 'ocr_letter']:
        train_x = np.concatenate([train_x, valid_x])
elif mode == 'valid':
    assert dataset in ['sample', 'fixed', 'ocr_letter', 'caltech']
    valid_x = valid_x.astype(np.float32)
    sh_x_valid = theano.shared(preprocesses_dataset(valid_x), borrow=True)
def main(argv=None):
    ## Create the directory for the training model
    if gfile.Exists(TRAIN_MODEL_DIR):
        gfile.DeleteRecursively(TRAIN_MODEL_DIR)
    gfile.MakeDirs(TRAIN_MODEL_DIR)

    # Use logging to output and record everything
    # set up logging to file
    util.set_logging(os.path.join(TRAIN_MODEL_DIR, 'myapp.log'))

    # Write down all the FLAGS
    logging.info('FLAG information')
    for key, value in tf.app.flags.FLAGS.__flags.iteritems():
        logging.info('FLAG(%s) : %s' % (key, str(value)))

    # Select the dataset
    if FLAGS.dataset == 'cifar10':
        ds = cifar10()
    elif FLAGS.dataset == 'cifar100':
        ds = cifar100()
    else:
        raise ValueError('Wrong dataset name. Check FLAGS.dataset')

    # Download the dataset
    ds.maybe_download()

    # Read data
    train_data, train_labels = ds.read_data(True)
    TRAIN_SIZE = train_labels.shape[0]
    logging.info('Training Size = %d', TRAIN_SIZE)

    # This is where training samples and labels are fed to the graph.
    # These placeholder nodes will be fed a batch of training data at each
    # training step using the {feed_dict} argument to the Run() call below.
    # This part depends on the dataset.
    train_data_node = tf.placeholder(
        tf.float32,
        shape=(FLAGS.batch_size, ds.image_size(), ds.image_size(), ds.num_channel()),
        name='data_node')
    train_labels_node = tf.placeholder(
        tf.float32, shape=(FLAGS.batch_size, ds.num_label()), name='label_node')
    tf.image_summary('images', train_data_node, max_images=FLAGS.batch_size)

    # Training model architecture: select the network
    if FLAGS.network == 'vgg16':
        network = vgg16()
    elif FLAGS.network == 'snn30k':
        network = snn30k()
    elif FLAGS.network == 'snn30k_wo_norm':
        network = snn30k_wo_norm()
    else:
        raise ValueError('Wrong network name. Check FLAGS.network')

    network_dict = network.model2(data=train_data_node, num_label=ds.num_label(),
                                  d1=FLAGS.d1, d2=FLAGS.d2, pair=FLAGS.pair, train=True)
    logits = network_dict['softmax_linear']
    softmax = tf.nn.softmax(logits)
    tf.histogram_summary('logits', logits)
    tf.histogram_summary('softmax', softmax)

    # Define the objective function
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits, train_labels_node),
        name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy)
    loss = tf.add_n(tf.get_collection('losses'), name='total_loss')
    tf.add_to_collection('show', cross_entropy)
    tf.add_to_collection('show', loss)
    tf.scalar_summary('loss/total_loss', loss)
    tf.scalar_summary('loss/entropy', cross_entropy)

    # Optimizer: set up a variable that's incremented once per batch and
    # controls the learning rate decay.
    batch = tf.Variable(0)
    # Decay once per epoch, using an exponential schedule starting at 0.01.
    learning_rate = tf.train.exponential_decay(
        FLAGS.learning_rate,            # Base learning rate.
        batch * FLAGS.batch_size,       # Current index into the dataset.
        TRAIN_SIZE * FLAGS.decay_step,  # Decay step.
        FLAGS.decay_rate,               # Decay rate.
        staircase=True,
        name='learning_rate')
    # learning_rate = tf.Variable(FLAGS.learning_rate, name='learning_rate')
    tf.scalar_summary("learning_rate", learning_rate)
    tf.add_to_collection('show', learning_rate)

    # Use simple momentum for the optimization.
    optimizer = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum)
    # optimizer = tf.train.AdamOptimizer(learning_rate)

    # Compute the gradients for a list of variables.
    grads_and_vars = optimizer.compute_gradients(loss, var_list=tf.all_variables())

    # Let batch normalization variables have a higher learning rate
    clipped_grads_and_vars = []
    """
    for gv in grads_and_vars:
        if gv[0] is not None:
            if 'bn_weights' in gv[1].name:
                clipped_grads_and_vars.append([tf.mul(gv[0], tf.constant([1.00001])), gv[1]])
            elif 'bn_biases' in gv[1].name:
                clipped_grads_and_vars.append([tf.mul(gv[0], tf.constant([1.00001])), gv[1]])
            else:
                clipped_grads_and_vars.append([gv[0], gv[1]])
    train_op = optimizer.apply_gradients(clipped_grads_and_vars, global_step=batch)
    """
    train_op = optimizer.apply_gradients(grads_and_vars, global_step=batch)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.merge_all_summaries()

    train_loss = []
    train_error = []

    # Create a local session to run this computation.
    with tf.Session() as s:
        # Create a saver to store all the variables later
        saver = tf.train.Saver(tf.all_variables())
        # Run all the initializers to prepare the trainable parameters.
        tf.initialize_all_variables().run()
        summary_writer = tf.train.SummaryWriter(TRAIN_MODEL_DIR, graph_def=s.graph_def)
        offset_old = 0
        logging.info('Initialized!')
        ret = []
        # Define the checkpoint path up front so the final save below always works.
        checkpoint_path = os.path.join(TRAIN_MODEL_DIR, 'model.ckpt')

        # Loop through training steps.
        num_steps = FLAGS.num_epochs * TRAIN_SIZE // FLAGS.batch_size
        session_start_time = time.time()
        for step in xrange(num_steps):
            # Compute the offset of the current minibatch in the data.
            # Note that we could use better randomization across epochs.
            offset = (step * FLAGS.batch_size) % (TRAIN_SIZE - FLAGS.batch_size)
            batch_data = train_data[offset:(offset + FLAGS.batch_size), :, :, :]
            batch_labels = train_labels[offset:(offset + FLAGS.batch_size)]
            # This dictionary maps the batch data (as a numpy array) to the
            # node in the graph it should be fed to.
            feed_dict = {train_data_node: batch_data,
                         train_labels_node: batch_labels}
            start_time = time.time()
            # Run the graph and fetch some of the nodes.
            # Reminder: (train_op) is the most important fetch here. Without it,
            # TensorFlow will not run the backpropagation.
            ret.append(s.run(tf.get_collection('show') + [logits, train_op],
                             feed_dict=feed_dict))
            duration = time.time() - start_time
            train_error.append(util.error_rate(ret[-1][-2], batch_labels))

            if step % FLAGS.iter_print_train_info == (FLAGS.iter_print_train_info - 1):
                # Print the training information
                logging.info('Epoch %.2f, Step %d'
                             % ((float(step) * FLAGS.batch_size / TRAIN_SIZE), step))
                # Print the time information
                sec_per_batch = float(duration)
                remaining_sec = int(float((time.time() - session_start_time) / step)
                                    * float(num_steps - step))
                remaining_time = str(datetime.timedelta(seconds=remaining_sec))
                logging.info('%.3f sec/batch, remaining time = %s'
                             % (sec_per_batch, remaining_time))
                ret = np.array(ret)
                for idx, var in enumerate(tf.get_collection('show')):
                    logging.info('Average (%s): %f' % (var.name, np.mean(ret[:, idx])))
                logging.info('Average Train error: %.2f%%' % np.mean(train_error))
                train_error = []
                ret = []
                print('\n')
                sys.stdout.flush()

            if step % 100 == 99:
                # Save the summary information
                logging.info('Save the summary information')
                summary_str = s.run(summary_op, feed_dict)
                summary_writer.add_summary(summary_str, step)

            # Per epoch
            if offset < offset_old:
                cur_epoch = int(np.round(float(step) * FLAGS.batch_size / TRAIN_SIZE))
                logging.info('Epoch %d' % cur_epoch)

                # Randomize the data order for batch normalization
                logging.info('Reorder data order for Batch Normalization')
                rand_idx = np.random.permutation(len(train_labels))
                train_data = train_data[rand_idx, :, :, :]
                train_labels = train_labels[rand_idx, :]

                # Horizontal mirroring
                logging.info('Randomly horizontal flip the images')
                mir_idx = np.random.randint(2, size=len(train_labels))
                mir_idx = np.nonzero(mir_idx > 0)[0]
                for i in range(ds.num_channel()):
                    train_data[mir_idx, :, :, i] = train_data[mir_idx, :, ::-1, i]

                if (cur_epoch % 10) == 9:
                    # Save the model
                    logging.info('Save the model : %s' % checkpoint_path)
                    saver.save(s, checkpoint_path, global_step=step)
                sys.stdout.flush()
            offset_old = offset

        # Save the last model
        logging.info('Save the model : %s' % checkpoint_path)
        saver.save(s, checkpoint_path, global_step=step)
def out_of_domain(args, model, inv_factors, results_path="", fig_path=""):
    """Evaluates the model on in- and out-of-domain data.

    Each dataset has its own out-of-domain dataset which is loaded automatically
    alongside the in-domain dataset specified in `args.data`. For each image
    (batch) in the in- and out-of-domain data a forward pass through the
    provided `model` is performed and the predictions are stored under
    `results_path`. This is repeated for the Bayesian variant of the model
    (Laplace approximation).

    Parameters
    ----------
    args : Todo: Check type
        The arguments provided to the script on execution.
    model : torch.nn.Module Todo: Verify
        A `torchvision` or custom neural network (a `torch.nn.Module` or
        `torch.nn.Sequential` instance)
    inv_factors : list
        A list of KFAC factors, eigenvectors of KFAC factors or diagonal terms.
        Todo: INF
    results_path : string, optional
        The path where results (in- and out-of-domain predictions) should be
        stored. Results are not stored if argument `args.no_results` is provided.
    fig_path : string, optional
        The path where figures should be stored. Figures are only generated if
        argument `args.plot` is provided.
    """
    print("Loading data")
    if args.data == 'cifar10':
        in_data = datasets.cifar10(args.torch_data, splits='test')
        out_data = datasets.svhn(args.torch_data, splits='test')
    elif args.data == 'mnist':
        in_data = datasets.mnist(args.torch_data, splits='test')
        out_data = datasets.kmnist(args.torch_data, splits='test')
    elif args.data == 'gtsrb':
        in_data = datasets.gtsrb(args.data_dir, batch_size=args.batch_size, splits='test')
        out_data = datasets.cifar10(args.torch_data, splits='test')
    elif args.data == 'tiny':
        in_data = datasets.imagenet(args.data_dir, img_size=64, batch_size=args.batch_size,
                                    splits='test', tiny=True, use_cache=True)
        out_data = datasets.art(args.data_dir, img_size=64, batch_size=args.batch_size,
                                use_cache=True)
    elif args.data == 'imagenet':
        img_size = 224
        if args.model in ['googlenet', 'inception_v3']:
            img_size = 299
        in_data = datasets.imagenet(args.data_dir, img_size, args.batch_size,
                                    workers=args.workers, splits='test', use_cache=True)
        out_data = datasets.art(args.data_dir, img_size, args.batch_size,
                                workers=args.workers, use_cache=True)

    # Compute NN and BNN predictions on the validation set of the training data
    predictions, bnn_predictions, labels, stats = eval_nn_and_bnn(
        model, in_data, inv_factors, args.estimator, args.samples, args.stats,
        args.device, verbose=True)

    # Compute NN and BNN predictions on out-of-distribution data
    ood_predictions, bnn_ood_predictions, _, _ = eval_nn_and_bnn(
        model, out_data, inv_factors, args.estimator, args.samples, False,
        args.device, verbose=True)

    if not args.no_results:
        print("Saving results")
        np.savez_compressed(results_path,
                            stats=stats,
                            labels=labels,
                            predictions=predictions,
                            bnn_predictions=bnn_predictions,
                            ood_predictions=ood_predictions,
                            bnn_ood_predictions=bnn_ood_predictions)

    if args.plot:
        print("Plotting results")
        fig, ax = plt.subplots(figsize=(12, 7), tight_layout=True)
        plot.inv_ecdf_vs_pred_entropy(predictions, color='dodgerblue', linestyle='--', axis=ax)
        plot.inv_ecdf_vs_pred_entropy(ood_predictions, color='crimson', linestyle='--', axis=ax)
        plot.inv_ecdf_vs_pred_entropy(bnn_predictions, color='dodgerblue', axis=ax)
        plot.inv_ecdf_vs_pred_entropy(bnn_ood_predictions, color='crimson', axis=ax)
        ax.legend([f"NN {args.data.upper()} | Acc.: {accuracy(predictions, labels):.2f}%",
                   "NN OOD",
                   f"BNN {args.data.upper()} | Acc.: {accuracy(bnn_predictions, labels):.2f}%",
                   "BNN OOD"],
                  fontsize=16, frameon=False)
        plt.savefig(fig_path + "_ecdf.pdf", format='pdf', dpi=1200)

        plot.reliability_diagram(predictions, labels, path=fig_path + "_reliability.pdf")
        plot.reliability_diagram(bnn_predictions, labels, path=fig_path + "_bnn_reliability.pdf")
        plot.entropy_hist(predictions, ood_predictions, path=fig_path + "_entropy.pdf")
        plot.entropy_hist(bnn_predictions, bnn_ood_predictions, path=fig_path + "_bnn_entropy.pdf")
def main():
    args = setup()

    print("Preparing directories")
    filename = f"{args.prefix}{args.model}_{args.data}_{args.estimator}{args.suffix}"
    factors_path = os.path.join(args.root_dir, "factors", filename)
    weights_path = os.path.join(args.root_dir, "weights", f"{args.model}_{args.data}.pth")
    if args.exp_id == -1:
        if not args.no_results:
            os.makedirs(os.path.join(args.results_dir, args.model, "data",
                                     args.estimator, args.optimizer), exist_ok=True)
        if args.plot:
            os.makedirs(os.path.join(args.results_dir, args.model, "figures",
                                     args.estimator, args.optimizer), exist_ok=True)
        results_path = os.path.join(args.results_dir, args.model, "data",
                                    args.estimator, args.optimizer, filename)
    else:
        if not args.no_results:
            os.makedirs(os.path.join(args.results_dir, args.model, "data",
                                     args.estimator, args.optimizer, args.exp_id),
                        exist_ok=True)
        if args.plot:
            os.makedirs(os.path.join(args.results_dir, args.model, "figures",
                                     args.estimator, args.optimizer, args.exp_id),
                        exist_ok=True)
        results_path = os.path.join(args.results_dir, args.model, "data",
                                    args.estimator, args.optimizer, args.exp_id, filename)

    print("Loading model")
    if args.model == 'lenet5':
        model = lenet5(pretrained=args.data, device=args.device)
    elif args.model == 'resnet18' and args.data != 'imagenet':
        model = resnet18(pretrained=weights_path,
                         num_classes=43 if args.data == 'gtsrb' else 10,
                         device=args.device)
    else:
        model_class = getattr(torchvision.models, args.model)
        if args.model in ['googlenet', 'inception_v3']:
            model = model_class(pretrained=True, aux_logits=False)
        else:
            model = model_class(pretrained=True)
    model.to(args.device).eval()
    if args.parallel:
        model = torch.nn.parallel.DataParallel(model)

    print("Loading data")
    if args.data == 'mnist':
        val_loader = datasets.mnist(args.torch_data, splits='val')
    elif args.data == 'cifar10':
        val_loader = datasets.cifar10(args.torch_data, splits='val')
    elif args.data == 'gtsrb':
        val_loader = datasets.gtsrb(args.data_dir, batch_size=args.batch_size, splits='val')
    elif args.data == 'imagenet':
        img_size = 224
        if args.model in ['googlenet', 'inception_v3']:
            img_size = 299
        val_loader = datasets.imagenet(args.data_dir, img_size, args.batch_size,
                                       args.workers, splits='val',
                                       use_cache=True, pre_cache=True)

    print("Loading factors")
    if args.estimator in ["diag", "kfac"]:
        factors = torch.load(factors_path + '.pth')
    elif args.estimator == 'efb':
        kfac_factors = torch.load(factors_path.replace("efb", "kfac") + '.pth')
        lambdas = torch.load(factors_path + '.pth')
        factors = list()
        eigvecs = get_eigenvectors(kfac_factors)
        for eigvec, lambda_ in zip(eigvecs, lambdas):
            factors.append((eigvec[0], eigvec[1], lambda_))
    elif args.estimator == 'inf':
        factors = torch.load(f"{factors_path}{args.rank}.pth")

    torch.backends.cudnn.benchmark = True

    norm_min = -10
    norm_max = 10
    scale_min = -10
    scale_max = 10

    if args.boundaries:
        x0 = list()
        boundaries = [[norm_min, scale_min], [norm_max, scale_max],
                      [norm_min, scale_max], [norm_max, scale_min],
                      [norm_min / 2., scale_min], [norm_max / 2., scale_max],
                      [norm_min, scale_max / 2.], [norm_max, scale_min / 2.],
                      [norm_min / 2., scale_min / 2.], [norm_max / 2., scale_max / 2.],
                      [norm_min / 2., scale_max / 2.], [norm_max / 2., scale_min / 2.]]
        for b in boundaries:
            tmp = list()
            for _ in range(3 if args.layer else 1):
                tmp.extend(b)
            x0.append(tmp)
    else:
        x0 = None

    f_norms = np.array([factor.norm().cpu().numpy() for factor in factors])

    space = list()
    for i in range(3 if args.layer else 1):
        space.append(skopt.space.Real(norm_min, norm_max, name=f"norm{i}", prior='uniform'))
        space.append(skopt.space.Real(scale_min, scale_max, name=f"scale{i}", prior='uniform'))

    stats = {"norms": [], "scales": [], "acc": [], "ece": [], "nll": [], "ent": [], "cost": []}

    @skopt.utils.use_named_args(dimensions=space)
    def objective(**params):
        norms = list()
        scales = list()
        for f in f_norms:
            if args.layer:
                # Closest to max
                if abs(f_norms.max() - f) < abs(f_norms.min() - f) and \
                        abs(f_norms.max() - f) < abs(f_norms.mean() - f):
                    norms.append(10 ** params['norm0'])
                    scales.append(10 ** params['scale0'])
                # Closest to min
                elif abs(f_norms.min() - f) < abs(f_norms.max() - f) and \
                        abs(f_norms.min() - f) < abs(f_norms.mean() - f):
                    norms.append(10 ** params['norm1'])
                    scales.append(10 ** params['scale1'])
                # Closest to mean
                else:
                    norms.append(10 ** params['norm2'])
                    scales.append(10 ** params['scale2'])
            else:
                norms.append(10 ** params['norm0'])
                scales.append(10 ** params['scale0'])
        if args.layer:
            print(tabulate.tabulate({'Layer': np.arange(len(factors)),
                                     'F-Norm:': f_norms,
                                     'Norms': norms,
                                     'Scales': scales},
                                    headers='keys', numalign='right'))
        else:
            print("Norm:", norms[0], "Scale:", scales[0])
        try:
            inv_factors = invert_factors(factors, norms, args.pre_scale * scales,
                                         args.estimator)
        except (RuntimeError, np.linalg.LinAlgError):
            print("Error: Singular matrix")
            return 200

        predictions, labels, _ = eval_bnn(model, val_loader, inv_factors, args.estimator,
                                          args.samples, stats=False, device=args.device,
                                          verbose=False)

        err = 100 - accuracy(predictions, labels)
        ece = 100 * expected_calibration_error(predictions, labels)[0]
        nll = negative_log_likelihood(predictions, labels)
        ent = predictive_entropy(predictions, mean=True)
        stats["norms"].append(norms)
        stats["scales"].append(scales)
        stats["acc"].append(100 - err)
        stats["ece"].append(ece)
        stats["nll"].append(nll)
        stats["ent"].append(ent)
        stats["cost"].append(err + ece)
        print(f"Err.: {err:.2f}% | ECE: {ece:.2f}% | NLL: {nll:.3f} | Ent.: {ent:.3f}")

        return err + ece

    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=FutureWarning)
        # acq_func options:
        #   EI (neg. expected improvement)
        #   LCB (lower confidence bound)
        #   PI (neg. prob. of improvement): usually favours exploitation over exploration
        #   gp_hedge (choose probabilistically between all)
        # xi: how much improvement one wants over the previous best values.
        # kappa: importance of the variance of predicted values.
        #        High: exploration > exploitation.
        # base_estimator: RF (random forest), ET (extra trees)
        if args.optimizer == "gbrt":
            res = skopt.gbrt_minimize(func=objective, dimensions=space, n_calls=args.calls,
                                      x0=x0, verbose=True, n_jobs=args.workers,
                                      n_random_starts=0 if x0 else 10, acq_func='EI')
        elif args.optimizer == "gp":
            res = skopt.gp_minimize(func=objective, dimensions=space, n_calls=args.calls,
                                    x0=x0, verbose=True, n_jobs=args.workers,
                                    n_random_starts=0 if x0 else 1, acq_func='gp_hedge')
        elif args.optimizer == "forest":
            res = skopt.forest_minimize(func=objective, dimensions=space, n_calls=args.calls,
                                        x0=x0, verbose=True, n_jobs=args.workers,
                                        n_random_starts=0 if x0 else 1, acq_func='EI')
        elif args.optimizer == "random":
            res = skopt.dummy_minimize(func=objective, dimensions=space, n_calls=args.calls,
                                       x0=x0, verbose=True)
        elif args.optimizer == "grid":
            space = [np.arange(norm_min, norm_max + 1, 10),
                     np.arange(scale_min, scale_max + 1, 10)]
            res = grid(func=objective, dimensions=space)

    print(f"Minimal cost of {min(stats['cost'])} found at:")
    if args.layer:
        print(tabulate.tabulate({'Layer': np.arange(len(factors)),
                                 'F-Norm:': f_norms,
                                 'Norms': stats['norms'][np.argmin(stats['cost'])],
                                 'Scales': stats['scales'][np.argmin(stats['cost'])]},
                                headers='keys', numalign='right'))
    else:
        print("Norm:", stats['norms'][np.argmin(stats['cost'])][0],
              "Scale:", stats['scales'][np.argmin(stats['cost'])][0])

    if not args.no_results:
        print("Saving results")
        del res.specs['args']['func']
        np.save(results_path + f"_best_params{'_layer.npy' if args.layer else '.npy'}",
                [stats['norms'][np.argmin(stats['cost'])],
                 stats['scales'][np.argmin(stats['cost'])]])
        np.save(results_path + f"_hyperopt_stats{'_layer.npy' if args.layer else '.npy'}",
                stats)
        skopt.dump(res, results_path + f"_hyperopt_dump{'_layer.pkl' if args.layer else '.pkl'}")

    if args.plot:
        print("Plotting results")
        hyperparameters(args)
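# Minimal standalone sketch (not from the original) of the skopt pattern used
# above: use_named_args maps a list of named dimensions onto keyword arguments
# of the objective, and gp_minimize searches that space. toy_objective is a
# hypothetical stand-in for the real cost (error + ECE).
import skopt

toy_space = [skopt.space.Real(-10, 10, name="norm0"),
             skopt.space.Real(-10, 10, name="scale0")]

@skopt.utils.use_named_args(dimensions=toy_space)
def toy_objective(norm0, scale0):
    # Stand-in cost with its minimum at norm0 = 1, scale0 = -2.
    return (norm0 - 1) ** 2 + (scale0 + 2) ** 2

toy_res = skopt.gp_minimize(toy_objective, toy_space, n_calls=20)
print(toy_res.x, toy_res.fun)  # best parameters and best cost found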
elif dataset == 'norb_96':
    print "Using NORB dataset, size = 96"
    x, y = load_numpy_subclasses(size=96, normalize=True)
    x = x.T
    train_x = x[:24300]
    test_x = x[24300 * 2:24300 * 3]  # only for debugging, compare generation only
    del y
    preprocesses_dataset = lambda dataset: dataset  # just a dummy function
elif dataset == 'ocr_letter':
    print "Using ocr_letter dataset"
    train_x, valid_x, test_x = ocr_letter()
    preprocesses_dataset = lambda dataset: dataset  # just a dummy function
elif dataset == 'cifar10':
    print "Using CIFAR10 dataset"
    train_x, train_t, test_x, test_t = cifar10(num_val=None, normalized=True, centered=False)
    preprocesses_dataset = lambda dataset: dataset  # just a dummy function
    train_x = train_x.reshape((-1, num_features))
    test_x = test_x.reshape((-1, num_features))
else:
    print 'Wrong dataset', dataset
    exit()

if dataset in ['sample', 'fixed', 'caltech', 'ocr_letter']:
    train_x = np.concatenate([train_x, valid_x])
    if dataset == 'sample':
        train_t = np.concatenate([train_t, valid_t])

train_x = train_x.astype(theano.config.floatX)
test_x = test_x.astype(theano.config.floatX)
from logger import struct, Logger
from train import train, acc_metric, nll_metric
from models import vgg

H = struct(
    batch_size=128,
    val_batch_size=100,
    epochs=10,
    lr=0.1,
    momentum=0.9,
)
S = struct(epoch=1, bn=1)
log = Logger('cifar10_standard', H, S, overwrite=True, metric_show_freq=100)
tn_loader, val_loader = loader(cifar10(), H.batch_size, H.val_batch_size)


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.features = vgg('vgg11')
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        out = F.log_softmax(out, dim=1)  # explicit dim avoids the deprecation warning
        return out
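# Quick shape check for Net (illustrative, not part of the original script),
# assuming the vgg('vgg11') helper follows the usual CIFAR VGG layout and maps
# a (N, 3, 32, 32) batch to (N, 512, 1, 1) features before flattening.
import torch

net = Net()
dummy = torch.randn(2, 3, 32, 32)  # two fake CIFAR-10 images
out = net(dummy)
print(out.shape)             # expected: torch.Size([2, 10])
print(out.exp().sum(dim=1))  # log-probabilities: each row sums to ~1 after exp()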
def main(argv):
    patch_conv2d_4_size()

    # Parse arguments
    parser = argparse.ArgumentParser(prog="main.py")
    parser.add_argument("--model", required=True, help="model name")
    parser.add_argument("--gpu", default="0", help="gpu ids, separated by comma")
    parser.add_argument("--resume", "-r",
                        help="resume from checkpoint, specify the folder containing the ckpt.t7")
    parser.add_argument("--dataset", default="cifar", type=str, help="the dataset")
    parser.add_argument("--no-cuda", action="store_true", default=False,
                        help="do not use gpu")
    parser.add_argument("--seed", default=None, help="random seed", type=int)
    parser.add_argument("--path", default=None, help="imagenet dataset path")
    args = parser.parse_args(argv)

    if args.seed is not None:
        if torch.cuda.is_available():
            torch.cuda.manual_seed(args.seed)
        np.random.seed(args.seed)
        torch.manual_seed(args.seed)
        random.seed(args.seed)

    device = "cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu"
    if device == "cuda":
        logging.info("Using GPU! Available gpu count: {}".format(torch.cuda.device_count()))
    else:
        logging.info("\033[1;3mWARNING: Using CPU!\033[0m")

    # Dataset
    if args.dataset == "cifar":
        trainloader, validloader, ori_trainloader, testloader, _ = datasets.cifar10(
            train_bs=128, test_bs=100, train_transform=None, test_transform=None,
            train_val_split_ratio=0.9)
    elif args.dataset == "imagenet":
        trainloader, validloader, ori_trainloader, testloader, _ = datasets.imagenet(
            128, 32, None, None, train_val_split_ratio=None, path=args.path)

    # Build model
    logging.info("==> Building model..")
    gpus = [int(d) for d in args.gpu.split(",")]
    torch.cuda.set_device(gpus[0])
    net = get_model(args.model)()
    net = net.to(device)
    if device == "cuda":
        cudnn.benchmark = True
    # Define p_net unconditionally so the CPU path also works
    if device == "cuda" and len(gpus) > 1:
        p_net = torch.nn.DataParallel(net, gpus)
    else:
        p_net = net

    tester = Tester(net, p_net, [trainloader, validloader, ori_trainloader], testloader,
                    cfg={"dataset": args.dataset}, log=print)
    tester.init(device=device, resume=args.resume, pretrain=True)
    # tester.test(save=False)
    keep_ratios, sparsity = tester.check_sparsity()
    print("The final Sparsity is {:.3}, Keep Ratios Are:\n{}".format(sparsity, keep_ratios))
    for pc in tester.comp_primals.pc_list:
        print(pc.comp_names, pc.get_keep_ratio())
    _, keep_ratios = tester.get_true_flops()
def main():
    # Hyperparameters
    batch_size = 64
    epochs = 5
    seed = 18
    data = 'ar_faces'

    # Since the model was pretrained on ImageNet, normalize using ImageNet statistics
    imagenet_mean = (0.485, 0.456, 0.406)
    imagenet_std = (0.229, 0.224, 0.225)
    # Means and standard deviations for other datasets
    cifar10_mean = (0.4914, 0.4822, 0.4465)
    cifar10_std = (0.247, 0.243, 0.261)

    if data == 'eth80':
        val_ratio = 0.1
        test_ratio = 0.1
        train_set, val_set, test_set = data_split(eth80(), seed=seed,
                                                  val_ratio=val_ratio, test_ratio=test_ratio)
        train_xfm = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(imagenet_mean, imagenet_std)
        ])
        val_xfm = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(imagenet_mean, imagenet_std)
        ])
        train_set.dataset.transform = train_xfm  # fixed typo: was 'tranform'
        val_set.dataset.transform = val_xfm
        test_set.dataset.transform = val_xfm
    elif data == 'ar_faces':
        val_ratio = 0.1
        test_ratio = 0.1
        xfm = transforms.Compose([
            transforms.Resize((224, 162)),
            transforms.Pad((31, 0)),
            transforms.ToTensor(),
            transforms.Normalize(imagenet_mean, imagenet_std)
        ])
        train_set, val_set, test_set = data_split(ar_faces(), seed=seed,
                                                  val_ratio=val_ratio, test_ratio=test_ratio)
        train_set.dataset.transform = xfm
        val_set.dataset.transform = xfm
        test_set.dataset.transform = xfm
    elif data == 'cifar10':
        val_ratio = 0.2
        test_ratio = 0.2
        train_xfm = transforms.Compose([
            # transforms.Resize((224, 224)),
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(imagenet_mean, imagenet_std)
        ])
        val_xfm = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(imagenet_mean, imagenet_std)
        ])
        train_set, val_set, test_set = data_split(cifar10(), seed=seed,
                                                  val_ratio=val_ratio, test_ratio=test_ratio)
        train_set.dataset.transform = train_xfm  # fixed typo: was 'tranform'
        val_set.dataset.transform = val_xfm
        test_set.dataset.transform = val_xfm

    # Number of classes
    num_classes = len(train_set.dataset.classes)

    # Loaders for each dataset
    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size)
    test_loader = DataLoader(test_set, batch_size=batch_size)

    # # Get a batch to visualize
    # imgs, labels = next(iter(train_loader))
    # # Make into grid
    # imgs_grid = torchvision.utils.make_grid(imgs).permute(1, 2, 0).numpy()
    # # Undo normalize
    # mean = np.array(imagenet_mean)[None, None, :]
    # std = np.array(imagenet_std)[None, None, :]
    # imgs_grid = imgs_grid * std + mean
    # # Print labels and filepaths
    # print(labels)
    # # print([test_loader.dataset.dataset.imgs[i][0] for i in test_loader.dataset.indices[0:batch_size]])
    # plt.imshow(imgs_grid)
    # plt.show()
    # sys.exit()

    # Load the pretrained model
    model = models.squeezenet1_1(pretrained=True)
    # Reshape the classification layer to have 'num_classes' outputs
    model.classifier[1] = nn.Conv2d(512, num_classes, kernel_size=(1, 1), stride=(1, 1))
    model.num_classes = num_classes
    # Replace the dropout layer in the classifier with batch normalization
    model.classifier[0] = nn.BatchNorm2d(512)
    print(model.features)
    print(model.classifier)

    # # Resuming training
    # model = load_model(model, 'squeezenet_ar_faces_9.pt')

    # Freeze all parameters
    for p in model.parameters():
        p.requires_grad = False
    # Make the classifier (and optionally the last 2 layers of the feature extractor) trainable
    # for p in model.features[-1].parameters():
    #     p.requires_grad = True
    # for p in model.features[-2].parameters():
    #     p.requires_grad = True
    for p in model.classifier.parameters():
        p.requires_grad = True
    trainable_params = [p for p in model.parameters() if p.requires_grad]

    # Cross entropy loss function
    criterion = nn.CrossEntropyLoss()
    # Adam optimizer (note: 10e-4 == 1e-3)
    optimizer = optim.Adam(trainable_params, lr=10e-4)

    # For each epoch, train the model on the train set and evaluate on the validation set
    for epoch in range(epochs):
        # # Resuming training
        # epoch += 10
        print('Epoch: {}'.format(epoch))
        # Train
        model, _, _ = train_model(model, train_loader, criterion, optimizer)
        # Validate
        val_acc = eval_model(model, val_loader)
        # Save
        save_model(model, fname='squeezenet_{}_{}.pt'.format(data, epoch))

    # Test
    test_acc = eval_model(model, test_loader)