def main():
    """Train a classifier whose setup is given on the command line.

    Steps: parse the CLI options, load the train/validation/test datasets
    (optionally wrapping each in ``CacheDatasetWrapper``), build the data
    loaders, instantiate ``--model_class`` from ``methods`` with the JSON
    architecture read from ``--config``, and call ``training.train`` with an
    accuracy metric, a ``SaveBestWithMetric`` callback and an
    ``EarlyStoppingWithMetric`` stopper, both attached to the accuracy metric
    on the ``'val'`` partition.
    """
    cli = argparse.ArgumentParser()

    # general run options
    cli.add_argument('--config', '-c', type=str, required=True)
    cli.add_argument('--device', '-d', default='cuda',
                     help='specifies the main device')
    cli.add_argument(
        '--all_device_ids', nargs='+', type=str, default=None,
        help="If not None, this list specifies devices for multiple GPU training. "
             "The first device should match with the main device (args.device).")
    cli.add_argument('--batch_size', '-b', type=int, default=256)
    cli.add_argument('--epochs', '-e', type=int, default=400)
    cli.add_argument('--stopping_param', type=int, default=2**30)
    cli.add_argument('--save_iter', '-s', type=int, default=10)
    cli.add_argument('--vis_iter', '-v', type=int, default=10)
    cli.add_argument('--log_dir', '-l', type=str, default=None)
    cli.add_argument('--seed', type=int, default=42)

    # data options
    cli.add_argument('--dataset', '-D', type=str, default='mnist')
    cli.add_argument('--data_augmentation', '-A', action='store_true',
                     dest='data_augmentation')
    cli.set_defaults(data_augmentation=False)
    cli.add_argument('--error_prob', '-n', type=float, default=0.0)
    cli.add_argument('--num_train_examples', type=int, default=None)
    cli.add_argument('--clean_validation', action='store_true', default=False)
    cli.add_argument('--resize_to_imagenet', action='store_true',
                     dest='resize_to_imagenet')
    cli.set_defaults(resize_to_imagenet=False)
    cli.add_argument('--cache_dataset', action='store_true',
                     dest='cache_dataset')
    cli.set_defaults(cache_dataset=False)
    cli.add_argument('--num_workers', type=int, default=0,
                     help='number of workers in data loaders')

    # model / optimization options
    cli.add_argument('--model_class', '-m', type=str, default='ClassifierL2')
    cli.add_argument('--l2_reg_coef', type=float, default=0.0)
    cli.add_argument('--lr', type=float, default=1e-3, help='Learning rate')
    cli.add_argument('--optimizer', type=str, default='adam',
                     choices=['adam', 'sgd'])

    opts = cli.parse_args()
    print(opts)

    # Datasets are built first and loaders second, so the optional cache
    # wrapper can be applied to the datasets before they are batched.
    train_data, val_data, test_data, _ = load_data_from_arguments(
        opts, build_loaders=False)
    if opts.cache_dataset:
        train_data, val_data, test_data = (
            CacheDatasetWrapper(split)
            for split in (train_data, val_data, test_data))
    train_loader, val_loader, _unused_test_loader = get_loaders_from_datasets(
        train_data, val_data, test_data,
        batch_size=opts.batch_size,
        num_workers=opts.num_workers)

    # optimizer settings handed to the training loop
    optimizer_cfg = {'name': opts.optimizer, 'lr': opts.lr}
    optimization_args = {'optimizer': optimizer_cfg}

    # network architecture comes from the JSON config file
    with open(opts.config, 'r') as config_file:
        architecture_args = json.load(config_file)

    model_cls = getattr(methods, opts.model_class)
    first_input = train_loader.dataset[0][0]
    model = model_cls(input_shape=first_input.shape,
                      architecture_args=architecture_args,
                      l2_reg_coef=opts.l2_reg_coef,
                      device=opts.device,
                      seed=opts.seed)

    # accuracy is always tracked; top-5 accuracy is added for imagenet only
    accuracy_metric = metrics.Accuracy(output_key='pred')
    metrics_list = [accuracy_metric]
    if opts.dataset == 'imagenet':
        metrics_list.append(metrics.TopKAccuracy(k=5, output_key='pred'))

    save_best = callbacks.SaveBestWithMetric(metric=accuracy_metric,
                                             partition='val',
                                             direction='max')
    stopper = callbacks.EarlyStoppingWithMetric(
        metric=accuracy_metric,
        stopping_param=opts.stopping_param,
        partition='val',
        direction='max')

    training.train(model=model,
                   train_loader=train_loader,
                   val_loader=val_loader,
                   epochs=opts.epochs,
                   save_iter=opts.save_iter,
                   vis_iter=opts.vis_iter,
                   optimization_args=optimization_args,
                   log_dir=opts.log_dir,
                   args_to_log=opts,
                   stopper=stopper,
                   metrics=metrics_list,
                   callbacks=[save_best],
                   device_ids=opts.all_device_ids)
def main():
    """Train on a reduced training set and store the validation results.

    An ordering of the training examples is obtained either from a seeded
    random permutation or from the pickle file given by
    ``--sample_ranking_file``. A fraction ``--exclude_ratio`` of the examples
    is removed from the end (``--exclude_side top``) or the start
    (``bottom``) of that ordering, the model is trained on the rest, and a
    pickle with the validation predictions, the validation accuracy and the
    parsed arguments is written under
    ``<output_dir>/<baseline_name>/<exp_name>/``.
    """
    cli = argparse.ArgumentParser()

    # general run options
    cli.add_argument('--config', '-c', type=str, required=True)
    cli.add_argument('--device', '-d', default='cuda',
                     help='specifies the main device')
    cli.add_argument(
        '--all_device_ids', nargs='+', type=str, default=None,
        help="If not None, this list specifies devices for multiple GPU training. "
             "The first device should match with the main device (args.device).")
    cli.add_argument('--batch_size', '-b', type=int, default=256)
    cli.add_argument('--epochs', '-e', type=int, default=400)
    cli.add_argument('--stopping_param', type=int, default=2**30)
    cli.add_argument('--save_iter', '-s', type=int, default=2**30)
    cli.add_argument('--vis_iter', '-v', type=int, default=2**30)
    cli.add_argument('--seed', type=int, default=42)
    cli.add_argument(
        '--num_accumulation_steps', default=1, type=int,
        help='Number of training steps to accumulate before updating weights')

    # data options
    cli.add_argument('--dataset', '-D', type=str, default='mnist')
    cli.add_argument('--data_augmentation', '-A', action='store_true',
                     dest='data_augmentation')
    cli.set_defaults(data_augmentation=False)
    cli.add_argument('--error_prob', '-n', type=float, default=0.0)
    cli.add_argument('--num_train_examples', type=int, default=None)
    cli.add_argument('--clean_validation', action='store_true', default=False)
    cli.add_argument('--resize_to_imagenet', action='store_true',
                     dest='resize_to_imagenet')
    cli.set_defaults(resize_to_imagenet=False)
    cli.add_argument('--cache_dataset', action='store_true',
                     dest='cache_dataset')
    cli.set_defaults(cache_dataset=False)
    cli.add_argument(
        '--sample_ranking_file', type=str, default=None,
        help='Points to a pickle file that stores an ordering of examples from least to '
             'most important. The most important args.exclude_ratio number of samples '
             'will be excluded from training.')
    cli.add_argument('--exclude_ratio', type=float, default=0.0,
                     help='Fraction of examples to exclude.')
    cli.add_argument('--exclude_side', type=str, default='top',
                     choices=['top', 'bottom'],
                     help='from which side of the order to remove')
    cli.add_argument('--num_workers', type=int, default=0,
                     help='number of workers in data loaders')

    # model / optimization options
    cli.add_argument('--model_class', '-m', type=str, default='ClassifierL2')
    cli.add_argument('--l2_reg_coef', type=float, default=0.0)
    cli.add_argument('--lr', type=float, default=1e-3, help='Learning rate')
    cli.add_argument('--optimizer', type=str, default='adam',
                     choices=['adam', 'sgd'])

    # experiment bookkeeping
    cli.add_argument('--random_baseline_seed', type=int, default=42)
    cli.add_argument('--output_dir', '-o', type=str,
                     default='sample_info/results/data-summarization/')
    cli.add_argument('--baseline_name', '-B', type=str, required=True)
    cli.add_argument('--exp_name', '-E', type=str, required=True)

    opts = cli.parse_args()
    print(opts)

    # The log directory is derived from the experiment location and stored on
    # the namespace, so it is also part of the arguments that get logged and
    # pickled below.
    opts.log_dir = os.path.join(opts.output_dir, opts.baseline_name,
                                opts.exp_name, 'logs')
    utils.make_path(opts.log_dir)

    train_data, val_data, test_data, _ = load_data_from_arguments(
        opts, build_loaders=False)

    # Default ordering: a seeded random permutation of the training indices.
    np.random.seed(opts.random_baseline_seed)
    num_train = len(train_data)
    order = np.random.permutation(num_train)

    # A ranking file, when given, replaces the random ordering.
    # NOTE(review): pickle.load executes arbitrary code from the file; only
    # point this at trusted ranking files.
    if opts.sample_ranking_file is not None:
        with open(opts.sample_ranking_file, 'rb') as ranking_file:
            order = pickle.load(ranking_file)

    # 'top' drops the tail of the ordering, anything else drops its head.
    num_excluded = int(opts.exclude_ratio * num_train)
    if num_excluded == 0:
        exclude_indices = []
    elif opts.exclude_side == 'top':
        exclude_indices = order[-num_excluded:]
    else:
        exclude_indices = order[:num_excluded]

    train_data = SubsetDataWrapper(dataset=train_data,
                                   exclude_indices=exclude_indices)

    if opts.cache_dataset:
        train_data, val_data, test_data = (
            CacheDatasetWrapper(split)
            for split in (train_data, val_data, test_data))

    # Shuffling is disabled when one accumulated update already covers the
    # whole (reduced) training set.
    examples_per_update = opts.batch_size * opts.num_accumulation_steps
    shuffle_train = examples_per_update < len(train_data)
    train_loader, val_loader, _unused_test_loader = get_loaders_from_datasets(
        train_data, val_data, test_data,
        batch_size=opts.batch_size,
        num_workers=opts.num_workers,
        shuffle_train=shuffle_train)

    optimizer_cfg = {'name': opts.optimizer, 'lr': opts.lr}
    optimization_args = {'optimizer': optimizer_cfg}

    with open(opts.config, 'r') as config_file:
        architecture_args = json.load(config_file)

    model_cls = getattr(methods, opts.model_class)
    first_input = train_loader.dataset[0][0]
    model = model_cls(input_shape=first_input.shape,
                      architecture_args=architecture_args,
                      l2_reg_coef=opts.l2_reg_coef,
                      device=opts.device,
                      seed=opts.seed)

    # Keep the model permanently in eval mode so that, if the network has
    # pretrained BatchNorm layers, their running averages stay fixed.
    utils.put_always_eval_mode(model)

    # Labels with at least one dimension are treated as one-hot by the metric.
    first_label = train_data[0][1]
    accuracy_metric = metrics.Accuracy(output_key='pred',
                                       one_hot=(first_label.ndim > 0))
    metrics_list = [accuracy_metric]
    if opts.dataset == 'imagenet':
        metrics_list.append(metrics.TopKAccuracy(k=5, output_key='pred'))

    stopper = callbacks.EarlyStoppingWithMetric(
        metric=accuracy_metric,
        stopping_param=opts.stopping_param,
        partition='val',
        direction='max')

    training.train(model=model,
                   train_loader=train_loader,
                   val_loader=val_loader,
                   epochs=opts.epochs,
                   save_iter=opts.save_iter,
                   vis_iter=opts.vis_iter,
                   optimization_args=optimization_args,
                   log_dir=opts.log_dir,
                   args_to_log=opts,
                   stopper=stopper,
                   metrics=metrics_list,
                   device_ids=opts.all_device_ids,
                   num_accumulation_steps=opts.num_accumulation_steps)

    # validation predictions of the trained model and the accuracy recorded
    # by the metric for the last scheduled epoch
    val_outputs = utils.apply_on_dataset(model=model,
                                         dataset=val_data,
                                         cpu=True,
                                         partition='val',
                                         batch_size=opts.batch_size)
    val_preds = val_outputs['pred']
    val_acc = accuracy_metric.value(epoch=opts.epochs - 1, partition='val')

    # Runs of the random baseline are additionally tagged with their seed.
    if opts.baseline_name == 'random':
        seed_suffix = f'-{opts.random_baseline_seed}'
    else:
        seed_suffix = ''
    file_name = f'results-{opts.exclude_ratio:.4f}' + seed_suffix + '.pkl'
    file_path = os.path.join(opts.output_dir, opts.baseline_name,
                             opts.exp_name, file_name)
    utils.make_path(os.path.dirname(file_path))

    results = {'val_preds': val_preds, 'val_acc': val_acc, 'args': opts}
    with open(file_path, 'wb') as out_file:
        pickle.dump(results, out_file)
def main():
    """Train on the full training set or with one example removed.

    When ``--exclude_index`` is given, that single training example is
    dropped before training. After training, the model parameters (moved to
    CPU), the validation predictions, the validation accuracy and the parsed
    arguments are pickled to ``<output_dir>/<exp_name>/full-data-training.pkl``
    (no exclusion) or ``<output_dir>/<exp_name>/<exclude_index>.pkl``.
    With ``--linearized`` the model is wrapped in ``LinearizedModelV2``
    before training.
    """
    cli = argparse.ArgumentParser()

    # general run options
    cli.add_argument('--config', '-c', type=str, required=True)
    cli.add_argument('--device', '-d', default='cuda',
                     help='specifies the main device')
    cli.add_argument(
        '--all_device_ids', nargs='+', type=str, default=None,
        help="If not None, this list specifies devices for multiple GPU training. "
             "The first device should match with the main device (args.device).")
    cli.add_argument('--batch_size', '-b', type=int, default=2**20)
    cli.add_argument('--epochs', '-e', type=int, default=2000)
    cli.add_argument('--stopping_param', type=int, default=2**20)
    cli.add_argument('--save_iter', '-s', type=int, default=2**20)
    cli.add_argument('--vis_iter', '-v', type=int, default=2**20)
    cli.add_argument('--log_dir', '-l', type=str,
                     default='sample_info/logs/junk')
    cli.add_argument('--seed', type=int, default=42)
    cli.add_argument(
        '--num_accumulation_steps', default=1, type=int,
        help='Number of training steps to accumulate before updating weights')

    # data options
    cli.add_argument(
        '--dataset', '-D', type=str, default='mnist4vs9',
        choices=['mnist4vs9', 'synthetic', 'cifar10-cat-vs-dog',
                 'cats-and-dogs'],
        help='Which dataset to use. One can add more choices if needed.')
    cli.add_argument('--data_augmentation', '-A', action='store_true',
                     dest='data_augmentation')
    cli.set_defaults(data_augmentation=False)
    cli.add_argument('--error_prob', '-n', type=float, default=0.0)
    cli.add_argument('--num_train_examples', type=int, default=None)
    cli.add_argument('--clean_validation', action='store_true', default=False)
    cli.add_argument('--resize_to_imagenet', action='store_true',
                     dest='resize_to_imagenet')
    cli.set_defaults(resize_to_imagenet=False)
    cli.add_argument('--cache_dataset', action='store_true',
                     dest='cache_dataset')
    cli.set_defaults(cache_dataset=False)
    cli.add_argument('--num_workers', type=int, default=0,
                     help='number of workers in data loaders')
    cli.add_argument('--exclude_index', type=int, default=None,
                     help='Index of an example to remove.')

    # model / optimization options
    cli.add_argument('--model_class', '-m', type=str, default='ClassifierL2')
    cli.add_argument('--linearized', dest='linearized', action='store_true')
    cli.set_defaults(linearized=False)
    cli.add_argument('--l2_reg_coef', type=float, default=0.0)
    cli.add_argument('--lr', type=float, default=1e-3, help='Learning rate')
    cli.add_argument('--optimizer', type=str, default='sgd',
                     choices=['adam', 'sgd'])

    # experiment bookkeeping
    cli.add_argument('--output_dir', '-o', type=str,
                     default='sample_info/results/ground-truth/ground-truth/')
    cli.add_argument('--exp_name', '-E', type=str, required=True)

    opts = cli.parse_args()
    print(opts)

    train_data, val_data, test_data, _ = load_data_from_arguments(
        opts, build_loaders=False)

    # leave-one-out: drop the requested example, if any
    leave_one_out = opts.exclude_index is not None
    if leave_one_out:
        train_data = SubsetDataWrapper(dataset=train_data,
                                       exclude_indices=[opts.exclude_index])

    if opts.cache_dataset:
        train_data, val_data, test_data = (
            CacheDatasetWrapper(split)
            for split in (train_data, val_data, test_data))

    # Shuffling is disabled when one accumulated update already covers the
    # whole training set.
    examples_per_update = opts.batch_size * opts.num_accumulation_steps
    shuffle_train = examples_per_update < len(train_data)
    train_loader, val_loader, _unused_test_loader = get_loaders_from_datasets(
        train_data, val_data, test_data,
        batch_size=opts.batch_size,
        num_workers=opts.num_workers,
        shuffle_train=shuffle_train)

    optimizer_cfg = {'name': opts.optimizer, 'lr': opts.lr}
    optimization_args = {'optimizer': optimizer_cfg}

    with open(opts.config, 'r') as config_file:
        architecture_args = json.load(config_file)

    model_cls = getattr(methods, opts.model_class)
    first_input = train_loader.dataset[0][0]
    model = model_cls(input_shape=first_input.shape,
                      architecture_args=architecture_args,
                      l2_reg_coef=opts.l2_reg_coef,
                      seed=opts.seed,
                      device=opts.device)

    # Keep the model permanently in eval mode so that, if the network has
    # pretrained BatchNorm layers, their running averages stay fixed.
    utils.put_always_eval_mode(model)

    if opts.linearized:
        print("Using a linearized model")
        model = LinearizedModelV2(model=model,
                                  train_data=train_data,
                                  val_data=val_data,
                                  l2_reg_coef=opts.l2_reg_coef)

    # the synthetic dataset needs no visualization, so it is stubbed out
    if opts.dataset == 'synthetic':
        model.visualize = (lambda *args, **kwargs: {})

    accuracy_metric = metrics.Accuracy(output_key='pred')
    metrics_list = [accuracy_metric]

    # One extra epoch is scheduled; the accuracy is later read at index
    # opts.epochs, i.e. the last of those epochs.
    training.train(model=model,
                   train_loader=train_loader,
                   val_loader=val_loader,
                   epochs=opts.epochs + 1,
                   save_iter=opts.save_iter,
                   vis_iter=opts.vis_iter,
                   optimization_args=optimization_args,
                   log_dir=opts.log_dir,
                   args_to_log=opts,
                   metrics=metrics_list,
                   device_ids=opts.all_device_ids,
                   num_accumulation_steps=opts.num_accumulation_steps)

    # parameters are moved to CPU before being pickled
    params = {
        name: utils.to_cpu(tensor)
        for name, tensor in dict(model.named_parameters()).items()
    }

    val_outputs = utils.apply_on_dataset(model=model,
                                         dataset=val_data,
                                         cpu=True,
                                         partition='val',
                                         batch_size=opts.batch_size)
    val_preds = val_outputs['pred']
    val_acc = accuracy_metric.value(epoch=opts.epochs, partition='val')

    # Results are always saved; only the file name depends on whether an
    # example was excluded.
    exp_dir = os.path.join(opts.output_dir, opts.exp_name)
    if leave_one_out:
        file_path = os.path.join(exp_dir, f'{opts.exclude_index}.pkl')
    else:
        file_path = os.path.join(exp_dir, 'full-data-training.pkl')
    utils.make_path(os.path.dirname(file_path))

    results = {
        'weights': params,
        'val_preds': val_preds,
        'val_acc': val_acc,
        'args': opts,
    }
    with open(file_path, 'wb') as out_file:
        pickle.dump(results, out_file)
def main():
    """Measure the total gradient update per training example.

    For every training length ``t`` in 100, 200, 300, 400 epochs a fresh
    model is trained on a binary (labels 4 vs 9) MNIST subset, the
    per-example gradient updates collected in ``model._grad_updates`` are
    read, the per-parameter norms of each example's updates are summed into
    one scalar, and everything is handed to ``process_results`` under the
    experiment name ``total-grad-t<t>``.

    NOTE: the data settings are hard-coded (see the TODO below); the
    ``--dataset``, ``--num_train_examples``, ``--batch_size`` and
    ``--epochs`` options are parsed but not used for loading or training.
    """
    cli = argparse.ArgumentParser()

    # general run options
    cli.add_argument('--config', '-c', type=str, required=True)
    cli.add_argument('--device', '-d', default='cuda',
                     help='specifies the main device')
    cli.add_argument(
        '--all_device_ids', nargs='+', type=str, default=None,
        help="If not None, this list specifies devices for multiple GPU training. "
             "The first device should match with the main device (args.device).")
    cli.add_argument('--batch_size', '-b', type=int, default=256)
    cli.add_argument('--epochs', '-e', type=int, default=400)
    cli.add_argument('--stopping_param', type=int, default=2**30)
    cli.add_argument('--save_iter', '-s', type=int, default=10)
    cli.add_argument('--vis_iter', '-v', type=int, default=10)
    cli.add_argument('--log_dir', '-l', type=str, default=None)
    cli.add_argument('--seed', type=int, default=42)

    # data options
    cli.add_argument('--dataset', '-D', type=str, default='corrupt4_mnist')
    cli.add_argument('--data_augmentation', '-A', action='store_true',
                     dest='data_augmentation')
    cli.set_defaults(data_augmentation=False)
    cli.add_argument('--error_prob', '-n', type=float, default=0.0)
    cli.add_argument('--num_train_examples', type=int, default=None)
    cli.add_argument('--clean_validation', action='store_true', default=False)

    # model / optimization options
    cli.add_argument('--model_class', '-m', type=str,
                     default='ClassifierL2WithGradCollector')
    cli.add_argument('--weight_decay', type=float, default=0.0)
    cli.add_argument('--lr', type=float, default=1e-3, help='Learning rate')
    cli.add_argument('--optimizer', type=str, default='adam',
                     choices=['adam', 'sgd'])

    cli.add_argument('--output_dir', '-o', type=str,
                     default='results/stability/mnist-4vs9-1000-samples/')

    opts = cli.parse_args()
    print(opts)

    # TODO: remove hard coding
    data_spec = {'dataset': 'mnist', 'num_train_examples': 10 * 500}
    train_data, val_data, test_data, _ = load_data_from_arguments(
        data_spec, build_loaders=False)

    # keep only the digits 4 and 9, then make every item carry its index
    train_data = BinaryDatasetWrapper(train_data, which_labels=(4, 9))
    val_data = BinaryDatasetWrapper(val_data, which_labels=(4, 9))
    test_data = BinaryDatasetWrapper(test_data, which_labels=(4, 9))

    train_data = ReturnSampleIndexWrapper(train_data)
    val_data = ReturnSampleIndexWrapper(val_data)
    test_data = ReturnSampleIndexWrapper(test_data)

    # batch size 2**30 with no shuffling
    train_loader, val_loader, _unused_test_loader = get_loaders_from_datasets(
        train_data, val_data, test_data,
        batch_size=2**30,
        shuffle_train=False,
        num_workers=0)

    optimization_args = {
        'optimizer': {
            'name': opts.optimizer,
            'lr': opts.lr,
            'weight_decay': opts.weight_decay,
        }
    }

    with open(opts.config, 'r') as config_file:
        architecture_args = json.load(config_file)

    model_cls = getattr(methods, opts.model_class)

    for num_epochs in range(100, 401, 100):
        # a fresh model is trained from scratch for every horizon
        first_input = train_loader.dataset[0][0][0]
        model = model_cls(input_shape=first_input.shape,
                          architecture_args=architecture_args,
                          device=opts.device,
                          seed=opts.seed)

        training.train(
            model=model,
            train_loader=train_loader,
            val_loader=val_loader,
            epochs=num_epochs,
            save_iter=opts.save_iter,
            vis_iter=2**30,  # NOTE: never visualize
            optimization_args=optimization_args,
            log_dir=opts.log_dir,
            args_to_log=opts,
            metrics=[metrics.Accuracy(output_key='pred')],
            device_ids=opts.all_device_ids)

        grad_updates = model._grad_updates

        # one scalar per training example: the sum, over parameters, of the
        # norm of that parameter's accumulated update
        update_norms = [
            sum((torch.norm(update.flatten())
                 for update in grad_updates[idx].values()), 0.0)
            for idx in range(len(train_data))
        ]

        meta = {
            'description': 'Total gradient update per example. The measures are the norm of total gradient update.',
            'time': num_epochs,
            'continuous': False,
            'args': opts
        }

        process_results(vectors=grad_updates,
                        quantities=update_norms,
                        meta=meta,
                        exp_name=f'total-grad-t{num_epochs}',
                        output_dir=opts.output_dir,
                        train_data=train_data.dataset)
def main():
    """Train a model and evaluate the best-validation checkpoint on test data.

    Parses the CLI options, builds the data loaders, instantiates
    ``--model_class`` from ``methods`` with the JSON architecture from
    ``--config`` and trains it with accuracy tracking, a
    ``SaveBestWithMetric`` callback and early stopping on validation
    accuracy. Afterwards the checkpoint ``checkpoints/best_val.mdl`` inside
    ``--log_dir`` is loaded and applied to the test set; predictions and
    labels are pickled to ``test_predictions.pkl`` and the accuracy is
    written to ``test_accuracy.txt`` in the same directory.

    If ``--log_dir`` is not given, the test stage is skipped with a message:
    the checkpoint path is built from ``args.log_dir``, and joining a path
    with ``None`` would otherwise raise ``TypeError`` after training.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', '-c', type=str, required=True)
    parser.add_argument('--device', '-d', default='cuda')
    parser.add_argument(
        '--all_device_ids', nargs='+', type=str, default=None,
        help="If not None, this list specifies devices for multiple GPU training. "
             "The first device should match with the main device (args.device).")
    parser.add_argument('--batch_size', '-b', type=int, default=256)
    parser.add_argument('--epochs', '-e', type=int, default=400)
    parser.add_argument('--stopping_param', type=int, default=50)
    parser.add_argument('--save_iter', '-s', type=int, default=10)
    parser.add_argument('--vis_iter', '-v', type=int, default=10)
    parser.add_argument('--log_dir', '-l', type=str, default=None)
    parser.add_argument('--seed', type=int, default=42)

    # data parameters
    parser.add_argument('--dataset', '-D', type=str, default='mnist',
                        choices=[
                            'mnist', 'uniform-noise-mnist', 'cifar10',
                            'uniform-noise-cifar10', 'pair-noise-cifar10',
                            'cifar100', 'uniform-noise-cifar100',
                            'clothing1m', 'imagenet'
                        ])
    parser.add_argument('--data_augmentation', '-A', action='store_true',
                        dest='data_augmentation')
    parser.set_defaults(data_augmentation=False)
    parser.add_argument('--num_train_examples', type=int, default=None)
    parser.add_argument('--error_prob', '-n', type=float, default=0.0)
    parser.add_argument('--clean_validation', dest='clean_validation',
                        action='store_true')
    parser.set_defaults(clean_validation=False)

    # model parameters
    parser.add_argument('--model_class', '-m', type=str,
                        default='StandardClassifier')
    parser.add_argument(
        '--loss_function', type=str, default='ce',
        choices=['ce', 'mse', 'mae', 'gce', 'dmi', 'fw', 'none'])
    parser.add_argument('--loss_function_param', type=float, default=1.0)
    parser.add_argument('--load_from', type=str, default=None)
    parser.add_argument('--grad_weight_decay', '-L', type=float, default=0.0)
    parser.add_argument('--grad_l1_penalty', '-S', type=float, default=0.0)
    parser.add_argument('--lamb', type=float, default=1.0)
    parser.add_argument('--pretrained_arg', '-r', type=str, default=None)
    parser.add_argument('--sample_from_q', action='store_true',
                        dest='sample_from_q')
    parser.set_defaults(sample_from_q=False)
    parser.add_argument('--q_dist', type=str, default='Gaussian',
                        choices=['Gaussian', 'Laplace', 'dot', 'ce'])
    parser.add_argument('--no-detach', dest='detach', action='store_false')
    parser.set_defaults(detach=True)
    parser.add_argument('--warm_up', type=int, default=0,
                        help='Number of epochs to skip before '
                             'starting to train using predicted gradients')
    parser.add_argument('--weight_decay', type=float, default=0.0)
    parser.add_argument(
        '--add_noise', action='store_true', dest='add_noise',
        help='add noise to the gradients of a standard classifier.')
    parser.set_defaults(add_noise=False)
    parser.add_argument('--noise_type', type=str, default='Gaussian',
                        choices=['Gaussian', 'Laplace'])
    parser.add_argument('--noise_std', type=float, default=0.0)
    parser.add_argument('--lr', type=float, default=1e-3, help='Learning rate')
    args = parser.parse_args()
    print(args)

    # Load data
    train_loader, val_loader, test_loader, _ = load_data_from_arguments(args)

    # Options (the optimizer is fixed to adam in this script)
    optimization_args = {
        'optimizer': {
            'name': 'adam',
            'lr': args.lr,
            'weight_decay': args.weight_decay
        }
    }

    with open(args.config, 'r') as f:
        architecture_args = json.load(f)

    model_class = getattr(methods, args.model_class)

    model = model_class(input_shape=train_loader.dataset[0][0].shape,
                        architecture_args=architecture_args,
                        pretrained_arg=args.pretrained_arg,
                        device=args.device,
                        grad_weight_decay=args.grad_weight_decay,
                        grad_l1_penalty=args.grad_l1_penalty,
                        lamb=args.lamb,
                        sample_from_q=args.sample_from_q,
                        q_dist=args.q_dist,
                        load_from=args.load_from,
                        loss_function=args.loss_function,
                        loss_function_param=args.loss_function_param,
                        add_noise=args.add_noise,
                        noise_type=args.noise_type,
                        noise_std=args.noise_std,
                        detach=args.detach,
                        warm_up=args.warm_up)

    # accuracy is always tracked; top-5 accuracy is added for imagenet only
    metrics_list = [metrics.Accuracy(output_key='pred')]
    if args.dataset == 'imagenet':
        metrics_list.append(metrics.TopKAccuracy(k=5, output_key='pred'))

    callbacks_list = [
        callbacks.SaveBestWithMetric(metric=metrics_list[0],
                                     partition='val',
                                     direction='max')
    ]

    stopper = callbacks.EarlyStoppingWithMetric(
        metric=metrics_list[0],
        stopping_param=args.stopping_param,
        partition='val',
        direction='max')

    training.train(model=model,
                   train_loader=train_loader,
                   val_loader=val_loader,
                   epochs=args.epochs,
                   save_iter=args.save_iter,
                   vis_iter=args.vis_iter,
                   optimization_args=optimization_args,
                   log_dir=args.log_dir,
                   args_to_log=args,
                   stopper=stopper,
                   metrics=metrics_list,
                   callbacks=callbacks_list,
                   device_ids=args.all_device_ids)

    # The test stage locates the checkpoint and writes its outputs relative
    # to args.log_dir. Without it, os.path.join(None, ...) would raise a
    # TypeError after the whole training run, so skip with a clear message.
    if args.log_dir is None:
        print("Skipping the test stage: --log_dir was not given, so the "
              "location of the best validation checkpoint is unknown.")
        return

    # if training finishes successfully, compute the test score
    print("Testing the best validation model...")
    model = utils.load(os.path.join(args.log_dir, 'checkpoints',
                                    'best_val.mdl'),
                       methods=methods,
                       device=args.device)
    pred = utils.apply_on_dataset(model,
                                  test_loader.dataset,
                                  batch_size=args.batch_size,
                                  output_keys_regexp='pred',
                                  description='Testing')['pred']

    # the second element of every test item is used as its label
    labels = [p[1] for p in test_loader.dataset]
    labels = torch.tensor(labels, dtype=torch.long)
    labels = utils.to_cpu(labels)

    with open(os.path.join(args.log_dir, 'test_predictions.pkl'), 'wb') as f:
        pickle.dump({'pred': pred, 'labels': labels}, f)

    # accuracy = fraction of test items whose arg-max prediction equals the label
    accuracy = torch.mean((pred.argmax(dim=1) == labels).float())
    with open(os.path.join(args.log_dir, 'test_accuracy.txt'), 'w') as f:
        f.write("{}\n".format(accuracy))
def main():
    """Train a width-``k`` model and test its best and final checkpoints.

    Parses the CLI options, builds the data loaders and reads the JSON
    architecture from ``--config``. For each of the ``'classifier'`` and
    ``'q-network'`` sections whose ``'net'`` entry contains
    ``'double-descent'``, the width parameter ``k`` is set from ``--k``.
    The model is then trained with accuracy tracking, a
    ``SaveBestWithMetric`` callback and early stopping on validation
    accuracy. Finally the checkpoints ``best_val_accuracy.mdl`` and
    ``final.mdl`` inside ``<log_dir>/checkpoints`` are each applied to the
    test set; predictions/labels and the accuracy are written to
    ``<name>_test_predictions.pkl`` and ``<name>_test_accuracy.txt``.

    If ``--log_dir`` is not given, the test stage is skipped with a message:
    the checkpoint paths are built from ``args.log_dir``, and joining a path
    with ``None`` would otherwise raise ``TypeError`` after training.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', '-c', type=str, required=True)
    parser.add_argument('--device', '-d', default='cuda')
    parser.add_argument(
        '--all_device_ids', nargs='+', type=str, default=None,
        help="If not None, this list specifies devices for multiple GPU training. "
             "The first device should match with the main device (args.device).")
    parser.add_argument('--batch_size', '-b', type=int, default=128)
    parser.add_argument('--epochs', '-e', type=int, default=4000)
    parser.add_argument('--stopping_param', type=int, default=2**30)
    parser.add_argument('--save_iter', '-s', type=int, default=100)
    parser.add_argument('--vis_iter', '-v', type=int, default=10)
    parser.add_argument('--log_dir', '-l', type=str, default=None)
    parser.add_argument('--seed', type=int, default=42)

    # data parameters
    parser.add_argument('--dataset', '-D', type=str,
                        default='uniform-noise-cifar10',
                        choices=['uniform-noise-cifar10'])
    parser.add_argument('--data_augmentation', '-A', action='store_true',
                        dest='data_augmentation')
    parser.set_defaults(data_augmentation=False)
    parser.add_argument('--num_train_examples', type=int, default=None)
    parser.add_argument('--error_prob', '-n', type=float, default=0.0)
    parser.add_argument('--clean_validation', dest='clean_validation',
                        action='store_true')
    parser.set_defaults(clean_validation=False)

    # model parameters
    parser.add_argument('--model_class', '-m', type=str,
                        default='StandardClassifier')
    parser.add_argument('--load_from', type=str, default=None)
    parser.add_argument('--grad_weight_decay', '-L', type=float, default=0.0)
    parser.add_argument('--lamb', type=float, default=1.0)
    parser.add_argument('--pretrained_arg', '-r', type=str, default=None)
    parser.add_argument('--sample_from_q', action='store_true',
                        dest='sample_from_q')
    parser.set_defaults(sample_from_q=False)
    parser.add_argument('--q_dist', type=str, default='Gaussian',
                        choices=['Gaussian', 'Laplace', 'dot'])
    parser.add_argument('--weight_decay', type=float, default=0.0)
    parser.add_argument('--lr', type=float, default=1e-4, help='Learning rate')

    parser.add_argument('--k', '-k', type=int, required=False, default=10,
                        help='width parameter of ResNet18-k')
    parser.add_argument('--exclude_percent', type=float, default=0.0)  # TODO: make this argument work
    args = parser.parse_args()
    print(args)

    # Load data
    train_loader, val_loader, test_loader, _ = load_data_from_arguments(args)

    # Options (the optimizer is fixed to adam in this script)
    optimization_args = {
        'optimizer': {
            'name': 'adam',
            'lr': args.lr,
            'weight_decay': args.weight_decay
        }
    }

    with open(args.config, 'r') as f:
        architecture_args = json.load(f)

    # Set the width parameter k. Each section is checked against its OWN
    # 'net' entry; the q-network branch previously inspected the classifier
    # section, which raised KeyError when only a q-network was configured.
    for section in ('classifier', 'q-network'):
        if (section in architecture_args
                and 'double-descent' in architecture_args[section].get('net', '')):
            architecture_args[section]['k'] = args.k

    model_class = getattr(methods, args.model_class)

    model = model_class(input_shape=train_loader.dataset[0][0].shape,
                        architecture_args=architecture_args,
                        pretrained_arg=args.pretrained_arg,
                        device=args.device,
                        grad_weight_decay=args.grad_weight_decay,
                        lamb=args.lamb,
                        sample_from_q=args.sample_from_q,
                        q_dist=args.q_dist,
                        load_from=args.load_from,
                        loss_function='ce')

    metrics_list = [metrics.Accuracy(output_key='pred')]
    if args.dataset == 'imagenet':
        metrics_list.append(metrics.TopKAccuracy(k=5, output_key='pred'))

    callbacks_list = [
        callbacks.SaveBestWithMetric(metric=metrics_list[0],
                                     partition='val',
                                     direction='max')
    ]

    stopper = callbacks.EarlyStoppingWithMetric(
        metric=metrics_list[0],
        stopping_param=args.stopping_param,
        partition='val',
        direction='max')

    training.train(model=model,
                   train_loader=train_loader,
                   val_loader=val_loader,
                   epochs=args.epochs,
                   save_iter=args.save_iter,
                   vis_iter=args.vis_iter,
                   optimization_args=optimization_args,
                   log_dir=args.log_dir,
                   args_to_log=args,
                   stopper=stopper,
                   metrics=metrics_list,
                   callbacks=callbacks_list,
                   device_ids=args.all_device_ids)

    # The test stage locates the checkpoints and writes its outputs relative
    # to args.log_dir. Without it, os.path.join(None, ...) would raise a
    # TypeError after the whole training run, so skip with a clear message.
    if args.log_dir is None:
        print("Skipping the test stage: --log_dir was not given, so the "
              "location of the saved checkpoints is unknown.")
        return

    # The labels do not depend on the checkpoint, so they are extracted once.
    # The second element of every test item is used as its label.
    labels = [p[1] for p in test_loader.dataset]
    labels = torch.tensor(labels, dtype=torch.long)
    labels = utils.to_cpu(labels)

    # test the last model and best model
    models_to_test = [{
        'name': 'best',
        'file': 'best_val_accuracy.mdl'
    }, {
        'name': 'final',
        'file': 'final.mdl'
    }]
    for spec in models_to_test:
        print("Testing the {} model...".format(spec['name']))
        model = utils.load(os.path.join(args.log_dir, 'checkpoints',
                                        spec['file']),
                           methods=methods,
                           device=args.device)
        pred = utils.apply_on_dataset(model,
                                      test_loader.dataset,
                                      batch_size=args.batch_size,
                                      output_keys_regexp='pred',
                                      description='Testing')['pred']

        with open(
                os.path.join(args.log_dir,
                             '{}_test_predictions.pkl'.format(spec['name'])),
                'wb') as f:
            pickle.dump({'pred': pred, 'labels': labels}, f)

        # accuracy = fraction of test items whose arg-max prediction equals the label
        accuracy = torch.mean((pred.argmax(dim=1) == labels).float())
        with open(
                os.path.join(args.log_dir,
                             '{}_test_accuracy.txt'.format(spec['name'])),
                'w') as f:
            f.write("{}\n".format(accuracy))