def main():
    """Meta-learn distilled images for a small set of base examples.

    Selects the target dataset from command-line arguments, initializes
    random distilled images with fixed cyclic labels, trains them jointly
    with a model, tracks the best images by validation error, and finally
    evaluates on the test set (optionally sweeping the number of training
    steps).  All statistics are appended to a JSON experiment log.

    Relies on module-level helpers (``get_args``, ``train``, ``validate``,
    ``test``, ``one_hot``, ``create_json_experiment_log``,
    ``update_json_experiment_log_dict``) and model definitions in ``M``.
    """
    global args, best_err1, device, num_classes
    args = get_args()
    torch.manual_seed(args.random_seed)
    best_err1 = 100  # top-1 error in percent; 100 means "nothing found yet"

    # define datasets
    if args.target == "mnist":
        normalize = transforms.Normalize((0.1307, ), (0.3081, ))
        transform_train = transforms.Compose(
            [transforms.ToTensor(), normalize])
        transform_test = transforms.Compose([transforms.ToTensor(), normalize])
        train_set_all = datasets.MNIST('data',
                                       train=True,
                                       transform=transform_train,
                                       target_transform=None,
                                       download=True)
        # set aside 10000 examples from the training set for validation
        train_set, val_set = torch.utils.data.random_split(
            train_set_all, [50000, 10000])
        # if we do experiments with variable target set size, this will take care of it
        target_set_size = min(50000, args.target_set_size)
        train_set, _ = torch.utils.data.random_split(
            train_set, [target_set_size, 50000 - target_set_size])
        test_set = datasets.MNIST('data',
                                  train=False,
                                  transform=transform_test,
                                  target_transform=None,
                                  download=True)
        num_classes = 10
        num_channels = 1
        input_size = 28
    elif args.target == "cifar10":
        normalize = transforms.Normalize(
            mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
            std=[x / 255.0 for x in [63.0, 62.1, 66.7]])
        transform_train = transforms.Compose(
            [transforms.Resize((32, 32)), transforms.ToTensor(), normalize])
        transform_test = transforms.Compose(
            [transforms.Resize((32, 32)), transforms.ToTensor(), normalize])
        train_set_all = datasets.CIFAR10('data',
                                         train=True,
                                         transform=transform_train,
                                         target_transform=None,
                                         download=True)
        # set aside 5000 examples from the training set for validation
        train_set, val_set = torch.utils.data.random_split(
            train_set_all, [45000, 5000])
        # if we do experiments with variable target set size, this will take care of it
        target_set_size = min(45000, args.target_set_size)
        train_set, _ = torch.utils.data.random_split(
            train_set, [target_set_size, 45000 - target_set_size])
        test_set = datasets.CIFAR10('data',
                                    train=False,
                                    transform=transform_test,
                                    target_transform=None,
                                    download=True)
        num_classes = 10
        num_channels = 3
        input_size = 32
    else:
        # NOTE: the original code raised a bare string, which is a TypeError
        # in Python 3 — raise a proper exception instead
        raise ValueError("The dataset is not currently supported")

    # create data loaders
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=args.batch_size,
                                               shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=args.batch_size,
                                             shuffle=False)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=args.batch_size,
                                              shuffle=False)

    if torch.cuda.is_available():  # checks whether a cuda gpu is available
        device = torch.cuda.current_device()
        print("use GPU", device)
        print("GPU ID {}".format(torch.cuda.current_device()))
    else:
        print("use CPU")
        device = torch.device('cpu')  # sets the device to be CPU

    # randomly initialize the images and create associated labels
    # [0, 1, 2, ..., 0, 1, 2, ...]
    distill_labels = torch.arange(num_classes, dtype=torch.long, device=device) \
        .repeat(args.num_base_examples // num_classes, 1).reshape(-1)
    distill_labels = one_hot(distill_labels, num_classes)
    distill_data = torch.rand(args.num_base_examples,
                              num_channels,
                              input_size,
                              input_size,
                              device=device,
                              requires_grad=True)

    # define loss function (criterion)
    criterion = nn.CrossEntropyLoss().to(device=device)
    data_opt = torch.optim.Adam([distill_data])
    cudnn.benchmark = True
    # enable fast-weight (meta) mode in the model architectures
    M.LeNetMeta.meta = True
    M.AlexCifarNetMeta.meta = True

    # define the models to use
    if args.target == "cifar10":
        model = M.AlexCifarNetMeta(args).to(device=device)
    else:
        model = M.LeNetMeta(args).to(device=device)
    optimizer = torch.optim.Adam(model.parameters())

    create_json_experiment_log()
    # start measuring time
    start_time = time.time()

    # initialize early stopping variables
    ma_list = []
    ma_sum = 0
    lowest_ma_sum = 999999999
    current_num_steps = 0
    num_steps_list = []
    num_steps_from_min = 0
    val_err1 = 100.0
    val_loss = 5.0
    num_steps_val = 0

    with tqdm.tqdm(total=args.epochs) as pbar_epochs:
        for epoch in range(0, args.epochs):
            (train_err1, train_loss, distill_data, model_loss, ma_list,
             ma_sum, lowest_ma_sum, current_num_steps, num_steps_list,
             num_steps_from_min, model, optimizer) = \
                train(train_loader, model, distill_data, distill_labels,
                      criterion, data_opt, epoch, optimizer, ma_list, ma_sum,
                      lowest_ma_sum, current_num_steps, num_steps_list,
                      num_steps_from_min, normalize)
            # evaluate on the validation set only every 5 epochs as it can be
            # quite expensive to train a new model from scratch;
            # otherwise the stats keep the previous value
            if epoch % 5 == 4:
                # calculate the number of steps to use
                if len(num_steps_list) == 0:
                    num_steps_val = current_num_steps
                else:
                    num_steps_val = int(np.mean(num_steps_list[-3:]))
                val_err1, val_loss = validate(val_loader, model, criterion,
                                              epoch, distill_data,
                                              distill_labels, num_steps_val,
                                              normalize)
                if val_err1 <= best_err1:
                    best_distill_data = distill_data.detach().clone()
                    best_num_steps = num_steps_val
                    best_err1 = min(val_err1, best_err1)
                    print('Current best val error (top-1 error):', best_err1)
            pbar_epochs.update(1)
            experiment_update_dict = {
                'train_top_1_error': train_err1,
                'train_loss': train_loss,
                'val_top_1_error': val_err1,
                'val_loss': val_loss,
                'model_loss': model_loss,
                'epoch': epoch,
                'num_val_steps': num_steps_val
            }
            # save the best images so that we can analyse them
            if epoch == args.epochs - 1:
                experiment_update_dict['data'] = best_distill_data.tolist()
            update_json_experiment_log_dict(experiment_update_dict)

    print('Best val error (top-1 error):', best_err1)
    # stop measuring time
    experiment_update_dict = {'total_train_time': time.time() - start_time}
    update_json_experiment_log_dict(experiment_update_dict)

    # this does number of steps analysis - what happens if we do more or fewer
    # steps for training
    if args.num_steps_analysis:
        num_steps_add = [-50, -20, -10, 0, 10, 20, 50, 100]
        for num_steps_add_item in num_steps_add:
            # start measuring time for testing
            start_time = time.time()
            local_errs = []
            local_losses = []
            local_num_steps = best_num_steps + num_steps_add_item
            print('Number of steps for training: ' + str(local_num_steps))
            # each number of steps will have a robust estimate by using 20
            # repetitions
            for test_i in range(20):
                print('Test repetition ' + str(test_i))
                test_err1, test_loss = test(test_loader, model, criterion,
                                            best_distill_data, distill_labels,
                                            local_num_steps, normalize)
                local_errs.append(test_err1)
                local_losses.append(test_loss)
                print('Test error (top-1 error):', test_err1)
            experiment_update_dict = {
                'test_top_1_error': local_errs,
                'test_loss': local_losses,
                'total_test_time': time.time() - start_time,
                'num_test_steps': local_num_steps
            }
            update_json_experiment_log_dict(experiment_update_dict)
    else:
        # evaluate on test set repeatedly for a robust estimate
        # NOTE(review): start_time is not reset here, so 'total_test_time'
        # includes training time — confirm whether this is intended
        for test_i in range(20):
            print('Test repetition ' + str(test_i))
            test_err1, test_loss = test(test_loader, model, criterion,
                                        best_distill_data, distill_labels,
                                        best_num_steps, normalize)
            print('Test error (top-1 error):', test_err1)
            experiment_update_dict = {
                'test_top_1_error': test_err1,
                'test_loss': test_loss,
                'total_test_time': time.time() - start_time,
                'num_test_steps': best_num_steps
            }
            update_json_experiment_log_dict(experiment_update_dict)
import re import pickle import pandas as pd import numpy as np from arg_extractor import get_args import os import csv args = get_args() def tokenize_and_clean(string, tokenizer): """ Tokenization/string cleaning for all datasets except for SST. Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py """ # for math equations string = re.sub(r"https?\:\/\/[a-zA-Z0-9][a-zA-Z0-9\.\_\?\=\/\%\-\~\&]+", " ", string) string = re.sub(r'^https?:\/\/.*[\r\n]*', '', string) string = re.sub('<[^>]+>', ' ', string) string = re.sub(r"\$\$.*?\$\$", " ", string) string = re.sub(r"\(.*\(.*?\=.*?\)\)", " ", string) string = re.sub(r"\\\(\\mathop.*?\\\)", " ", string) string = re.sub(r"\\\[\\mathop.*?\\\]", " ", string) string = re.sub(r"[A-Za-z]+\(.*?\)", " ", string) string = re.sub(r"[A-Za-z]+\[.*?\]", " ", string) string = re.sub(r"[0-9][\+\*\\\/\~][0-9]", " ", string) string = re.sub(r"<MATH>\s*[\+\-\*\\\/\~][0-9]", " ", string) string = re.sub(r"<MATH>\s*[\+\-\*\\\/\~\=]", " ", string)
import torchvision from torchvision import transforms import torch import data_providers as data_providers import numpy as np from arg_extractor import get_args from experiment_builder import ExperimentBuilder from model_architectures import ConvolutionalNetwork args = get_args() # get arguments from command line rng = np.random.RandomState(seed=args.seed) # set the seeds for the experiment torch.manual_seed(seed=args.seed) # sets pytorch's seed if args.dataset_name == 'emnist': train_data = data_providers.EMNISTDataProvider( 'train', batch_size=args.batch_size, rng=rng, flatten=False) # initialize our rngs using the argument set seed val_data = data_providers.EMNISTDataProvider( 'valid', batch_size=args.batch_size, rng=rng, flatten=False) # initialize our rngs using the argument set seed test_data = data_providers.EMNISTDataProvider( 'test', batch_size=args.batch_size, rng=rng, flatten=False) # initialize our rngs using the argument set seed num_output_classes = train_data.num_classes elif args.dataset_name == 'cifar10': transform_train = transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize((0.4914, 0.4822, 0.4465),
# Experiment bootstrap: parse arguments and seed all RNGs before any
# data/model construction so runs are reproducible.
import data_providers as data_providers
from arg_extractor import get_args
from data_augmentations import Cutout
from experiment_builder import ExperimentBuilder
from model_architectures import ConvolutionalNetwork

args, device = get_args()  # get arguments from command line
# NOTE(review): `np` is not imported in this visible span — presumably
# `import numpy as np` appears elsewhere in the file; verify
rng = np.random.RandomState(seed=args.seed)  # set the seeds for the experiment
from torchvision import transforms
import torch
torch.manual_seed(seed=args.seed)  # sets pytorch's seed
def main():
    """Learn distilled soft labels for a fixed set of base examples.

    Selects the target dataset and the source of the base examples from
    command-line arguments, then either runs the baseline (fixed one-hot
    labels, optionally smoothed) or meta-learns the labels jointly with a
    model, and finally evaluates on the test set.  All statistics are
    appended to a JSON experiment log.

    Relies on module-level helpers (``get_args``, ``train``, ``validate``,
    ``test``, ``one_hot``, ``remap_targets``, ``create_smooth_labels``,
    ``find_best_num_steps``, ``create_json_experiment_log``,
    ``update_json_experiment_log_dict``, ``K49Dataset``) and model
    definitions in ``M``.
    """
    global args, best_err1, device, num_classes
    args = get_args()
    torch.manual_seed(args.random_seed)
    # most cases have 10 classes
    # if there are more, then it will be reassigned
    num_classes = 10
    best_err1 = 100  # top-1 error in percent; 100 means "nothing found yet"

    # define datasets
    if args.target == "mnist":
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])
        train_set_all = datasets.MNIST('data',
                                       train=True,
                                       transform=transform_train,
                                       target_transform=None,
                                       download=True)
        # set aside 10000 examples from the training set for validation
        train_set, val_set = torch.utils.data.random_split(
            train_set_all, [50000, 10000])
        # if we do experiments with variable target set size, this will take
        # care of it; by default the target set size is 50000
        target_set_size = min(50000, args.target_set_size)
        train_set, _ = torch.utils.data.random_split(
            train_set, [target_set_size, 50000 - target_set_size])
        test_set = datasets.MNIST('data',
                                  train=False,
                                  transform=transform_test,
                                  target_transform=None,
                                  download=True)
    elif args.target == "kmnist":
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])
        train_set_all = datasets.KMNIST('data',
                                        train=True,
                                        transform=transform_train,
                                        target_transform=None,
                                        download=True)
        # set aside 10000 examples from the training set for validation
        train_set, val_set = torch.utils.data.random_split(
            train_set_all, [50000, 10000])
        # if we do experiments with variable target set size, this will take care of it
        target_set_size = min(50000, args.target_set_size)
        train_set, _ = torch.utils.data.random_split(
            train_set, [target_set_size, 50000 - target_set_size])
        test_set = datasets.KMNIST('data',
                                   train=False,
                                   transform=transform_test,
                                   target_transform=None,
                                   download=True)
    elif args.target == "k49":
        num_classes = 49
        train_images = np.load('./data/k49-train-imgs.npz')['arr_0']
        test_images = np.load('./data/k49-test-imgs.npz')['arr_0']
        train_labels = np.load('./data/k49-train-labels.npz')['arr_0']
        test_labels = np.load('./data/k49-test-labels.npz')['arr_0']
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])
        # set aside about 10% of training data for validation
        train_set_all = K49Dataset(train_images,
                                   train_labels,
                                   transform=transform_train)
        train_set, val_set = torch.utils.data.random_split(
            train_set_all, [209128, 23237])
        # currently we do not support variable target set size for k49
        # enable this to use it
        # target_set_size = min(209128, args.target_set_size)
        # train_set, _ = torch.utils.data.random_split(
        #     train_set, [target_set_size, 209128 - target_set_size])
        test_set = K49Dataset(test_images,
                              test_labels,
                              transform=transform_test)
    elif args.target == "cifar10":
        normalize = transforms.Normalize(
            mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
            std=[x / 255.0 for x in [63.0, 62.1, 66.7]])
        transform_train = transforms.Compose(
            [transforms.Resize((32, 32)), transforms.ToTensor(), normalize])
        transform_test = transforms.Compose(
            [transforms.Resize((32, 32)), transforms.ToTensor(), normalize])
        train_set_all = datasets.CIFAR10('data',
                                         train=True,
                                         transform=transform_train,
                                         target_transform=None,
                                         download=True)
        # set aside 5000 examples from the training set for validation
        train_set, val_set = torch.utils.data.random_split(
            train_set_all, [45000, 5000])
        # if we do experiments with variable target set size, this will take care of it
        target_set_size = min(45000, args.target_set_size)
        train_set, _ = torch.utils.data.random_split(
            train_set, [target_set_size, 45000 - target_set_size])
        test_set = datasets.CIFAR10('data',
                                    train=False,
                                    transform=transform_test,
                                    target_transform=None,
                                    download=True)
    elif args.target == "cifar100":
        num_classes = 100
        normalize = transforms.Normalize(
            mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
            std=[x / 255.0 for x in [63.0, 62.1, 66.7]])
        transform_train = transforms.Compose(
            [transforms.Resize((32, 32)), transforms.ToTensor(), normalize])
        transform_test = transforms.Compose(
            [transforms.Resize((32, 32)), transforms.ToTensor(), normalize])
        train_set_all = datasets.CIFAR100('data',
                                          train=True,
                                          transform=transform_train,
                                          target_transform=None,
                                          download=True)
        # set aside 5000 examples from the training set for validation
        train_set, val_set = torch.utils.data.random_split(
            train_set_all, [45000, 5000])
        # if we do experiments with variable target set size, this will take care of it
        target_set_size = min(45000, args.target_set_size)
        train_set, _ = torch.utils.data.random_split(
            train_set, [target_set_size, 45000 - target_set_size])
        test_set = datasets.CIFAR100('data',
                                     train=False,
                                     transform=transform_test,
                                     target_transform=None,
                                     download=True)
    else:
        # fail fast with a clear message instead of a confusing NameError on
        # transform_train further below
        raise ValueError("The dataset is not currently supported")

    # create data loaders
    if args.baseline:
        # the baseline trains directly on one batch of base examples
        train_loader = torch.utils.data.DataLoader(
            train_set, batch_size=args.num_base_examples, shuffle=True)
    else:
        train_loader = torch.utils.data.DataLoader(train_set,
                                                   batch_size=args.batch_size,
                                                   shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=args.batch_size,
                                             shuffle=False)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=args.batch_size,
                                              shuffle=False)

    # choose the dataset that provides the base examples; each branch only
    # selects train_set_source — the loader is created once afterwards
    if args.source == "emnist":
        train_set_source = datasets.EMNIST('data',
                                           'letters',
                                           train=True,
                                           download=True,
                                           transform=transform_train,
                                           target_transform=None)
    elif args.source == "mnist":
        train_set_source = datasets.MNIST('data',
                                          train=True,
                                          download=True,
                                          transform=transform_train,
                                          target_transform=None)
    elif args.source == "kmnist":
        train_set_source = datasets.KMNIST('data',
                                           train=True,
                                           download=True,
                                           transform=transform_train,
                                           target_transform=None)
    elif args.source == "cifar10":
        train_set_source = datasets.CIFAR10('data',
                                            train=True,
                                            download=True,
                                            transform=transform_train,
                                            target_transform=None)
    elif args.source == "cifar100":
        train_set_source = datasets.CIFAR100('data',
                                             train=True,
                                             download=True,
                                             transform=transform_train,
                                             target_transform=None)
    elif args.source == "svhn":
        train_set_source = datasets.SVHN('data',
                                         split='train',
                                         download=True,
                                         transform=transform_train,
                                         target_transform=None)
    elif args.source == "cub":
        # modify the root depending on where you place the images
        cub_data_root = './data/CUB_200_2011/images'
        train_set_source = datasets.ImageFolder(cub_data_root,
                                                transform=transform_train,
                                                target_transform=None)
    elif args.source == "fake":
        # there is also an option to use random noise base examples
        if args.target == "mnist":
            num_channels = 1
            dims = 28
        else:
            num_channels = 3
            dims = 32
        train_set_source = datasets.FakeData(size=5000,
                                             image_size=(num_channels, dims,
                                                         dims),
                                             num_classes=10,
                                             transform=transform_train,
                                             target_transform=None,
                                             random_offset=0)
    else:
        # get the fixed images from the same dataset as the training data
        train_set_source = train_set
    train_loader_source = torch.utils.data.DataLoader(
        train_set_source, batch_size=args.num_base_examples, shuffle=True)

    if torch.cuda.is_available():  # checks whether a cuda gpu is available
        device = torch.cuda.current_device()
        print("use GPU", device)
        print("GPU ID {}".format(torch.cuda.current_device()))
    else:
        print("use CPU")
        device = torch.device('cpu')  # sets the device to be CPU

    train_loader_source_iter = iter(train_loader_source)
    if args.balanced_source:
        # use a balanced set of fixed examples - same number of examples per
        # class
        class_counts = {}
        fixed_input = []
        fixed_target = []
        per_class = args.num_base_examples // num_classes
        for batch_fixed_i, batch_fixed_t in train_loader_source_iter:
            if sum(class_counts.values()) >= args.num_base_examples:
                break
            for fixed_i, fixed_t in zip(batch_fixed_i, batch_fixed_t):
                t = int(fixed_t)
                if t not in class_counts:
                    # only open a slot for a previously unseen class while we
                    # still have fewer distinct classes than num_classes
                    if len(class_counts) < num_classes:
                        class_counts[t] = 1
                        fixed_input.append(fixed_i)
                        fixed_target.append(t)
                elif class_counts[t] < per_class:
                    class_counts[t] += 1
                    fixed_input.append(fixed_i)
                    fixed_target.append(t)
        fixed_input = torch.stack(fixed_input).to(device=device)
        fixed_target = torch.Tensor(fixed_target).to(device=device)
    else:
        # used for cross-dataset scenario - random selection of classes
        # not taking into account the original classes
        fixed_input, fixed_target = next(train_loader_source_iter)
        fixed_input = fixed_input.to(device=device)
        fixed_target = fixed_target.to(device=device)

    # define loss function (criterion)
    criterion = nn.CrossEntropyLoss().to(device=device)

    # start at uniform labels and then learn them
    labels = torch.zeros((args.num_base_examples, num_classes),
                         requires_grad=True,
                         device=device)
    labels = labels.new_tensor(
        [[float(1.0 / num_classes) for e in range(num_classes)]
         for i in range(args.num_base_examples)],
        requires_grad=True,
        device=device)
    # define an optimizer for labels
    labels_opt = torch.optim.Adam([labels])

    # enable using meta-architectures for second-order meta-learning
    # allows assigning fast weights
    cudnn.benchmark = True
    M.LeNetMeta.meta = True
    M.AlexCifarNetMeta.meta = True
    M.BasicBlockMeta.meta = True
    M.BottleneckMeta.meta = True
    M.ResNetMeta.meta = True

    # define the models to use
    if args.target == "cifar10" or args.target == "cifar100":
        if args.resnet:
            model = M.ResNetMeta(dataset=args.target,
                                 depth=18,
                                 num_classes=num_classes,
                                 bottleneck=False,
                                 device=device).to(device=device)
            model_name = 'resnet'
        else:
            model = M.AlexCifarNetMeta(args).to(device=device)
            model_name = 'alexnet'
    else:
        model = M.LeNetMeta(args).to(device=device)
        model_name = 'LeNet'
    optimizer = torch.optim.Adam(model.parameters())

    if args.baseline:
        create_json_experiment_log(fixed_target)
        # remap the targets - only relevant in cross-dataset
        fixed_target = remap_targets(fixed_target, num_classes)
        # printing the labels helps ensure the seeds work
        print('The labels of the fixed examples are')
        print(fixed_target.tolist())
        labels = one_hot(fixed_target.long(), num_classes)
        # add smoothing to the baseline if selected
        if args.label_smoothing > 0:
            labels = create_smooth_labels(labels, args.label_smoothing,
                                          num_classes)
        # use the validation set to find a suitable number of iterations for
        # training
        num_baseline_steps, errors_list, num_steps_used = find_best_num_steps(
            val_loader, criterion, fixed_input, labels)
        print('Number of steps to use for the baseline: ' +
              str(num_baseline_steps))
        experiment_update_dict = {
            'num_baseline_steps': num_baseline_steps,
            'errors_list': errors_list,
            'num_steps_used': num_steps_used
        }
        update_json_experiment_log_dict(experiment_update_dict)
        if args.test_various_models:
            assert args.target == "cifar10", "test various models is only meant to be used for CIFAR-10"
            model_name_list = ['alexnet', 'LeNet', 'resnet']
            for model_name_test in model_name_list:
                # do 20 repetitions of training from scratch
                for test_i in range(20):
                    print('Test repetition ' + str(test_i))
                    test_err1, test_loss = test(test_loader, model_name_test,
                                                criterion, fixed_input, labels,
                                                num_baseline_steps)
                    print('Test error (top-1 error):', test_err1)
                    experiment_update_dict = {
                        'test_top_1_error_' + model_name_test: test_err1,
                        'test_loss_' + model_name_test: test_loss,
                        'num_test_steps_' + model_name_test: num_baseline_steps
                    }
                    update_json_experiment_log_dict(experiment_update_dict)
        else:
            # do 20 repetitions of training from scratch
            for test_i in range(20):
                print('Test repetition ' + str(test_i))
                test_err1, test_loss = test(test_loader, model_name, criterion,
                                            fixed_input, labels,
                                            num_baseline_steps)
                print('Test error (top-1 error):', test_err1)
                experiment_update_dict = {
                    'test_top_1_error': test_err1,
                    'test_loss': test_loss,
                    'num_test_steps': num_baseline_steps
                }
                update_json_experiment_log_dict(experiment_update_dict)
    else:
        create_json_experiment_log(fixed_target)
        # start measuring time
        start_time = time.time()
        # initialize variables to decide when to restart a model
        ma_list = []
        ma_sum = 0
        lowest_ma_sum = 999999999
        current_num_steps = 0
        num_steps_list = []
        num_steps_from_min = 0
        val_err1 = 100.0
        val_loss = 5.0
        num_steps_val = 0
        with tqdm.tqdm(total=args.epochs) as pbar_epochs:
            for epoch in range(0, args.epochs):
                (train_err1, train_loss, labels, model_loss, ma_list, ma_sum,
                 lowest_ma_sum, current_num_steps, num_steps_list,
                 num_steps_from_min, model, optimizer) = \
                    train(train_loader, model, fixed_input, labels, criterion,
                          labels_opt, epoch, optimizer, ma_list, ma_sum,
                          lowest_ma_sum, current_num_steps, num_steps_list,
                          num_steps_from_min)
                # evaluate on the validation set only every 5 epochs as it can
                # be quite expensive to train a new model from scratch
                if epoch % 5 == 4:
                    # calculate the number of steps to use
                    if len(num_steps_list) == 0:
                        num_steps_val = current_num_steps
                    else:
                        num_steps_val = int(np.mean(num_steps_list[-3:]))
                    val_err1, val_loss = validate(val_loader, model, criterion,
                                                  epoch, fixed_input, labels,
                                                  num_steps_val)
                    if val_err1 <= best_err1:
                        best_labels = labels.detach().clone()
                        best_num_steps = num_steps_val
                        best_err1 = min(val_err1, best_err1)
                        print('Current best val error (top-1 error):',
                              best_err1)
                pbar_epochs.update(1)
                experiment_update_dict = {
                    'train_top_1_error': train_err1,
                    'train_loss': train_loss,
                    'val_top_1_error': val_err1,
                    'val_loss': val_loss,
                    'model_loss': model_loss,
                    'epoch': epoch,
                    'num_val_steps': num_steps_val
                }
                # save the best labels so that we can analyse them
                if epoch == args.epochs - 1:
                    experiment_update_dict['labels'] = best_labels.tolist()
                update_json_experiment_log_dict(experiment_update_dict)
        print('Best val error (top-1 error):', best_err1)
        # stop measuring time
        experiment_update_dict = {
            'total_train_time': time.time() - start_time
        }
        update_json_experiment_log_dict(experiment_update_dict)

        # this does number of steps analysis - what happens if we do more or
        # fewer steps for test training
        if args.num_steps_analysis:
            num_steps_add = [-50, -20, -10, 0, 10, 20, 50, 100]
            for num_steps_add_item in num_steps_add:
                # start measuring time for testing
                start_time = time.time()
                local_errs = []
                local_losses = []
                local_num_steps = best_num_steps + num_steps_add_item
                print('Number of steps for training: ' + str(local_num_steps))
                # each number of steps will have a robust estimate by using 20
                # repetitions
                for test_i in range(20):
                    print('Test repetition ' + str(test_i))
                    test_err1, test_loss = test(test_loader, model_name,
                                                criterion, fixed_input,
                                                best_labels, local_num_steps)
                    local_errs.append(test_err1)
                    local_losses.append(test_loss)
                    print('Test error (top-1 error):', test_err1)
                experiment_update_dict = {
                    'test_top_1_error': local_errs,
                    'test_loss': local_losses,
                    'total_test_time': time.time() - start_time,
                    'num_test_steps': local_num_steps
                }
                update_json_experiment_log_dict(experiment_update_dict)
        else:
            if args.test_various_models:
                assert args.target == "cifar10", "test various models is only meant to be used for CIFAR-10"
                model_name_list = ['alexnet', 'LeNet', 'resnet']
                for model_name_test in model_name_list:
                    for test_i in range(20):
                        print(model_name_test)
                        print('Test repetition ' + str(test_i))
                        test_err1, test_loss = test(test_loader,
                                                    model_name_test, criterion,
                                                    fixed_input, best_labels,
                                                    best_num_steps)
                        print('Test error (top-1 error):', test_err1)
                        experiment_update_dict = {
                            'test_top_1_error_' + model_name_test: test_err1,
                            'test_loss_' + model_name_test: test_loss,
                            'total_test_time_' + model_name_test:
                                time.time() - start_time,
                            'num_test_steps_' + model_name_test: best_num_steps
                        }
                        update_json_experiment_log_dict(experiment_update_dict)
            else:
                for test_i in range(20):
                    print('Test repetition ' + str(test_i))
                    test_err1, test_loss = test(test_loader, model_name,
                                                criterion, fixed_input,
                                                best_labels, best_num_steps)
                    print('Test error (top-1 error):', test_err1)
                    experiment_update_dict = {
                        'test_top_1_error': test_err1,
                        'test_loss': test_loss,
                        'total_test_time': time.time() - start_time,
                        'num_test_steps': best_num_steps
                    }
                    update_json_experiment_log_dict(experiment_update_dict)
def __call__(self, sample): image = sample h, w = image.shape[:2] new_h, new_w = self.output_size top = np.random.randint(0, h - new_h) left = np.random.randint(0, w - new_w) image = image[top:top + new_h, left:left + new_w] return image args, device = arg_extractor.get_args() print(args) dict = pickle.load(open('dataset/Image_embed_dict.pickle', 'rb')) arr = [ 1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 16, 17, 18, 19, 20, 22, 23, 25, 28, 30, 32, 33, 35, 37, 38, 40, 41, 43, 45, 46, 47, 49, 50, 51, 53, 54, 55, 57, 58, 60, 61, 62, 63, 65, 66, 68, 69, 70, 72, 73, 74, 75, 76, 77, 79, 80, 81, 82, 83, 85, 86, 88, 89, 90, 91, 93, 94, 95, 96, 97, 98, 99 ] arr = arr[args.seed * 5:(args.seed + 1) * 5] composed = transforms.Compose([ Rescale(256), RandomCrop(224), transforms.ToTensor(),
import torch import math import torch.nn as nn import torch.nn.functional as F from torch.autograd import Variable from arg_extractor import get_args global_args = get_args() class Attn(nn.Module): def __init__(self, hidden_size): super(Attn, self).__init__() self.hidden_size = hidden_size self.attn = nn.Linear(2*self.hidden_size, self.hidden_size) #create new parameter for attn weights self.v = nn.Parameter(torch.rand(hidden_size)) #initialise attn weight parameter stdv = 1. / math.sqrt(self.v.size(0)) self.v.data.normal_(mean=0, std=stdv) def forward(self, hidden,encoder_outputs): max_len = encoder_outputs.size(0) batch_size = encoder_outputs.size(1) H = hidden.repeat(max_len,1,1).transpose(0,1) #make batch first in the lstm outputs encoder_outputs = encoder_outputs.transpose(0,1) # [B*T*H] #unormalised attention scores attn_energies = self.score(H,encoder_outputs) # compute attention score for each context return F.softmax(attn_energies).unsqueeze(1) # normalize with softmax def score(self, hidden, encoder_outputs): # cat = torch.cat([hidden, encoder_outputs], 2)