Example #1
0
def main():
    """Distill a target dataset into a small set of learnable synthetic images.

    Loads the target dataset (MNIST or CIFAR-10), learns `distill_data`
    (randomly initialized images) jointly with a model, validates every 5
    epochs by training a fresh model on the distilled data, and finally
    evaluates the best distilled data on the test set.  All statistics are
    appended to a JSON experiment log.

    Reads configuration from `get_args()`; mutates the module-level globals
    `args`, `best_err1`, `device` and `num_classes`.
    """
    global args, best_err1, device, num_classes
    args = get_args()
    torch.manual_seed(args.random_seed)
    # best validation top-1 error seen so far (100 = worst possible, in %)
    best_err1 = 100

    # define datasets
    if args.target == "mnist":
        normalize = transforms.Normalize((0.1307, ), (0.3081, ))
        transform_train = transforms.Compose(
            [transforms.ToTensor(), normalize])

        transform_test = transforms.Compose([transforms.ToTensor(), normalize])

        train_set_all = datasets.MNIST('data',
                                       train=True,
                                       transform=transform_train,
                                       target_transform=None,
                                       download=True)
        # set aside 10000 examples from the training set for validation
        train_set, val_set = torch.utils.data.random_split(
            train_set_all, [50000, 10000])
        # if we do experiments with variable target set size, this will take care of it
        target_set_size = min(50000, args.target_set_size)
        train_set, _ = torch.utils.data.random_split(
            train_set, [target_set_size, 50000 - target_set_size])
        test_set = datasets.MNIST('data',
                                  train=False,
                                  transform=transform_test,
                                  target_transform=None,
                                  download=True)
        num_classes = 10
        num_channels = 1
        input_size = 28
    elif args.target == "cifar10":
        normalize = transforms.Normalize(
            mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
            std=[x / 255.0 for x in [63.0, 62.1, 66.7]])
        transform_train = transforms.Compose(
            [transforms.Resize((32, 32)),
             transforms.ToTensor(), normalize])

        transform_test = transforms.Compose(
            [transforms.Resize((32, 32)),
             transforms.ToTensor(), normalize])
        train_set_all = datasets.CIFAR10('data',
                                         train=True,
                                         transform=transform_train,
                                         target_transform=None,
                                         download=True)
        # set aside 5000 examples from the training set for validation
        train_set, val_set = torch.utils.data.random_split(
            train_set_all, [45000, 5000])
        # if we do experiments with variable target set size, this will take care of it
        target_set_size = min(45000, args.target_set_size)
        train_set, _ = torch.utils.data.random_split(
            train_set, [target_set_size, 45000 - target_set_size])
        test_set = datasets.CIFAR10('data',
                                    train=False,
                                    transform=transform_test,
                                    target_transform=None,
                                    download=True)
        num_classes = 10
        num_channels = 3
        input_size = 32
    else:
        # BUG FIX: raising a plain string is a TypeError in Python 3;
        # raise a proper exception instead so the message actually surfaces.
        raise ValueError("The dataset is not currently supported")

    # create data loaders
    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=args.batch_size,
                                               shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=args.batch_size,
                                             shuffle=False)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=args.batch_size,
                                              shuffle=False)

    if torch.cuda.is_available():  # checks whether a cuda gpu is available
        device = torch.cuda.current_device()
        print("use GPU", device)
        print("GPU ID {}".format(torch.cuda.current_device()))
    else:
        print("use CPU")
        device = torch.device('cpu')  # sets the device to be CPU

    # randomly initualize the images and create associated images
    # labels [0, 1, 2, ..., 0, 1, 2, ...]
    distill_labels = torch.arange(num_classes, dtype=torch.long, device=device) \
        .repeat(args.num_base_examples // num_classes, 1).reshape(-1)
    distill_labels = one_hot(distill_labels, num_classes)
    # the distilled images are the learnable parameters (requires_grad=True)
    distill_data = torch.rand(args.num_base_examples,
                              num_channels,
                              input_size,
                              input_size,
                              device=device,
                              requires_grad=True)
    # define loss function (criterion)
    criterion = nn.CrossEntropyLoss().to(device=device)
    data_opt = torch.optim.Adam([distill_data])
    cudnn.benchmark = True
    # enable meta-mode so the models support fast-weight (second-order) updates
    M.LeNetMeta.meta = True
    M.AlexCifarNetMeta.meta = True

    # define the models to use
    if args.target == "cifar10":
        model = M.AlexCifarNetMeta(args).to(device=device)
    else:
        model = M.LeNetMeta(args).to(device=device)
    optimizer = torch.optim.Adam(model.parameters())

    create_json_experiment_log()

    # start measuring time
    start_time = time.time()

    # initialize early stopping variables
    ma_list = []
    ma_sum = 0
    lowest_ma_sum = 999999999
    current_num_steps = 0
    num_steps_list = []
    num_steps_from_min = 0

    # placeholder stats reported for epochs without a validation pass
    val_err1 = 100.0
    val_loss = 5.0
    num_steps_val = 0

    # ROBUSTNESS FIX: make sure these are defined even when no validation
    # pass ever runs (args.epochs < 5), which previously raised a NameError
    # at logging/testing time.
    best_distill_data = distill_data.detach().clone()
    best_num_steps = 0

    with tqdm.tqdm(total=args.epochs) as pbar_epochs:
        for epoch in range(0, args.epochs):
            train_err1, train_loss, distill_data, model_loss, ma_list, ma_sum, lowest_ma_sum, current_num_steps, num_steps_list, num_steps_from_min, model, optimizer = \
                train(train_loader, model, distill_data, distill_labels, criterion, data_opt, epoch, optimizer,
                      ma_list, ma_sum, lowest_ma_sum, current_num_steps, num_steps_list, num_steps_from_min, normalize)
            # evaluate on the validation set only every 5 epochs as it can be quite expensive to train a new model from scratch
            if epoch % 5 == 4:
                # calculate the number of steps to use
                if len(num_steps_list) == 0:
                    num_steps_val = current_num_steps
                else:
                    # robust estimate: average over the last 3 restarts
                    num_steps_val = int(np.mean(num_steps_list[-3:]))

                val_err1, val_loss = validate(val_loader, model, criterion,
                                              epoch, distill_data,
                                              distill_labels, num_steps_val,
                                              normalize)
                # otherwise the stats keep the previous value

                if val_err1 <= best_err1:
                    # keep a snapshot of the best distilled images and steps
                    best_distill_data = distill_data.detach().clone()
                    best_num_steps = num_steps_val
                    best_err1 = min(val_err1, best_err1)

                print('Current best val error (top-1 error):', best_err1)

            pbar_epochs.update(1)

            experiment_update_dict = {
                'train_top_1_error': train_err1,
                'train_loss': train_loss,
                'val_top_1_error': val_err1,
                'val_loss': val_loss,
                'model_loss': model_loss,
                'epoch': epoch,
                'num_val_steps': num_steps_val
            }
            # save the best images so that we can analyse them
            if epoch == args.epochs - 1:
                experiment_update_dict['data'] = best_distill_data.tolist()

            update_json_experiment_log_dict(experiment_update_dict)

    print('Best val error (top-1 error):', best_err1)

    # stop measuring time
    experiment_update_dict = {'total_train_time': time.time() - start_time}
    update_json_experiment_log_dict(experiment_update_dict)

    # this does number of steps analysis - what happens if we do more or fewer steps for training
    if args.num_steps_analysis:
        num_steps_add = [-50, -20, -10, 0, 10, 20, 50, 100]

        for num_steps_add_item in num_steps_add:
            # start measuring time for testing
            start_time = time.time()
            local_errs = []
            local_losses = []
            local_num_steps = best_num_steps + num_steps_add_item
            print('Number of steps for training: ' + str(local_num_steps))
            # each number of steps will have a robust estimate by using 20 repetitions
            for test_i in range(20):
                print('Test repetition ' + str(test_i))
                test_err1, test_loss = test(test_loader, model, criterion,
                                            best_distill_data, distill_labels,
                                            local_num_steps, normalize)
                local_errs.append(test_err1)
                local_losses.append(test_loss)
                print('Test error (top-1 error):', test_err1)
            experiment_update_dict = {
                'test_top_1_error': local_errs,
                'test_loss': local_losses,
                'total_test_time': time.time() - start_time,
                'num_test_steps': local_num_steps
            }
            update_json_experiment_log_dict(experiment_update_dict)
    else:
        # BUG FIX: reset the timer here — previously `start_time` still held
        # the training start, so 'total_test_time' wrongly included the whole
        # training time (the analysis branch above resets it per item).
        start_time = time.time()
        # evaluate on test set repeatedly for a robust estimate
        for test_i in range(20):
            print('Test repetition ' + str(test_i))
            test_err1, test_loss = test(test_loader, model, criterion,
                                        best_distill_data, distill_labels,
                                        best_num_steps, normalize)

            print('Test error (top-1 error):', test_err1)
            experiment_update_dict = {
                'test_top_1_error': test_err1,
                'test_loss': test_loss,
                'total_test_time': time.time() - start_time,
                'num_test_steps': best_num_steps
            }
            update_json_experiment_log_dict(experiment_update_dict)
Example #2
0
import re
import pickle
import pandas as pd
import numpy as np
from arg_extractor import get_args
import os
import csv
args = get_args()


def tokenize_and_clean(string, tokenizer):
    """
    Tokenization/string cleaning for all datasets except for SST.
    Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
    """
    # for math equations
    string = re.sub(r"https?\:\/\/[a-zA-Z0-9][a-zA-Z0-9\.\_\?\=\/\%\-\~\&]+",
                    " ", string)
    string = re.sub(r'^https?:\/\/.*[\r\n]*', '', string)
    string = re.sub('<[^>]+>', ' ', string)

    string = re.sub(r"\$\$.*?\$\$", " ", string)
    string = re.sub(r"\(.*\(.*?\=.*?\)\)", " ", string)
    string = re.sub(r"\\\(\\mathop.*?\\\)", " ", string)
    string = re.sub(r"\\\[\\mathop.*?\\\]", " ", string)
    string = re.sub(r"[A-Za-z]+\(.*?\)", " ", string)
    string = re.sub(r"[A-Za-z]+\[.*?\]", " ", string)
    string = re.sub(r"[0-9][\+\*\\\/\~][0-9]", " ", string)
    string = re.sub(r"<MATH>\s*[\+\-\*\\\/\~][0-9]", " ", string)

    string = re.sub(r"<MATH>\s*[\+\-\*\\\/\~\=]", " ", string)
Example #3
0
import torchvision
from torchvision import transforms
import torch
import data_providers as data_providers
import numpy as np
from arg_extractor import get_args
from experiment_builder import ExperimentBuilder
from model_architectures import ConvolutionalNetwork

args = get_args()  # get arguments from command line
rng = np.random.RandomState(seed=args.seed)  # set the seeds for the experiment
torch.manual_seed(seed=args.seed)  # sets pytorch's seed

if args.dataset_name == 'emnist':
    train_data = data_providers.EMNISTDataProvider(
        'train', batch_size=args.batch_size, rng=rng,
        flatten=False)  # initialize our rngs using the argument set seed
    val_data = data_providers.EMNISTDataProvider(
        'valid', batch_size=args.batch_size, rng=rng,
        flatten=False)  # initialize our rngs using the argument set seed
    test_data = data_providers.EMNISTDataProvider(
        'test', batch_size=args.batch_size, rng=rng,
        flatten=False)  # initialize our rngs using the argument set seed
    num_output_classes = train_data.num_classes

elif args.dataset_name == 'cifar10':
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
Example #4
0


# Experiment setup: parse arguments and seed all RNGs.
# Imports are grouped at the top (third-party, then project-local) per PEP 8.
import numpy as np  # BUG FIX: numpy was never imported but `np` is used below
import torch
from torchvision import transforms

import data_providers as data_providers
from arg_extractor import get_args
from data_augmentations import Cutout
from experiment_builder import ExperimentBuilder
from model_architectures import ConvolutionalNetwork

args, device = get_args()  # get arguments from command line

rng = np.random.RandomState(seed=args.seed)  # set the seeds for the experiment

torch.manual_seed(seed=args.seed)  # sets pytorch's seed


Example #5
0
def main():
    global args, best_err1, device, num_classes
    args = get_args()
    torch.manual_seed(args.random_seed)

    # most cases have 10 classes
    # if there are more, then it will be reassigned
    num_classes = 10
    best_err1 = 100

    # define datasets
    if args.target == "mnist":
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])

        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])

        train_set_all = datasets.MNIST('data',
                                       train=True,
                                       transform=transform_train,
                                       target_transform=None,
                                       download=True)
        # set aside 10000 examples from the training set for validation
        train_set, val_set = torch.utils.data.random_split(
            train_set_all, [50000, 10000])
        # if we do experiments with variable target set size, this will take care of it
        # by default the target set size is 50000
        target_set_size = min(50000, args.target_set_size)
        train_set, _ = torch.utils.data.random_split(
            train_set, [target_set_size, 50000 - target_set_size])
        test_set = datasets.MNIST('data',
                                  train=False,
                                  transform=transform_test,
                                  target_transform=None,
                                  download=True)
    elif args.target == "kmnist":
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])

        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])

        train_set_all = datasets.KMNIST('data',
                                        train=True,
                                        transform=transform_train,
                                        target_transform=None,
                                        download=True)
        # set aside 10000 examples from the training set for validation
        train_set, val_set = torch.utils.data.random_split(
            train_set_all, [50000, 10000])
        target_set_size = min(50000, args.target_set_size)
        # if we do experiments with variable target set size, this will take care of it
        train_set, _ = torch.utils.data.random_split(
            train_set, [target_set_size, 50000 - target_set_size])
        test_set = datasets.KMNIST('data',
                                   train=False,
                                   transform=transform_test,
                                   target_transform=None,
                                   download=True)
    elif args.target == "k49":
        num_classes = 49
        train_images = np.load('./data/k49-train-imgs.npz')['arr_0']
        test_images = np.load('./data/k49-test-imgs.npz')['arr_0']
        train_labels = np.load('./data/k49-train-labels.npz')['arr_0']
        test_labels = np.load('./data/k49-test-labels.npz')['arr_0']

        transform_train = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])

        transform_test = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])
        # set aside about 10% of training data for validation
        train_set_all = K49Dataset(train_images,
                                   train_labels,
                                   transform=transform_train)
        train_set, val_set = torch.utils.data.random_split(
            train_set_all, [209128, 23237])

        # currently we do not support variable target set size for k49
        # enable this to use it
        # target_set_size = min(209128, args.target_set_size)
        # train_set, _ = torch.utils.data.random_split(
        #     train_set, [target_set_size, 209128 - target_set_size])
        test_set = K49Dataset(test_images,
                              test_labels,
                              transform=transform_test)
    elif args.target == "cifar10":
        normalize = transforms.Normalize(
            mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
            std=[x / 255.0 for x in [63.0, 62.1, 66.7]])
        transform_train = transforms.Compose(
            [transforms.Resize((32, 32)),
             transforms.ToTensor(), normalize])

        transform_test = transforms.Compose(
            [transforms.Resize((32, 32)),
             transforms.ToTensor(), normalize])

        train_set_all = datasets.CIFAR10('data',
                                         train=True,
                                         transform=transform_train,
                                         target_transform=None,
                                         download=True)
        # set aside 5000 examples from the training set for validation
        train_set, val_set = torch.utils.data.random_split(
            train_set_all, [45000, 5000])
        # if we do experiments with variable target set size, this will take care of it
        target_set_size = min(45000, args.target_set_size)
        train_set, _ = torch.utils.data.random_split(
            train_set, [target_set_size, 45000 - target_set_size])
        test_set = datasets.CIFAR10('data',
                                    train=False,
                                    transform=transform_test,
                                    target_transform=None,
                                    download=True)
    elif args.target == "cifar100":
        num_classes = 100
        normalize = transforms.Normalize(
            mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
            std=[x / 255.0 for x in [63.0, 62.1, 66.7]])
        transform_train = transforms.Compose(
            [transforms.Resize((32, 32)),
             transforms.ToTensor(), normalize])

        transform_test = transforms.Compose(
            [transforms.Resize((32, 32)),
             transforms.ToTensor(), normalize])

        train_set_all = datasets.CIFAR100('data',
                                          train=True,
                                          transform=transform_train,
                                          target_transform=None,
                                          download=True)
        # set aside 5000 examples from the training set for validation
        train_set, val_set = torch.utils.data.random_split(
            train_set_all, [45000, 5000])
        # if we do experiments with variable target set size, this will take care of it
        target_set_size = min(45000, args.target_set_size)
        train_set, _ = torch.utils.data.random_split(
            train_set, [target_set_size, 45000 - target_set_size])
        test_set = datasets.CIFAR100('data',
                                     train=False,
                                     transform=transform_test,
                                     target_transform=None,
                                     download=True)

    # create data loaders
    if args.baseline:
        train_loader = torch.utils.data.DataLoader(
            train_set, batch_size=args.num_base_examples, shuffle=True)
    else:
        train_loader = torch.utils.data.DataLoader(train_set,
                                                   batch_size=args.batch_size,
                                                   shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=args.batch_size,
                                             shuffle=False)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=args.batch_size,
                                              shuffle=False)

    # create data loaders to get base examples
    if args.source == "emnist":
        train_set_source = datasets.EMNIST('data',
                                           'letters',
                                           train=True,
                                           download=True,
                                           transform=transform_train,
                                           target_transform=None)
        train_loader_source = torch.utils.data.DataLoader(
            train_set_source, batch_size=args.num_base_examples, shuffle=True)
    elif args.source == "mnist":
        train_set_source = datasets.MNIST('data',
                                          train=True,
                                          download=True,
                                          transform=transform_train,
                                          target_transform=None)
        train_loader_source = torch.utils.data.DataLoader(
            train_set_source, batch_size=args.num_base_examples, shuffle=True)
    elif args.source == "kmnist":
        train_set_source = datasets.KMNIST('data',
                                           train=True,
                                           download=True,
                                           transform=transform_train,
                                           target_transform=None)
        train_loader_source = torch.utils.data.DataLoader(
            train_set_source, batch_size=args.num_base_examples, shuffle=True)
    elif args.source == "cifar10":
        train_set_source = datasets.CIFAR10('data',
                                            train=True,
                                            download=True,
                                            transform=transform_train,
                                            target_transform=None)
        train_loader_source = torch.utils.data.DataLoader(
            train_set_source, batch_size=args.num_base_examples, shuffle=True)
    elif args.source == "cifar100":
        train_set_source = datasets.CIFAR100('data',
                                             train=True,
                                             download=True,
                                             transform=transform_train,
                                             target_transform=None)
        train_loader_source = torch.utils.data.DataLoader(
            train_set_source, batch_size=args.num_base_examples, shuffle=True)
    elif args.source == "svhn":
        train_set_source = datasets.SVHN('data',
                                         split='train',
                                         download=True,
                                         transform=transform_train,
                                         target_transform=None)
        train_loader_source = torch.utils.data.DataLoader(
            train_set_source, batch_size=args.num_base_examples, shuffle=True)
    elif args.source == "cub":
        # modify the root depending on where you place the images
        cub_data_root = './data/CUB_200_2011/images'
        train_set_source = datasets.ImageFolder(cub_data_root,
                                                transform=transform_train,
                                                target_transform=None)
        train_loader_source = torch.utils.data.DataLoader(
            train_set_source, batch_size=args.num_base_examples, shuffle=True)
    elif args.source == "fake":
        # there is also an option to use random noise base examples
        if args.target == "mnist":
            num_channels = 1
            dims = 28
        else:
            num_channels = 3
            dims = 32
        train_set_source = datasets.FakeData(size=5000,
                                             image_size=(num_channels, dims,
                                                         dims),
                                             num_classes=10,
                                             transform=transform_train,
                                             target_transform=None,
                                             random_offset=0)
        train_loader_source = torch.utils.data.DataLoader(
            train_set_source, batch_size=args.num_base_examples, shuffle=True)
    else:
        # get the fixed images from the same dataset as the training data
        train_set_source = train_set
        train_loader_source = torch.utils.data.DataLoader(
            train_set_source, batch_size=args.num_base_examples, shuffle=True)

    if torch.cuda.is_available():  # checks whether a cuda gpu is available
        device = torch.cuda.current_device()

        print("use GPU", device)
        print("GPU ID {}".format(torch.cuda.current_device()))
    else:
        print("use CPU")
        device = torch.device('cpu')  # sets the device to be CPU

    train_loader_source_iter = iter(train_loader_source)

    if args.balanced_source:
        # use a balanced set of fixed examples - same number of examples per class
        class_counts = {}
        fixed_input = []
        fixed_target = []

        for batch_fixed_i, batch_fixed_t in train_loader_source_iter:
            if sum(class_counts.values()) >= args.num_base_examples:
                break
            for fixed_i, fixed_t in zip(batch_fixed_i, batch_fixed_t):
                if len(class_counts.keys()) < num_classes:
                    if int(fixed_t) in class_counts:
                        if class_counts[
                                int(fixed_t
                                    )] < args.num_base_examples // num_classes:
                            class_counts[int(fixed_t)] += 1
                            fixed_input.append(fixed_i)
                            fixed_target.append(int(fixed_t))
                    else:
                        class_counts[int(int(fixed_t))] = 1
                        fixed_input.append(fixed_i)
                        fixed_target.append(int(fixed_t))
                else:
                    if int(fixed_t) in class_counts:
                        if class_counts[
                                int(fixed_t
                                    )] < args.num_base_examples // num_classes:
                            class_counts[int(fixed_t)] += 1
                            fixed_input.append(fixed_i)
                            fixed_target.append(int(fixed_t))
        fixed_input = torch.stack(fixed_input).to(device=device)
        fixed_target = torch.Tensor(fixed_target).to(device=device)
    else:
        # used for cross-dataset scenario - random selection of classes
        # not taking into accound the original classes
        fixed_input, fixed_target = next(train_loader_source_iter)
        fixed_input = fixed_input.to(device=device)
        fixed_target = fixed_target.to(device=device)

    # define loss function (criterion)
    criterion = nn.CrossEntropyLoss().to(device=device)

    # start at uniform labels and then learn them
    labels = torch.zeros((args.num_base_examples, num_classes),
                         requires_grad=True,
                         device=device)
    labels = labels.new_tensor(
        [[float(1.0 / num_classes) for e in range(num_classes)]
         for i in range(args.num_base_examples)],
        requires_grad=True,
        device=device)
    # define an optimizer for labels
    labels_opt = torch.optim.Adam([labels])

    # enable using meta-architectures for second-order meta-learning
    # allows assigning fast weights
    cudnn.benchmark = True
    M.LeNetMeta.meta = True
    M.AlexCifarNetMeta.meta = True
    M.BasicBlockMeta.meta = True
    M.BottleneckMeta.meta = True
    M.ResNetMeta.meta = True

    # define the models to use
    if args.target == "cifar10" or args.target == "cifar100":
        if args.resnet:
            model = M.ResNetMeta(dataset=args.target,
                                 depth=18,
                                 num_classes=num_classes,
                                 bottleneck=False,
                                 device=device).to(device=device)
            model_name = 'resnet'
        else:
            model = M.AlexCifarNetMeta(args).to(device=device)
            model_name = 'alexnet'
    else:
        model = M.LeNetMeta(args).to(device=device)
        model_name = 'LeNet'
    optimizer = torch.optim.Adam(model.parameters())

    if args.baseline:
        create_json_experiment_log(fixed_target)
        # remap the targets - only relevant in cross-dataset
        fixed_target = remap_targets(fixed_target, num_classes)
        # printing the labels helps ensure the seeds work
        print('The labels of the fixed examples are')
        print(fixed_target.tolist())
        labels = one_hot(fixed_target.long(), num_classes)

        # add smoothing to the baseline if selected
        if args.label_smoothing > 0:
            labels = create_smooth_labels(labels, args.label_smoothing,
                                          num_classes)

        # use the validation set to find a suitable number of iterations for training
        num_baseline_steps, errors_list, num_steps_used = find_best_num_steps(
            val_loader, criterion, fixed_input, labels)
        print('Number of steps to use for the baseline: ' +
              str(num_baseline_steps))
        experiment_update_dict = {
            'num_baseline_steps': num_baseline_steps,
            'errors_list': errors_list,
            'num_steps_used': num_steps_used
        }
        update_json_experiment_log_dict(experiment_update_dict)

        # baseline evaluation: train fresh models from scratch for the tuned
        # number of steps, over several architectures or just the chosen one
        if args.test_various_models:
            assert args.target == "cifar10", "test various models is only meant to be used for CIFAR-10"
            model_name_list = ['alexnet', 'LeNet', 'resnet']

            for model_name_test in model_name_list:
                # do 20 repetitions of training from scratch
                for test_i in range(20):
                    print('Test repetition ' + str(test_i))
                    test_err1, test_loss = test(test_loader, model_name_test,
                                                criterion, fixed_input, labels,
                                                num_baseline_steps)
                    print('Test error (top-1 error):', test_err1)
                    experiment_update_dict = {
                        'test_top_1_error_' + model_name_test: test_err1,
                        'test_loss_' + model_name_test: test_loss,
                        'num_test_steps_' + model_name_test: num_baseline_steps
                    }
                    update_json_experiment_log_dict(experiment_update_dict)
        else:
            # do 20 repetitions of training from scratch
            for test_i in range(20):
                print('Test repetition ' + str(test_i))
                test_err1, test_loss = test(test_loader, model_name, criterion,
                                            fixed_input, labels,
                                            num_baseline_steps)
                print('Test error (top-1 error):', test_err1)
                experiment_update_dict = {
                    'test_top_1_error': test_err1,
                    'test_loss': test_loss,
                    'num_test_steps': num_baseline_steps
                }
                update_json_experiment_log_dict(experiment_update_dict)

    # non-baseline path: optimise the labels over epochs, validate
    # periodically, then evaluate the best labels on the test set
    else:
        create_json_experiment_log(fixed_target)

        # start measuring time
        start_time = time.time()

        # initialize variables to decide when to restart a model
        ma_list = []
        ma_sum = 0
        lowest_ma_sum = 999999999
        current_num_steps = 0
        num_steps_list = []
        num_steps_from_min = 0

        # placeholder values logged until the first validation pass runs
        val_err1 = 100.0
        val_loss = 5.0
        num_steps_val = 0

        with tqdm.tqdm(total=args.epochs) as pbar_epochs:
            for epoch in range(0, args.epochs):
                train_err1, train_loss, labels, model_loss, ma_list, ma_sum, lowest_ma_sum, current_num_steps, num_steps_list, num_steps_from_min, model, optimizer = \
                    train(train_loader, model, fixed_input, labels, criterion, labels_opt, epoch, optimizer,
                          ma_list, ma_sum, lowest_ma_sum, current_num_steps, num_steps_list, num_steps_from_min)
                # evaluate on the validation set only every 5 epochs as it can be quite expensive to train a new model from scratch
                if epoch % 5 == 4:
                    # calculate the number of steps to use
                    if len(num_steps_list) == 0:
                        num_steps_val = current_num_steps
                    else:
                        # smooth over the last three recorded step counts
                        num_steps_val = int(np.mean(num_steps_list[-3:]))

                    val_err1, val_loss = validate(val_loader, model, criterion,
                                                  epoch, fixed_input, labels,
                                                  num_steps_val)

                    # `<=` (not `<`) ensures best_labels/best_num_steps are
                    # assigned on the very first validation pass
                    if val_err1 <= best_err1:
                        best_labels = labels.detach().clone()
                        best_num_steps = num_steps_val
                        best_err1 = min(val_err1, best_err1)

                    print('Current best val error (top-1 error):', best_err1)

                pbar_epochs.update(1)

                experiment_update_dict = {
                    'train_top_1_error': train_err1,
                    'train_loss': train_loss,
                    'val_top_1_error': val_err1,
                    'val_loss': val_loss,
                    'model_loss': model_loss,
                    'epoch': epoch,
                    'num_val_steps': num_steps_val
                }
                # save the best labels so that we can analyse them
                # NOTE(review): if args.epochs < 5, validation never runs and
                # best_labels is unbound here (NameError) -- confirm the CLI
                # enforces epochs >= 5
                if epoch == args.epochs - 1:
                    experiment_update_dict['labels'] = best_labels.tolist()

                update_json_experiment_log_dict(experiment_update_dict)

        print('Best val error (top-1 error):', best_err1)

        # stop measuring time
        experiment_update_dict = {'total_train_time': time.time() - start_time}
        update_json_experiment_log_dict(experiment_update_dict)

        # this does number of steps analysis - what happens if we do more or fewer steps for test training
        if args.num_steps_analysis:
            num_steps_add = [-50, -20, -10, 0, 10, 20, 50, 100]

            for num_steps_add_item in num_steps_add:
                # start measuring time for testing
                start_time = time.time()
                local_errs = []
                local_losses = []
                local_num_steps = best_num_steps + num_steps_add_item
                print('Number of steps for training: ' + str(local_num_steps))
                # each number of steps will have a robust estimate by using 20 repetitions
                for test_i in range(20):
                    print('Test repetition ' + str(test_i))
                    test_err1, test_loss = test(test_loader, model_name,
                                                criterion, fixed_input,
                                                best_labels, local_num_steps)
                    local_errs.append(test_err1)
                    local_losses.append(test_loss)
                    print('Test error (top-1 error):', test_err1)
                experiment_update_dict = {
                    'test_top_1_error': local_errs,
                    'test_loss': local_losses,
                    'total_test_time': time.time() - start_time,
                    'num_test_steps': local_num_steps
                }
                update_json_experiment_log_dict(experiment_update_dict)
        else:
            if args.test_various_models:
                assert args.target == "cifar10", "test various models is only meant to be used for CIFAR-10"
                model_name_list = ['alexnet', 'LeNet', 'resnet']

                for model_name_test in model_name_list:
                    for test_i in range(20):
                        print(model_name_test)
                        print('Test repetition ' + str(test_i))
                        test_err1, test_loss = test(test_loader,
                                                    model_name_test, criterion,
                                                    fixed_input, best_labels,
                                                    best_num_steps)
                        print('Test error (top-1 error):', test_err1)
                        # NOTE(review): start_time here is still the training
                        # start, so total_test_time includes training time --
                        # verify this is intended (the num_steps_analysis
                        # branch resets it per loop)
                        experiment_update_dict = {
                            'test_top_1_error_' + model_name_test: test_err1,
                            'test_loss_' + model_name_test: test_loss,
                            'total_test_time_' + model_name_test:
                            time.time() - start_time,
                            'num_test_steps_' + model_name_test: best_num_steps
                        }
                        update_json_experiment_log_dict(experiment_update_dict)
            else:
                for test_i in range(20):
                    print('Test repetition ' + str(test_i))
                    test_err1, test_loss = test(test_loader, model_name,
                                                criterion, fixed_input,
                                                best_labels, best_num_steps)
                    print('Test error (top-1 error):', test_err1)
                    experiment_update_dict = {
                        'test_top_1_error': test_err1,
                        'test_loss': test_loss,
                        'total_test_time': time.time() - start_time,
                        'num_test_steps': best_num_steps
                    }
                    update_json_experiment_log_dict(experiment_update_dict)
Example #6
0
    def __call__(self, sample):
        """Randomly crop `sample` down to `self.output_size`.

        Args:
            sample: image array of shape (H, W, ...); only the first two
                dimensions are cropped.

        Returns:
            A view of `sample` with spatial shape (new_h, new_w).
        """
        image = sample

        h, w = image.shape[:2]
        new_h, new_w = self.output_size

        # Bug fix: np.random.randint(0, 0) raises ValueError (empty range),
        # so when an image dimension already equals the crop size the only
        # valid offset is 0 -- take it directly instead of sampling.
        top = 0 if h == new_h else np.random.randint(0, h - new_h)
        left = 0 if w == new_w else np.random.randint(0, w - new_w)

        image = image[top:top + new_h, left:left + new_w]

        return image


args, device = arg_extractor.get_args()
print(args)

# Load the precomputed image-embedding lookup table.
# NOTE(review): `dict` shadows the builtin name; kept as-is because code
# later in this file may refer to it.
# Fix: use a context manager so the pickle file handle is closed
# deterministically (the original `pickle.load(open(...))` leaked it).
with open('dataset/Image_embed_dict.pickle', 'rb') as _pickle_file:
    dict = pickle.load(_pickle_file)

# ids of the images available to this experiment
arr = [
    1, 2, 3, 4, 6, 7, 8, 9, 11, 12, 13, 16, 17, 18, 19, 20, 22, 23, 25, 28, 30,
    32, 33, 35, 37, 38, 40, 41, 43, 45, 46, 47, 49, 50, 51, 53, 54, 55, 57, 58,
    60, 61, 62, 63, 65, 66, 68, 69, 70, 72, 73, 74, 75, 76, 77, 79, 80, 81, 82,
    83, 85, 86, 88, 89, 90, 91, 93, 94, 95, 96, 97, 98, 99
]
# each seed picks a disjoint slice of 5 ids from the pool
arr = arr[args.seed * 5:(args.seed + 1) * 5]
composed = transforms.Compose([
    Rescale(256),
    RandomCrop(224),
    transforms.ToTensor(),
Example #7
0
import torch
import math
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from arg_extractor import get_args
# parse the experiment's command-line arguments once, at import time
global_args = get_args()
class Attn(nn.Module):

    def __init__(self, hidden_size):
        """Attention scorer over encoder outputs.

        Args:
            hidden_size: dimensionality of the decoder hidden state and of
                each encoder output vector.
        """
        super(Attn, self).__init__()
        self.hidden_size = hidden_size
        # projects a concatenated [hidden; encoder_output] pair back down
        # to hidden_size
        self.attn = nn.Linear(self.hidden_size * 2, self.hidden_size)
        # learnable vector that reduces each projected energy to a scalar
        # score, initialised from N(0, 1/hidden_size)
        self.v = nn.Parameter(torch.rand(hidden_size))
        self.v.data.normal_(mean=0, std=1. / math.sqrt(self.v.size(0)))

    def forward(self, hidden, encoder_outputs):
        """Return normalized attention weights over encoder time steps.

        Args:
            hidden: decoder hidden state; repeated across time below.
            encoder_outputs: encoder states, time-major [T, B, H]
                (transposed to batch-first before scoring).

        Returns:
            Softmax-normalized attention weights with an extra dimension
            inserted at position 1 (ready for bmm with encoder outputs).
        """
        max_len = encoder_outputs.size(0)
        # repeat the hidden state once per time step, then make batch first
        H = hidden.repeat(max_len, 1, 1).transpose(0, 1)
        # make batch first in the lstm outputs: [B, T, H]
        encoder_outputs = encoder_outputs.transpose(0, 1)
        # unnormalised attention score for each time step
        attn_energies = self.score(H, encoder_outputs)
        # Fix: implicit-dim softmax is deprecated and its inferred axis
        # depends on input rank; dim=1 is the time axis for [B, T] energies
        # and matches the old 2-D default, preserving behaviour.
        return F.softmax(attn_energies, dim=1).unsqueeze(1)

    def score(self, hidden, encoder_outputs):
#         cat = torch.cat([hidden, encoder_outputs], 2)