def prepare_data(args, train_eval_list, test_list, resize):
    """Build train/eval/test datasets plus channel/label counts for args.dataset."""
    # Data Upload
    print('\n[Phase 1] : Data Preparation')

    transform_train = transforms.Compose([
        transforms.Resize((resize, resize)),
        transforms.ToTensor(),
        transforms.Normalize(cf.mean[args.dataset], cf.std[args.dataset]),
    ])  # meanstd transformation

    transform_test = transforms.Compose([
        transforms.Resize((resize, resize)),
        transforms.ToTensor(),
        transforms.Normalize(cf.mean[args.dataset], cf.std[args.dataset]),
    ])

    if (args.dataset == 'cifar10'):
        print("| Preparing CIFAR-10 dataset...")
        sys.stdout.write("| ")
        trainset = torchvision.datasets.CIFAR10(root='./data',
                                                train=True,
                                                download=True,
                                                transform=transform_train)
        testset = torchvision.datasets.CIFAR10(root='./data',
                                               train=False,
                                               download=False,
                                               transform=transform_test)
        outputs = 10
        inputs = 3
        # The shared return below also yields an evalset; carve one out of
        # the training set (80/20), mirroring the fashion-mnist branch.
        train_size = int(0.8 * len(trainset))
        eval_size = len(trainset) - train_size
        trainset, evalset = torch.utils.data.random_split(
            trainset, [train_size, eval_size])

    elif (args.dataset == 'cifar100'):
        print("| Preparing CIFAR-100 dataset...")
        sys.stdout.write("| ")
        trainset = torchvision.datasets.CIFAR100(root='./data',
                                                 train=True,
                                                 download=True,
                                                 transform=transform_train)
        testset = torchvision.datasets.CIFAR100(root='./data',
                                                train=False,
                                                download=False,
                                                transform=transform_test)
        outputs = 100
        inputs = 3
        # Carve an evalset out of the training set (80/20) so the shared
        # return below is well-defined for this branch too.
        train_size = int(0.8 * len(trainset))
        eval_size = len(trainset) - train_size
        trainset, evalset = torch.utils.data.random_split(
            trainset, [train_size, eval_size])

    elif (args.dataset == 'fashion-mnist'):
        print("| Preparing Fashion-MNIST dataset...")
        sys.stdout.write("| ")
        if args.debug:
            train_eval_set = refactor_dataset_class.VGMMDataset(
                pattern=config_parent.global_index_name,
                root_dir="../" + config_parent.data_path,
                list_idx=train_eval_list,
                transform=transform_train)
            # debug mode: keep only 1% of the original dataset for a fast run
            small_size = int(0.01 * len(train_eval_set))
            drop_size = len(train_eval_set) - small_size
            train_eval_set, _ = torch.utils.data.random_split(
                train_eval_set, [small_size, drop_size])

            # split train_eval_set into trainset and evalset
            train_size = int(0.8 * len(train_eval_set))
            eval_size = len(train_eval_set) - train_size
            trainset, evalset = torch.utils.data.random_split(
                train_eval_set, [train_size, eval_size])
            testset = refactor_dataset_class.VGMMDataset(
                pattern=config_parent.global_index_name,
                root_dir="../" + config_parent.data_path,
                list_idx=test_list,
                transform=transform_test)
            small_size = int(0.01 * len(testset))
            drop_size = len(testset) - small_size
            testset, _ = torch.utils.data.random_split(testset,
                                                       [small_size, drop_size])
            outputs = 10
            inputs = 1
        else:
            train_eval_set = refactor_dataset_class.VGMMDataset(
                pattern=config_parent.global_index_name,
                root_dir="../" + config_parent.data_path,
                list_idx=train_eval_list,
                transform=transform_train)
            # split train_eval_set into trainset and evalset
            train_size = int(0.8 * len(train_eval_set))
            eval_size = len(train_eval_set) - train_size
            trainset, evalset = torch.utils.data.random_split(
                train_eval_set, [train_size, eval_size])
            testset = refactor_dataset_class.VGMMDataset(
                pattern=config_parent.global_index_name,
                root_dir="../" + config_parent.data_path,
                list_idx=test_list,
                transform=transform_test)
            outputs = 10
            inputs = 1

    return trainset, evalset, testset, inputs, outputs
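
# A minimal usage sketch of prepare_data. The helper name build_loaders and
# the num_workers value are illustrative assumptions, not part of the original
# script; args, train_eval_list, test_list, and batch_size are expected to
# come from the surrounding training code.
def build_loaders(args, train_eval_list, test_list, resize, batch_size):
    trainset, evalset, testset, inputs, outputs = prepare_data(
        args, train_eval_list, test_list, resize)
    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=True, num_workers=4)
    evalloader = torch.utils.data.DataLoader(
        evalset, batch_size=batch_size, shuffle=False, num_workers=4)
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=batch_size, shuffle=False, num_workers=4)
    return trainloader, evalloader, testloader, inputs, outputs
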
transform_train = transforms.Compose([
    transforms.Resize((resize, resize)),
    transforms.ToTensor(),
    transforms.Normalize(cf.mean[args.dataset], cf.std[args.dataset]),
])  # meanstd transformation

transform_test = transforms.Compose([
    transforms.Resize((resize, resize)),
    transforms.ToTensor(),
    transforms.Normalize(cf.mean[args.dataset], cf.std[args.dataset]),
])

start_epoch, num_epochs, batch_size, optim_type = cf.start_epoch, cf.num_epochs, cf.batch_size, cf.optim_type
trainset_org = torchvision.datasets.MNIST(root='./data',
                                          train=True,
                                          download=True,
                                          transform=transform_train)
trainset_refactor = refactor_dataset_class.VGMMDataset(
    transform=transform_train)

trainloader_org = torch.utils.data.DataLoader(trainset_org,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=4)
trainloader_refactor = torch.utils.data.DataLoader(trainset_refactor,
                                                   batch_size=batch_size,
                                                   shuffle=False,
                                                   num_workers=4)
# num_workers: how many subprocesses to use for data loading. 0 means that the data will be loaded in the main process. (default: 0)
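
# Hedged sanity check for the two loaders above, assuming both datasets yield
# (image, label) pairs; the exact batch structure of VGMMDataset is an
# assumption here.
x_org, y_org = next(iter(trainloader_org))
x_ref, y_ref = next(iter(trainloader_refactor))
print("original MNIST batch:", x_org.shape, y_org.shape)
print("refactored batch:", x_ref.shape, y_ref.shape)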
import utils_parent

X, y = utils_parent.load_mnist("fashion-mnist")
print(type(y))  # label container type
print(y.shape)  # label array shape
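# Hedged follow-up inspection, assuming load_mnist returns numpy arrays with
# integer class ids in y (if y were one-hot, unique() would give only {0, 1}).
import numpy as np
print(np.unique(y))  # expect the 10 Fashion-MNIST class ids 0..9
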
def prepare_data_for_normal_cv(args, train_eval_list, test_list, resize):
    """Build train/eval/test datasets for random (non-clustered) cross-validation."""
    # Data Upload
    print('\n[Phase 1] : Data Preparation')

    transform_train = transforms.Compose([
        transforms.Resize((resize, resize)),
        transforms.ToTensor(),
        transforms.Normalize(cf.mean[args.dataset], cf.std[args.dataset]),
    ])  # meanstd transformation

    transform_test = transforms.Compose([
        transforms.Resize((resize, resize)),
        transforms.ToTensor(),
        transforms.Normalize(cf.mean[args.dataset], cf.std[args.dataset]),
    ])

    if (args.dataset == 'mnist'):
        print("| Preparing fashion MNIST dataset for random cv...")
        sys.stdout.write("| ")
        if args.debug:
            train_eval_set = refactor_dataset_class.VGMMDataset(
                pattern=config_parent.global_index_name,
                root_dir="../" + config_parent.data_path,
                index=train_eval_list,
                transform=transform_train,
                cluster=False)
            # debug mode: keep only 1% of the original dataset for a fast run
            small_size = int(0.01 * len(train_eval_set))
            drop_size = len(train_eval_set) - small_size
            train_eval_set, _ = torch.utils.data.random_split(
                train_eval_set, [small_size, drop_size])

            # split train_eval_set into trainset and evalset
            train_size = int(0.8 * len(train_eval_set))
            eval_size = len(train_eval_set) - train_size
            trainset, evalset = torch.utils.data.random_split(
                train_eval_set, [train_size, eval_size])
            testset = refactor_dataset_class.VGMMDataset(
                pattern=config_parent.global_index_name,
                root_dir="../" + config_parent.data_path,
                index=test_list,
                transform=transform_test,
                cluster=False)
            small_size = int(0.01 * len(testset))
            drop_size = len(testset) - small_size
            testset, _ = torch.utils.data.random_split(testset,
                                                       [small_size, drop_size])
            outputs = 10
            inputs = 1
        else:
            train_eval_set = refactor_dataset_class.VGMMDataset(
                pattern=config_parent.global_index_name,
                root_dir="../" + config_parent.data_path,
                index=train_eval_list,
                transform=transform_train,
                cluster=False)
            # split train_eval_set into trainset and evalset
            train_size = int(0.8 * len(train_eval_set))
            eval_size = len(train_eval_set) - train_size
            trainset, evalset = torch.utils.data.random_split(
                train_eval_set, [train_size, eval_size])
            testset = refactor_dataset_class.VGMMDataset(
                pattern=config_parent.global_index_name,
                root_dir="../" + config_parent.data_path,
                index=test_list,
                transform=transform_test,
                cluster=False)
            outputs = 10
            inputs = 1

    return trainset, evalset, testset, inputs, outputs
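
# Note: torch.utils.data.random_split draws from the global RNG, so the 80/20
# splits above differ between runs. A hedged sketch of a seeded variant
# (seeded_split and the seed value are illustrative, not part of the
# original script):
def seeded_split(dataset, train_frac=0.8, seed=42):
    train_size = int(train_frac * len(dataset))
    eval_size = len(dataset) - train_size
    return torch.utils.data.random_split(
        dataset, [train_size, eval_size],
        generator=torch.Generator().manual_seed(seed))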
Example #4
if (args.dataset == 'cifar10'):
    print("| Preparing CIFAR-10 dataset...")
    sys.stdout.write("| ")
    trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
    testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=False, transform=transform_test)
    outputs = 10
    inputs = 3

elif (args.dataset == 'cifar100'):
    print("| Preparing CIFAR-100 dataset...")
    sys.stdout.write("| ")
    trainset = torchvision.datasets.CIFAR100(root='./data', train=True, download=True, transform=transform_train)
    testset = torchvision.datasets.CIFAR100(root='./data', train=False, download=False, transform=transform_test)
    outputs = 100
    inputs = 3

elif (args.dataset == 'mnist'):
    print("| Preparing MNIST dataset...")
    sys.stdout.write("| ")
    #trainset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform_train)
    trainset = refactor_dataset_class.VGMMDataset(transform=transform_train)
    #testset = torchvision.datasets.MNIST(root='./data', train=False, download=False, transform=transform_test)
    testset = refactor_dataset_class.VGMMDataset(transform=transform_test)
    outputs = 10  # number of labels
    inputs = 1    # number of input channels


trainloader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, shuffle=True, num_workers=4)
testloader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=False, num_workers=4)
# num_workers: how many subprocesses to use for data loading. 0 means that the data will be loaded in the main process. (default: 0)
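
# Hedged sketch of a minimal evaluation pass over testloader. It assumes the
# network returns plain class logits; a Bayesian net such as BBBLeNet may
# instead return (logits, kl), in which case the forward call needs adapting.
def evaluate(net, testloader, device="cpu"):
    net.eval()
    correct, total = 0, 0
    with torch.no_grad():  # no autograd graph needed for evaluation
        for images, labels in testloader:
            logits = net(images.to(device))
            preds = logits.argmax(dim=1)
            correct += (preds == labels.to(device)).sum().item()
            total += labels.size(0)
    return correct / total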

# Return network & file name
def getNetwork(args):
    if (args.net_type == 'lenet'):
        net = BBBLeNet(outputs, inputs)  # inputs is the number of input channels
        file_name = 'lenet'