def get_aeloaders(dataset, batch, dataroot, ae_file, trans_type=TRANSFORMATION.clean):
    """Build data loaders whose test split is replaced by pre-computed
    adversarial examples (AEs) loaded from ``ae_file``.

    The train and validation loaders come from the regular pipeline
    unchanged; only the test loader is rebuilt around the AE inputs
    paired with the original test labels.

    :param dataset: name of the dataset to load.
    :param batch: batch size used by every loader.
    :param dataroot: root directory of the raw data.
    :param ae_file: file holding the pre-computed adversarial examples.
    :param trans_type: transformation applied to the AEs (and forwarded to
        the regular pipeline); defaults to the clean/no-op transformation.
    :return: (train_sampler, trainloader, validloader, testloader)
    """
    # Reuse the standard pipeline for the train/valid side; its own test
    # loader is discarded and replaced below.
    train_sampler, trainloader, validloader, _ = get_dataloaders(
        dataset, batch, dataroot, trans_type)
    _, test_aug = get_augmentation(dataset)

    # Only the ground-truth test labels are needed here; the inputs
    # themselves come from ae_file.
    _, (_, y_test) = load_data(dataset)

    # NOTE(review): load_model() is used to load AE *data*, not a model —
    # confirm this is the intended loader for ae_file.
    adv_examples = load_model(ae_file)
    adv_examples = transform(adv_examples, trans_type)
    adv_examples = data_utils.rescale(adv_examples)
    adv_examples = data_utils.set_channels_first(adv_examples)

    ae_testset = MyDataset(adv_examples, y_test, aug=test_aug)
    ae_testloader = torch.utils.data.DataLoader(
        ae_testset,
        batch_size=batch,
        shuffle=False,
        num_workers=32,
        pin_memory=torch.cuda.is_available(),
        drop_last=False,
    )
    return train_sampler, trainloader, validloader, ae_testloader
def prediction(data, models, nClasses, transformationList, batch_size=32, channel_last=True):
    '''
    Collect raw predictions from an ensemble of weak models, each of which
    receives its own transformed copy of the same input batch.

    input:
        data: nSamples X <Sample Dimension>
        models: a list of classification models
        nClasses: number of output classes per model
        transformationList: one transformation type per model (parallel to models)
        batch_size: batch size forwarded to each model's predict()
        channel_last: when False, inputs are converted to channels-first
            before prediction
    output:
        rawPred: prediction matrix, nWeakModels X nSamples X nClasses
        transTCs: per-model transformation time costs (seconds)
        predTCs: per-model prediction time costs (seconds)
    '''
    num_models = len(models)
    num_samples = data.shape[0]
    rawPred = np.zeros((num_models, num_samples, nClasses))
    transTCs = []
    predTCs = []

    data = np.float32(data)
    for idx in range(num_models):
        # Time the per-model input transformation separately from inference.
        tic = time.time()
        transformed = transform(data, transformationList[idx])
        transTCs.append(time.time() - tic)

        if not channel_last:
            # The model expects <n_samples, n_channels, rows, cols>.
            transformed = data_utils.set_channels_first(transformed)

        tic = time.time()
        rawPred[idx] = models[idx].predict(transformed, batch_size=batch_size)
        predTCs.append(time.time() - tic)

    return rawPred, transTCs, predTCs
def load_data(dataset, trans_type=TRANSFORMATION.clean, trans_set='both'):
    """Load a dataset, normalize pixels to [0, 1], one-hot encode the
    labels, and optionally transform the train and/or test split.

    :param dataset: dataset name; must be in DATA.get_supported_datasets().
    :param trans_type: transformation applied to the selected split(s);
        defaults to TRANSFORMATION.clean.
    :param trans_set: which split(s) to transform — None or 'none' for
        neither, 'train', 'test', or 'both'.
    :return: ((X_train, Y_train), (X_test, Y_test))

    NOTE(review): set_channels_first() is only called inside the trans_set
    branches below, so the returned channel layout depends on trans_set
    (channels-first when a split is transformed, channels-last otherwise) —
    confirm this asymmetry is intended by callers.
    """
    assert dataset in DATA.get_supported_datasets()
    assert trans_set is None or trans_set in ['none', 'train', 'test', 'both']

    X_train = None
    Y_train = None
    X_test = None
    Y_test = None
    img_rows = 0
    img_cols = 0
    nb_channels = 0
    nb_classes = 0

    if DATA.mnist == dataset:
        """
        Dataset of 60,000 28x28 grayscale images of the 10 digits,
        along with a test set of 10,000 images.
        """
        (X_train, Y_train), (X_test, Y_test)  = mnist.load_data()
        # nb_examples is unpacked but not used afterwards.
        nb_examples, img_rows, img_cols = X_test.shape
        nb_channels = 1
        nb_classes = 10
    elif DATA.fation_mnist == dataset:
        """
        Dataset of 60,000 28x28 grayscale images of 10 fashion categories,
        along with a test set of 10,000 images. The class labels are:
        Label Description
        0 T-shirt/top
        1 Trouser
        2 Pullover
        3 Dress
        4 Coat
        5 Sandal
        6 Shirt
        7 Sneaker
        8 Bag
        9 Ankle boot
        """
        (X_train, Y_train), (X_test, Y_test) = fashion_mnist.load_data()
        nb_examples, img_rows, img_cols = X_test.shape
        nb_channels = 1
        nb_classes = 10
    elif DATA.cifar_10 == dataset:
        """
        Dataset of 50,000 32x32 color training images, labeled over 10
        categories, and 10,000 test images.
        """
        (X_train, Y_train), (X_test, Y_test) = cifar10.load_data()
        nb_examples, img_rows, img_cols, nb_channels = X_test.shape
        nb_classes = 10
    elif DATA.cifar_100 == dataset:
        (X_train, Y_train), (X_test, Y_test) = cifar100.load_data(label_mode='fine')
        nb_examples, img_rows, img_cols, nb_channels = X_test.shape
        nb_classes = 100

    # Ensure an explicit channel axis (mnist/fashion-mnist arrive without one).
    X_train = X_train.reshape(-1, img_rows, img_cols, nb_channels)
    X_test = X_test.reshape(-1, img_rows, img_cols, nb_channels)

    """
    cast pixels to floats, normalize to [0, 1] range
    """
    X_train = X_train.astype(np.float32)
    X_test = X_test.astype(np.float32)
    X_train = data_utils.rescale(X_train, range=(0., 1.))
    X_test = data_utils.rescale(X_test, range=(0., 1.))

    """
    one-hot-encode the labels
    """
    Y_train = keras.utils.to_categorical(Y_train, nb_classes)
    Y_test = keras.utils.to_categorical(Y_test, nb_classes)

    """
    transform images
    """
    # trans_set == 'none' intentionally skips both branches below.
    if trans_set is not None:
        if trans_set in ['train', 'both']:
            X_train = transform(X_train, trans_type)
            # Re-rescale: the transformation may push pixels out of [0, 1].
            X_train = data_utils.rescale(X_train, range=(0., 1.))
            X_train = data_utils.set_channels_first(X_train)

        if trans_set in ['test', 'both']:
            X_test = transform(X_test, trans_type)
            X_test = data_utils.rescale(X_test, range=(0., 1.))
            X_test = data_utils.set_channels_first(X_test)

    """
    summarize data set
    """
    print('Dataset({}) Summary:'.format(dataset.upper()))
    print('Train set: {}, {}'.format(X_train.shape, Y_train.shape))
    print('Test set: {}, {}'.format(X_test.shape, Y_test.shape))
    return (X_train, Y_train), (X_test, Y_test)
def get_transformation_loaders(dataset, batch_size, transformation_configs=None, **kwargs):
    """Create train/validation/test DataLoaders for ``dataset``, optionally
    pre-processing the raw images with a batch of transformations.

    :param dataset: dataset name; must be in DATA.get_supported_datasets().
    :param batch_size: batch size for all three loaders.
    :param transformation_configs: when given, forwarded to
        models.bart.preprocess.process_batch to transform both splits.
    :param kwargs: optional settings —
        split (float, default 0.15): fraction of the train set held out for
            validation (0 disables the validation split);
        split_idx (int, default 0): which stratified split to use (0-4, since
            n_splits=5 below);
        target_lb (int, default -1): when >= 0, restrict train/valid indices
            to that label only;
        aug ('default' | policy name | list, default 'default'): extra
            augmentation policy prepended to the train augmentation;
        cutout (int, default 0): when > 0, cutout size appended to the train
            augmentation.
    :return: (train_sampler, trainloader, validloader, testloader)
    :raises ValueError: for an unsupported augmentation name or dataset.
    """
    train_aug, test_aug = get_augmentation(dataset)

    split = kwargs.get('split', 0.15)
    split_idx = kwargs.get('split_idx', 0)
    # BUG FIX: the option was previously read only under the misspelled key
    # 'targert_lb', so callers passing 'target_lb' were silently ignored.
    # Accept both spellings, preferring the correct one, for compatibility.
    target_lb = kwargs.get('target_lb', kwargs.get('targert_lb', -1))
    aug = kwargs.get('aug', 'default')
    cutout = kwargs.get('cutout', 0)

    print(f'[DATA][TRANSFORM_LOADER][dataset]: {dataset}')
    # BUG FIX: the label bracket was unterminated ('[split: ...').
    print(f'[DATA][TRANSFORM_LOADER][split]: {split}')
    print(f'[DATA][TRANSFORM_LOADER][split_idx]: {split_idx}')
    print(f'[DATA][TRANSFORM_LOADER][train_aug]: {train_aug}')
    print(f'[DATA][TRANSFORM_LOADER][test_aug]: {test_aug}')
    print(f'[DATA][TRANSFORM_LOADER][aug]: {aug}')

    # Load raw images; load_data returns one-hot labels, so collapse them
    # back to class indices for the torch datasets below.
    (x_train, y_train), (x_test, y_test) = load_data(dataset=dataset)
    y_train = np.asarray([np.argmax(y) for y in y_train])
    y_test = np.asarray([np.argmax(y) for y in y_test])

    if isinstance(aug, list):
        logger.debug(f'Processing data with custom augmentation [{aug}].')
        print(f'Processing data with custom augmentation [{aug}].')
        # BUG FIX: this branch logged that the caller's custom policy was
        # used but actually inserted the global config C.get()['aug'].
        # Insert the policy the caller actually supplied.
        train_aug.transforms.insert(0, Augmentation(aug))
    else:
        logger.debug(f'Processing data with pre-defined augmentation [{aug}].')
        print(f'Processing data with pre-defined augmentation [{aug}].')
        if aug == 'fa_reduced_cifar10':
            train_aug.transforms.insert(0, Augmentation(fa_reduced_cifar10()))
        elif aug == 'arsaug':
            train_aug.transforms.insert(0, Augmentation(arsaug_policy()))
        elif aug == 'autoaug_cifar10':
            train_aug.transforms.insert(0, Augmentation(autoaug_paper_cifar10()))
        elif aug == 'autoaug_extend':
            train_aug.transforms.insert(0, Augmentation(autoaug_policy()))
        elif aug in ['default', 'inception', 'inception320']:
            pass
        else:
            raise ValueError(f'Augmentation [{aug}] is not supported.')

    if cutout > 0:
        train_aug.transforms.append(CutoutDefault(cutout))

    # apply transformations
    if transformation_configs is not None:
        from models.bart.preprocess import process_batch
        processed_x_train = process_batch(data=x_train,
                                          transformation_configs=transformation_configs,
                                          channel_last=True)
        processed_x_test = process_batch(data=x_test,
                                         transformation_configs=transformation_configs,
                                         channel_last=True)
    else:
        processed_x_train = x_train
        processed_x_test = x_test

    # Torch models expect <n_samples, n_channels, rows, cols>.
    processed_x_train = data_utils.set_channels_first(processed_x_train)
    processed_x_test = data_utils.set_channels_first(processed_x_test)

    if dataset in DATA.get_supported_datasets():
        trainset = MyDataset(processed_x_train, y_train, aug=train_aug)
        testset = MyDataset(processed_x_test, y_test, aug=test_aug)
    else:
        raise ValueError(f'Dataset [{dataset}] is not supported yet.')

    train_sampler = None
    if split > 0.0:
        # Stratified hold-out: pick the (split_idx)-th of 5 candidate splits.
        sss = StratifiedShuffleSplit(n_splits=5, test_size=split, random_state=0)
        sss = sss.split(list(range(len(trainset))), trainset.targets)
        train_idx = None
        valid_idx = None
        for _ in range(split_idx + 1):
            train_idx, valid_idx = next(sss)

        if target_lb >= 0:
            # Restrict both subsets to the requested label.
            train_idx = [i for i in train_idx if trainset.targets[i] == target_lb]
            valid_idx = [i for i in valid_idx if trainset.targets[i] == target_lb]

        train_sampler = SubsetRandomSampler(train_idx)
        valid_sampler = SubsetSampler(valid_idx)
    else:
        # No validation split requested; validloader will yield nothing.
        valid_sampler = SubsetSampler([])

    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size,
        # shuffle and sampler are mutually exclusive in torch DataLoader.
        shuffle=True if train_sampler is None else False,
        num_workers=32, pin_memory=torch.cuda.is_available(),
        sampler=train_sampler, drop_last=True
    )
    validloader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=False,
        num_workers=16, pin_memory=torch.cuda.is_available(),
        sampler=valid_sampler, drop_last=False
    )
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=batch_size, shuffle=False,
        num_workers=32, pin_memory=torch.cuda.is_available(),
        drop_last=False
    )
    return train_sampler, trainloader, validloader, testloader