Example #1
def get_aeloaders(dataset,
                  batch,
                  dataroot,
                  ae_file,
                  trans_type=TRANSFORMATION.clean):
    train_sampler, trainloader, validloader, _ = get_dataloaders(
        dataset, batch, dataroot, trans_type)
    _, test_aug = get_augmentation(dataset)
    _, (_, y_test) = load_data(dataset)

    # Load the pre-generated adversarial examples, apply the requested
    # transformation, and rescale pixel values.
    x_ae = load_model(ae_file)
    x_ae = transform(x_ae, trans_type)
    x_ae = data_utils.rescale(x_ae)

    # Convert NHWC to NCHW, the layout PyTorch models expect.
    x_ae = data_utils.set_channels_first(x_ae)

    testset = MyDataset(x_ae, y_test, aug=test_aug)
    testloader = torch.utils.data.DataLoader(
        testset,
        batch_size=batch,
        shuffle=False,
        num_workers=32,
        pin_memory=torch.cuda.is_available(),
        drop_last=False)

    return train_sampler, trainloader, validloader, testloader
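
This helper and the ones below all push arrays through `data_utils.set_channels_first` before handing them to PyTorch. That helper is not defined in this listing; a minimal, self-contained sketch of what its call sites suggest it does (assumption: a plain NHWC-to-NCHW transpose; the real implementation may differ):

import numpy as np

def set_channels_first(batch):
    # NHWC (n, rows, cols, channels) -> NCHW (n, channels, rows, cols),
    # the layout PyTorch convolution layers expect.
    return np.transpose(batch, (0, 3, 1, 2))

x = np.zeros((8, 32, 32, 3), dtype=np.float32)
print(set_channels_first(x).shape)  # (8, 3, 32, 32)
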
Example #2
def prediction(data, models, nClasses, transformationList, batch_size=32, channel_last=True):
    '''
    input:
        data: nSamples x <sample dimension>
        models: a list of weak classification models
        nClasses: number of output classes
        transformationList: the transformation applied per weak model
    output:
        rawPred: prediction matrix of shape nWeakModels x nSamples x nClasses
        transTCs, predTCs: per-model transformation and prediction time costs
    '''
    nSamples, nWeakModels = data.shape[0], len(models)
    rawPred = np.zeros((nWeakModels, nSamples, nClasses))

    transTCs = []  # per-model transformation time costs
    predTCs = []   # per-model prediction time costs
    data = np.float32(data)
    for mIdx in range(nWeakModels):
        startTime = time.time()
        transformationType = transformationList[mIdx]
        testData = transform(data, transformationType)
        transTCs.append(time.time()-startTime)

        if not channel_last:
            # input shape of cnn model is <n_samples, n_channels, rows, cols>
            testData = data_utils.set_channels_first(testData)
        startTime = time.time()
        rawPred[mIdx] = models[mIdx].predict(testData, batch_size=batch_size)
        predTCs.append(time.time() - startTime)

    return rawPred, transTCs, predTCs
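
The returned `rawPred` tensor of shape (nWeakModels, nSamples, nClasses) still has to be reduced to a single ensemble decision; that step is not part of this snippet. A runnable sketch, assuming plain probability averaging (one of several possible aggregation strategies):

import numpy as np

def ensemble_average(raw_pred):
    # Average the per-model class probabilities, then take the argmax label.
    # raw_pred: (n_weak_models, n_samples, n_classes) -> labels: (n_samples,)
    return np.argmax(raw_pred.mean(axis=0), axis=1)

rng = np.random.default_rng(0)
toy = rng.random((3, 4, 10))      # 3 weak models, 4 samples, 10 classes
print(ensemble_average(toy))      # 4 predicted class indices
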
Example #3
def load_data(dataset, trans_type=TRANSFORMATION.clean, trans_set='both'):
    assert dataset in DATA.get_supported_datasets()
    assert trans_set is None or trans_set in ['none', 'train', 'test', 'both']

    X_train = None
    Y_train = None
    X_test = None
    Y_test = None
    img_rows = 0
    img_cols = 0
    nb_channels = 0
    nb_classes = 0

    if DATA.mnist == dataset:
        """
        Dataset of 60,000 28x28 grayscale images of the 10 digits,
        along with a test set of 10,000 images.
        """
        (X_train, Y_train), (X_test, Y_test) = mnist.load_data()

        nb_examples, img_rows, img_cols = X_test.shape
        nb_channels = 1
        nb_classes = 10
    elif DATA.fation_mnist == dataset:
        """
        Dataset of 60,000 28x28 grayscale images of 10 fashion categories,
        along with a test set of 10,000 images. The class labels are:
        Label   Description
        0       T-shirt/top
        1       Trouser
        2       Pullover
        3       Dress
        4       Coat
        5       Sandal
        6       Shirt
        7       Sneaker
        8       Bag
        9       Ankle boot
        """
        (X_train, Y_train), (X_test, Y_test) = fashion_mnist.load_data()

        nb_examples, img_rows, img_cols = X_test.shape
        nb_channels = 1
        nb_classes = 10
    elif DATA.cifar_10 == dataset:
        """
        Dataset of 50,000 32x32 color training images, labeled over 10 categories, and 10,000 test images.
        """
        (X_train, Y_train), (X_test, Y_test) = cifar10.load_data()

        nb_examples, img_rows, img_cols, nb_channels = X_test.shape
        nb_classes = 10
    elif DATA.cifar_100 == dataset:
        (X_train, Y_train), (X_test,
                             Y_test) = cifar100.load_data(label_mode='fine')
        nb_examples, img_rows, img_cols, nb_channels = X_test.shape
        nb_classes = 100

    X_train = X_train.reshape(-1, img_rows, img_cols, nb_channels)
    X_test = X_test.reshape(-1, img_rows, img_cols, nb_channels)
    """
    cast pixels to floats, normalize to [0, 1] range
    """
    X_train = X_train.astype(np.float32)
    X_test = X_test.astype(np.float32)
    X_train = data_utils.rescale(X_train, range=(0., 1.))
    X_test = data_utils.rescale(X_test, range=(0., 1.))
    """
    one-hot-encode the labels
    """
    Y_train = keras.utils.to_categorical(Y_train, nb_classes)
    Y_test = keras.utils.to_categorical(Y_test, nb_classes)
    """
    transform images
    """
    if trans_set is not None:
        if trans_set in ['train', 'both']:
            X_train = transform(X_train, trans_type)
            X_train = data_utils.rescale(X_train, range=(0., 1.))
            X_train = data_utils.set_channels_first(X_train)

        if trans_set in ['test', 'both']:
            X_test = transform(X_test, trans_type)
            X_test = data_utils.rescale(X_test, range=(0., 1.))
            X_test = data_utils.set_channels_first(X_test)
    """
    summarize data set
    """
    print('Dataset({}) Summary:'.format(dataset.upper()))
    print('Train set: {}, {}'.format(X_train.shape, Y_train.shape))
    print('Test set: {}, {}'.format(X_test.shape, Y_test.shape))
    return (X_train, Y_train), (X_test, Y_test)
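
`data_utils.rescale` is called throughout this listing but never defined here. A minimal sketch of the semantics its call sites suggest (assumption: a global min-max normalization into the requested range; the parameter name `range` mirrors the calls above):

import numpy as np

def rescale(x, range=(0., 1.)):
    # Min-max normalize the whole array into [lo, hi].
    lo, hi = range
    x_min, x_max = float(x.min()), float(x.max())
    return lo + (x - x_min) * (hi - lo) / max(x_max - x_min, 1e-12)

x = np.array([[0., 128., 255.]], dtype=np.float32)
print(rescale(x))  # [[0.         0.5019608  1.        ]]
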
Example #4
def get_transformation_loaders(dataset, batch_size, transformation_configs=None, **kwargs):
    train_aug, test_aug = get_augmentation(dataset)

    split = kwargs.get('split', 0.15)
    split_idx = kwargs.get('split_idx', 0)
    target_lb = kwargs.get('target_lb', -1)
    aug = kwargs.get('aug', 'default')
    cutout = kwargs.get('cutout', 0)

    print(f'[DATA][TRANSFORM_LOADER][dataset]: {dataset}')
    print(f'[DATA][TRANSFORM_LOADER][split]: {split}')
    print(f'[DATA][TRANSFORM_LOADER][split_idx]: {split_idx}')
    print(f'[DATA][TRANSFORM_LOADER][train_aug]: {train_aug}')
    print(f'[DATA][TRANSFORM_LOADER][test_aug]: {test_aug}')
    print(f'[DATA][TRANSFORM_LOADER][aug]: {aug}')

    # load raw images and collapse one-hot labels back to class indices
    (x_train, y_train), (x_test, y_test) = load_data(dataset=dataset)
    y_train = np.asarray([np.argmax(y) for y in y_train])
    y_test = np.asarray([np.argmax(y) for y in y_test])

    if isinstance(aug, list):
        logger.debug(f'Processing data with custom augmentation [{aug}].')
        print(f'Processing data with custom augmentation [{aug}].')
        train_aug.transforms.insert(0, Augmentation(aug))  # use the custom policy list passed in
    else:
        logger.debug(f'Processing data with pre-defined augmentation [{aug}].')
        print(f'Processing data with pre-defined augmentation [{aug}].')
        if aug == 'fa_reduced_cifar10':
            train_aug.transforms.insert(0, Augmentation(fa_reduced_cifar10()))
        elif aug == 'arsaug':
            train_aug.transforms.insert(0, Augmentation(arsaug_policy()))
        elif aug == 'autoaug_cifar10':
            train_aug.transforms.insert(0, Augmentation(autoaug_paper_cifar10()))
        elif aug == 'autoaug_extend':
            train_aug.transforms.insert(0, Augmentation(autoaug_policy()))
        elif aug in ['default', 'inception', 'inception320']:
            pass
        else:
            raise ValueError(f'Augmentation [{aug}] is not supported.')

    if cutout > 0:
        train_aug.transforms.append(CutoutDefault(cutout))

    # apply transformations
    if transformation_configs is not None:
        from models.bart.preprocess import process_batch
        processed_x_train = process_batch(data=x_train, transformation_configs=transformation_configs, channel_last=True)
        processed_x_test = process_batch(data=x_test, transformation_configs=transformation_configs, channel_last=True)
    else:
        processed_x_train = x_train
        processed_x_test = x_test

    processed_x_train = data_utils.set_channels_first(processed_x_train)
    processed_x_test = data_utils.set_channels_first(processed_x_test)

    if dataset in DATA.get_supported_datasets():
        trainset = MyDataset(processed_x_train, y_train, aug=train_aug)
        testset = MyDataset(processed_x_test, y_test, aug=test_aug)
    else:
        raise ValueError(f'Dataset [{dataset}] is not supported yet.')

    train_sampler = None
    if split > 0.0:
        sss = StratifiedShuffleSplit(n_splits=5, test_size=split, random_state=0)
        sss = sss.split(list(range(len(trainset))), trainset.targets)

        # Advance the split generator to the requested fold.
        train_idx = None
        valid_idx = None
        for _ in range(split_idx + 1):
            train_idx, valid_idx = next(sss)

        if target_lb >= 0:
            train_idx = [i for i in train_idx if trainset.targets[i] == target_lb]
            valid_idx = [i for i in valid_idx if trainset.targets[i] == target_lb]

        train_sampler = SubsetRandomSampler(train_idx)
        valid_sampler = SubsetSampler(valid_idx)
    else:
        valid_sampler = SubsetSampler([])

    trainloader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=train_sampler is None, num_workers=32,
        pin_memory=torch.cuda.is_available(), sampler=train_sampler, drop_last=True
    )
    validloader = torch.utils.data.DataLoader(
        trainset, batch_size=batch_size, shuffle=False, num_workers=16, pin_memory=torch.cuda.is_available(),
        sampler=valid_sampler, drop_last=False
    )
    testloader = torch.utils.data.DataLoader(
        testset, batch_size=batch_size, shuffle=False, num_workers=32, pin_memory=torch.cuda.is_available(),
        drop_last=False
    )

    return train_sampler, trainloader, validloader, testloader
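
The stratified fold selection at the end of this function can be exercised in isolation. A runnable sketch of the same mechanism, with toy labels standing in for `trainset.targets`:

import numpy as np
from sklearn.model_selection import StratifiedShuffleSplit
from torch.utils.data import SubsetRandomSampler

targets = np.repeat(np.arange(10), 10)   # 100 samples, 10 balanced classes

sss = StratifiedShuffleSplit(n_splits=5, test_size=0.15, random_state=0)
splits = sss.split(np.zeros(len(targets)), targets)

split_idx = 1
# Advance to the requested fold, mirroring the split_idx loop above.
for _ in range(split_idx + 1):
    train_idx, valid_idx = next(splits)

print(len(train_idx), len(valid_idx))    # 85 15
train_sampler = SubsetRandomSampler(train_idx)
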