Code example #1
def get_single_task(dataroot, task):
    tf = transforms.ToTensor()

    if task.startswith('EMNIST'):
        split = task.split('/', maxsplit=2)[1]
        dataroot = join(dataroot, 'emnist')
        tf_target = (lambda x: x - 1) if split == 'letters' else None
        output_size = 26 if split == 'letters' else 10
        trainset = EMNIST(dataroot,
                          split=split,
                          train=True,
                          transform=tf,
                          target_transform=tf_target)
        trainset = stratified_subset(trainset, trainset.targets.tolist(), 500)
        testset = EMNIST(dataroot,
                         split=split,
                         train=False,
                         transform=tf,
                         target_transform=tf_target)
    elif task == 'KMNIST':
        dataroot = join(dataroot, 'kmnist')
        output_size = 10
        trainset = KMNIST(dataroot, train=True, transform=tf)
        trainset = stratified_subset(trainset, trainset.targets.tolist(), 500)
        testset = KMNIST(dataroot, train=False, transform=tf)
    else:
        raise ValueError(task)

    return trainset, testset, output_size
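
A hypothetical usage sketch for the function above; the './data' root and the 'EMNIST/letters' task string are assumptions based on the snippet's own branching, and stratified_subset is a helper from the snippet's project:

# Hypothetical call: EMNIST letters task with a stratified training subset.
trainset, testset, output_size = get_single_task('./data', 'EMNIST/letters')
print(len(testset), output_size)  # test split size, 26 letter classes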
Code example #2
from torch import float32, zeros
from torchvision.datasets import KMNIST
from torchvision.transforms import Compose, ConvertImageDtype, Normalize


def load_kmnist_data():
    # Tensor pipeline: scale uint8 pixels to [0, 1] floats, then
    # normalize to [-1, 1] with mean/std 0.5.
    transforms = Compose([
        ConvertImageDtype(float32),
        Normalize(zeros(28, 28) + 0.5,
                  zeros(28, 28) + 0.5),
    ])
    train_set = KMNIST(root="./data/",
                       download=True,
                       train=True,
                       transform=transforms)
    test_set = KMNIST(root="./data/",
                      train=False,
                      download=True,
                      transform=transforms)

    # The pipeline is applied to the raw uint8 tensors directly, so the
    # per-sample `transform` passed to the datasets above is bypassed here.
    train_data = transforms(train_set.data)
    test_data = transforms(test_set.data)

    return (
        train_data.unsqueeze(1),
        test_data.unsqueeze(1),
        train_set.targets,
        test_set.targets,
        train_set.classes,
    )
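
A quick shape check for the returned tensors (a sketch; the shapes are the standard KMNIST 60,000/10,000 split of 28x28 images):

train_x, test_x, train_y, test_y, classes = load_kmnist_data()
print(train_x.shape)  # torch.Size([60000, 1, 28, 28]), float32 in [-1, 1]
print(classes[train_y[0].item()])  # class name of the first training sample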
Code example #3
def kmnist(root):
    from torchvision import transforms
    from torchvision.datasets import KMNIST
    transform = transforms.Compose([
        lambda x: x.convert("RGB"),
        transforms.Resize(224),
        transforms.ToTensor(),
    ])
    trainset = KMNIST(root, train=True, transform=transform, download=True)
    testset = KMNIST(root, train=False, transform=transform)
    return trainset, testset
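
The RGB conversion and 224-pixel resize make the single-channel images digestible by ImageNet-style backbones. A hedged usage sketch; the resnet18 choice is an assumption, not part of the original snippet:

from torchvision.models import resnet18

trainset, testset = kmnist('./data')
x, y = trainset[0]
print(x.shape)  # torch.Size([3, 224, 224])
model = resnet18(num_classes=10)  # KMNIST has 10 classes
logits = model(x.unsqueeze(0))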
Code example #4
    @classmethod
    def create(cls, args):
        trainset = KMNIST(args.dataset_root,
                          train=True,
                          transform=cls.get_transforms(args, True),
                          download=True)
        testset = KMNIST(args.dataset_root,
                         train=False,
                         transform=cls.get_transforms(args, False),
                         download=True)

        train, valid = _split(args, trainset)

        return train, valid, testset
Code example #5
def get_loaders_kmnist(train_translation_rotation_list,
                       test_translation_rotation_list, batch_size):
    """Load kmnist dataset. The data is divided by 255 and subracted by mean and divided by standard deviation.
    """
    train_loaders_desc = []
    test_loaders_desc = []
    for (translation, rotation) in train_translation_rotation_list:
        train_desc = f'train_{translation[0]}_{translation[1]}_{rotation}_KMNIST'  # for identifying in logs
        train_transform = transforms.Compose([
            #transforms.Resize(31),
            transforms.RandomAffine(rotation, translation, (0.9, 1.1)),
            transforms.ToTensor(),
            transforms.Normalize((0.5, ), (0.5, )),
        ])
        training_set = KMNIST(DATAPATH,
                              train=True,
                              download=True,
                              transform=train_transform)
        training_data_loader = DataLoader(training_set,
                                          batch_size=batch_size,
                                          shuffle=True)
        train_loaders_desc.append((training_data_loader, train_desc))
    for (translation, rotation) in test_translation_rotation_list:
        test_desc = f'test_{translation[0]}_{translation[1]}_{rotation}_KMNIST'  # for identifying in logs
        test_transform = transforms.Compose([
            #transforms.Resize(31),
            transforms.RandomAffine(rotation, translation),
            transforms.ToTensor(),
            transforms.Normalize((0.5, ), (0.5, )),
        ])
        testing_set = KMNIST(DATAPATH,
                             train=False,
                             download=True,
                             transform=test_transform)
        testing_data_loader = DataLoader(testing_set,
                                         batch_size=batch_size,
                                         shuffle=True)
        test_loaders_desc.append((testing_data_loader, test_desc))
    return train_loaders_desc, test_loaders_desc
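
Each entry in the two lists is a (translation, rotation) pair as consumed by RandomAffine, where translation is itself a (horizontal, vertical) fraction pair. A hypothetical call, assuming DATAPATH is defined at module level as the snippet requires:

train_loaders, test_loaders = get_loaders_kmnist(
    train_translation_rotation_list=[((0.1, 0.1), 15)],
    test_translation_rotation_list=[((0.0, 0.0), 0)],
    batch_size=64,
)
loader, desc = train_loaders[0]
print(desc)  # train_0.1_0.1_15_KMNIST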
Code example #6
def get_kmnist_loaders(data_dir, b_sz, shuffle=True):
    """Helper function that deserializes KMNIST data 
    and returns the relevant data loaders.

    params:
        data_dir:    string - root directory where the data will be saved
        b_sz:        integer - the batch size
        shuffle:     boolean - whether to shuffle the training set or not
    """
    train_loader = DataLoader(KMNIST(data_dir,
                                     transform=ToTensor(),
                                     download=True),
                              shuffle=shuffle,
                              batch_size=b_sz)
    test_loader = DataLoader(KMNIST(data_dir,
                                    train=False,
                                    transform=ToTensor(),
                                    download=True),
                             shuffle=False,
                             batch_size=b_sz)

    return train_loader, test_loader
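
Minimal usage sketch: build the loaders and inspect one batch.

train_loader, test_loader = get_kmnist_loaders('./data', b_sz=32)
images, labels = next(iter(train_loader))
print(images.shape, labels.shape)  # torch.Size([32, 1, 28, 28]) torch.Size([32])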
Code example #7
File: datasets.py Project: sinnr1992/curvature
def kmnist(root: str,
           batch_size: int = 32,
           workers: int = 6,
           splits: Union[str, Tuple[str]] = ('train', 'val')) -> LoaderTypes:
    """Wrapper for loading the `KMNIST` dataset.

    Args:
        root: The root directory where the dataset is stored. Usually ~/.torch/datasets.
        batch_size: The batch size.
        workers: The number of CPU workers to use when loading the data from disk.
        splits: Which splits of the data to return. Possible values are `train`, `val` and `test`.

    Returns:
        A list of data loaders for the chosen splits (a single loader if only
        one split is requested).
    """
    loader_list = list()
    if 'train' in splits or 'val' in splits:
        train_val_set = KMNIST(root, train=True, download=True, transform=ToTensor())

        val_set, train_set = torch.utils.data.random_split(train_val_set, [10000, len(train_val_set) - 10000])
        if 'train' in splits:
            train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=workers,
                                      pin_memory=True)
            loader_list.append(train_loader)
        if 'val' in splits:
            # Memory is a project-specific caching dataset: one full pass
            # fills its cache, after which set_use_cache(True) serves the
            # cached, pinned tensors directly.
            val_set = Memory(val_set, img_size=28, channels=1)
            for _ in val_set:
                pass
            val_set.set_use_cache(True)
            val_set.pin_memory()
            loader_list.append(val_set)
    if 'test' in splits:
        test_set = KMNIST(root, train=False, download=True, transform=ToTensor())
        test_set = Memory(test_set, img_size=28, channels=1)
        for _ in test_set:
            pass
        test_set.set_use_cache(True)
        test_set.pin_memory()
        loader_list.append(test_set)

    if len(loader_list) == 1:
        return loader_list[0]
    return loader_list
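
Split membership is tested with containment, so a single split can also be passed as a plain string. A sketch of both call patterns; because Memory is a project-specific caching dataset, the 'val' and 'test' entries come back as cached datasets rather than DataLoaders:

train_loader, val_set = kmnist('~/.torch/datasets', batch_size=64, splits=('train', 'val'))
test_set = kmnist('~/.torch/datasets', splits='test')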
Code example #8
 def _get_mnist(self, train, transform=None, download=False):
     return KMNIST(self.data_dir,
                   train=train,
                   transform=transform,
                   download=download)
Code example #9
from torchvision.datasets import KMNIST
train = KMNIST('./kmnist_folder', train=True, download=True)

train

train[0]

image,label = train[0]

image

from PIL import Image
new_image = image.resize((200, 200), Image.LANCZOS)  # ANTIALIAS was removed in Pillow 10
new_image

import torchvision.datasets as dsets
import torchvision.transforms as transforms
Code example #10
from torchvision.datasets import KMNIST
from torch.utils.data import TensorDataset
from data.data_helpers import split_dataset, stratified_split_dataset, concat_datasets
import properties as prop
import pwd, os

DATA_PATH = pwd.getpwuid(os.getuid()).pw_dir + '/time_series_data/kMNIST'


def transform_data(data):
    data = data.unsqueeze(1).float().div(255)
    return data


train_dataset = KMNIST(DATA_PATH, train=True, download=True)
trainX, trainy = transform_data(train_dataset.data), train_dataset.targets

#validation_dataset, train_dataset = split_dataset(TensorDataset(trainX, trainy), prop.VAL_SIZE)
#trainX, trainy = train_dataset.tensors[0], train_dataset.tensors[1]

train_dataset = TensorDataset(trainX, trainy)

# train_size = 2000
# train_dataset, _ = split_dataset(train_dataset, train_size)

################ test dataset ################################
test_dataset = KMNIST(DATA_PATH, train=False, download=True)
testX, testy = transform_data(test_dataset.data), test_dataset.targets

test_dataset = TensorDataset(testX, testy)
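
A sketch of how the resulting TensorDatasets might be consumed; the batch size here is an arbitrary choice:

from torch.utils.data import DataLoader

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=128)
xb, yb = next(iter(train_loader))
print(xb.shape, xb.dtype)  # torch.Size([128, 1, 28, 28]) torch.float32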
Code example #11
def select_dataset(dataset_name, input_dim=2, n_samples=10000):
    """
    :params n_samples: number of points returned. If 0, all datapoints will be returned. For artificial data, it will throw an error.
    """
    if dataset_name == 'fmnist':
        f_mnist = FashionMNIST(root="./datasets", download=True)
        data = f_mnist.data.numpy()
        vec_data = np.reshape(data, (data.shape[0], -1))
        vec_data = np.float32(vec_data)
        labels = f_mnist.targets.numpy()
    elif dataset_name == 'emnist':
        f_mnist = EMNIST(root="./datasets", download=True, split='byclass')
        data = f_mnist.data.numpy()
        vec_data = np.reshape(data, (data.shape[0], -1))
        vec_data = np.float32(vec_data)
        labels = f_mnist.targets.numpy()
    elif dataset_name == 'kmnist':
        f_mnist = KMNIST(root="./datasets", download=True)
        data = f_mnist.data.numpy()
        vec_data = np.reshape(data, (data.shape[0], -1))
        vec_data = np.float32(vec_data)
        labels = f_mnist.targets.numpy()
    elif dataset_name == 'usps':
        f_mnist = USPS(root="./datasets", download=True)
        data = f_mnist.data
        vec_data = np.reshape(data, (data.shape[0], -1))
        vec_data = np.float32(vec_data)
        labels = np.float32(f_mnist.targets)
    elif dataset_name == 'news':
        newsgroups_train = fetch_20newsgroups(data_home='./datasets', subset='train',
                                              remove=('headers', 'footers', 'quotes'))
        vectorizer = TfidfVectorizer()
        vec_data = vectorizer.fit_transform(newsgroups_train.data).toarray()
        vec_data = np.float32(vec_data)
        labels = newsgroups_train.target
        labels = np.float32(labels)
    elif dataset_name == 'cover_type':
        file_name = file_path + "/datasets/covtype.data"
        train_data = np.array(pd.read_csv(file_name, sep=','))
        vec_data = np.float32(train_data[:, :-1])
        labels = np.float32(train_data[:, -1])
    elif dataset_name == 'char':
        digits = datasets.load_digits()
        n_samples = len(digits.images)
        data = digits.images.reshape((n_samples, -1))
        vec_data = np.float32(data)
        labels = digits.target

    elif dataset_name == 'charx':
        file_name = file_path + "/datasets/char_x.npy"
        data = np.load(file_name, allow_pickle=True)
        vec_data, labels = data[0], data[1]

    elif dataset_name == 'kdd_cup':
        cover_train = fetch_kddcup99(data_home='./datasets', download_if_missing=True)
        vec_data = cover_train.data
        string_labels = cover_train.target
        vec_data, labels = feature_tranformers.vectorizer_kdd(data=vec_data, labels=string_labels)
    elif dataset_name == 'aggregation':
        file_name = file_path + "/2d_data/Aggregation.csv"
        a = np.array(pd.read_csv(file_name, sep=';'))
        vec_data = a[:, 0:2]
        labels = a[:, 2]
    elif dataset_name == 'compound':
        file_name = file_path + "/2d_data/Compound.txt"
        a = np.array(pd.read_csv(file_name, sep='\t'))
        vec_data = a[:, 0:2]
        labels = a[:, 2]
    elif dataset_name == 'd31':
        file_name = file_path + "/2d_data/D31.txt"
        a = np.array(pd.read_csv(file_name, sep='\t'))
        vec_data = a[:, 0:2]
        labels = a[:, 2]
    elif dataset_name == 'flame':
        file_name = file_path + "/2d_data/flame.txt"
        a = np.array(pd.read_csv(file_name, sep='\t'))
        vec_data = a[:, 0:2]
        labels = a[:, 2]
    elif dataset_name == 'path_based':
        file_name = file_path + "/2d_data/pathbased.txt"
        a = np.array(pd.read_csv(file_name, sep='\t'))
        vec_data = a[:, 0:2]
        labels = a[:, 2]
    elif dataset_name == 'r15':
        file_name = file_path + "/2d_data/R15.txt"
        a = np.array(pd.read_csv(file_name, sep='\t'))
        vec_data = a[:, 0:2]
        labels = a[:, 2]
    elif dataset_name == 'spiral':
        file_name = file_path + "/2d_data/spiral.txt"
        a = np.array(pd.read_csv(file_name, sep='\t'))
        vec_data = a[:, 0:2]
        labels = a[:, 2]
    elif dataset_name == 'birch1':
        file_name = file_path + "/2d_data/birch1.txt"
        a = np.array(pd.read_csv(file_name, delimiter=r"\s+"))
        vec_data = a[:, :]
        labels = np.ones((vec_data.shape[0]))
    elif dataset_name == 'birch2':
        file_name = file_path + "/2d_data/birch2.txt"
        a = np.array(pd.read_csv(file_name, delimiter=r"\s+"))
        vec_data = a[:, :]
        labels = np.ones((vec_data.shape[0]))
    elif dataset_name == 'birch3':
        file_name = file_path + "/2d_data/birch3.txt"
        a = np.array(pd.read_csv(file_name, delimiter=r"\s+"))
        vec_data = a[:, :]
        labels = np.ones((vec_data.shape[0]))
    elif dataset_name == 'worms':
        file_name = file_path + "/2d_data/worms/worms_2d.txt"
        a = np.array(pd.read_csv(file_name, sep=' '))
        vec_data = a[:, :]
        labels = np.ones((vec_data.shape[0]))
    elif dataset_name == 't48k':
        file_name = file_path + "/2d_data/t4.8k.txt"
        a = np.array(pd.read_csv(file_name, sep=' '))
        vec_data = a[1:, :]
        labels = np.ones((vec_data.shape[0]))
    elif dataset_name == 'moons':
        data, labels = make_moons(n_samples=5000)
        vec_data = np.float32(data)
        labels = np.float32(labels)
    elif dataset_name == 'circles':
        data, labels = make_circles(n_samples=5000)
        vec_data = np.float32(data)
        labels = np.float32(labels)
    elif dataset_name == 'blobs':
        data, labels = make_blobs(n_samples=n_samples, centers=3)
        vec_data = np.float32(data)
        labels = np.float32(labels)
    elif dataset_name == 'gmm':
        mean_1 = np.zeros(input_dim)
        mean_2 = 100 * np.ones(input_dim)
        cov = np.eye(input_dim)
        data_1 = np.random.multivariate_normal(mean_1, cov, int(n_samples / 2))
        labels_1 = np.ones(int(n_samples / 2))
        labels_2 = 2 * np.ones(int(n_samples / 2))
        data_2 = np.random.multivariate_normal(mean_2, cov, int(n_samples / 2))
        vec_data = np.concatenate([data_1, data_2], axis=0)
        labels = np.concatenate([labels_1, labels_2], axis=0)
    elif dataset_name == 'uniform':
        vec_data = np.random.uniform(0, 1, size=(n_samples, input_dim)) * 10
        labels = np.ones(n_samples)
    elif dataset_name == 'mnist_pc':
        d_mnist = MNIST(root="./datasets", download=True)
        mnist = d_mnist.data.numpy()
        data = np.float32(np.reshape(mnist, (mnist.shape[0], -1)))
        pca_data = PCA(n_components=input_dim).fit_transform(data)
        n_indices = np.random.randint(pca_data.shape[0], size=n_samples)
        vec_data = pca_data[n_indices]
        labels = d_mnist.targets.numpy()[n_indices]
    elif dataset_name == 'usps_pc':
        d_mnist = USPS(root="./datasets", download=True)
        mnist = d_mnist.data
        data = np.float32(np.reshape(mnist, (mnist.shape[0], -1)))
        pca_data = PCA(n_components=input_dim).fit_transform(data)
        n_indices = np.random.randint(pca_data.shape[0], size=n_samples)
        vec_data = pca_data[n_indices]
        labels = np.float32(d_mnist.targets)
    elif dataset_name == 'char_pc':
        digits = datasets.load_digits()
        n_samples = len(digits.images)
        data = digits.images.reshape((n_samples, -1))
        data = np.float32(data)
        targets = digits.target
        pca_data = PCA(n_components=input_dim).fit_transform(data)
        n_indices = np.random.randint(pca_data.shape[0], size=n_samples)
        vec_data = pca_data[n_indices]
        labels = targets
    else:
        d_mnist = MNIST(root="./datasets", download=True)
        data = d_mnist.data.numpy()
        vec_data = np.reshape(data, (data.shape[0], -1))
        vec_data = np.float32(vec_data)
        labels = d_mnist.targets.numpy()

    if 0 < n_samples < vec_data.shape[0]:
        rand_indices = np.random.choice(vec_data.shape[0], size=(n_samples,), replace=False)
        return vec_data[rand_indices], labels[rand_indices]
    else:
        return vec_data, labels
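
Hypothetical calls; file_path, the ./datasets cache directory, and the helper modules are module-level assumptions carried over from the original project:

X, y = select_dataset('kmnist', n_samples=1000)
print(X.shape, y.shape)  # (1000, 784) (1000,)
X2, y2 = select_dataset('blobs', n_samples=500)  # synthetic 2-D blobs, 3 centers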
Code example #12
fmnist_train_dataset = FashionMNIST(
    DATA_DIR,
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.Pad(2),
        transforms.ToTensor(),
        transforms.Normalize((0.5, ), (0.5, )),
    ]),
)
fmnist_test_dataset = FashionMNIST(
    DATA_DIR,
    train=False,
    transform=transforms.Compose([
        transforms.Pad(2),
        transforms.ToTensor(),
        transforms.Normalize((0.5, ), (0.5, )),
    ]),
)
kmnist_train_dataset = KMNIST(
    DATA_DIR,
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.Pad(2),
        transforms.ToTensor(),
        transforms.Normalize((0.5, ), (0.5, )),
    ]),
)
kmnist_test_dataset = KMNIST(
    DATA_DIR,
    train=False,
    download=True,
    transform=transforms.Compose([
        transforms.Pad(2),
        transforms.ToTensor(),
        transforms.Normalize((0.5, ), (0.5, )),
    ]),
)
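
Because of Pad(2), samples come out as 32x32 tensors rather than 28x28, which suits LeNet-style models that expect 32x32 inputs. A quick check, with DATA_DIR as above:

x, y = kmnist_train_dataset[0]
print(x.shape)  # torch.Size([1, 32, 32])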
Code example #13
# Create an instance of the model to be trained
model = MLP()

# -----------------------------------------------------------------------------
# Prepare the training data
#
print('---------- Preparing the training data ----------')
data_folder = '~/data'
transform = transforms.Compose([
    # Convert the data to a Tensor
    transforms.ToTensor()
])

# Training data
train_data_with_labels = KMNIST(data_folder,
                                train=True,
                                download=True,
                                transform=transform)

train_data_loader = DataLoader(train_data_with_labels,
                               batch_size=BATCH_SIZE,
                               shuffle=True)

# Validation data (the KMNIST test split)
test_data_with_labels = KMNIST(data_folder,
                               train=False,
                               download=True,
                               transform=transform)
test_data_loader = DataLoader(test_data_with_labels,
                              batch_size=BATCH_SIZE,
                              shuffle=True)
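
A minimal training-loop sketch to go with the loaders above; MLP and BATCH_SIZE come from elsewhere in the original script, and the loss and optimizer choices here are assumptions:

import torch.nn as nn
import torch.optim as optim

criterion = nn.CrossEntropyLoss()  # assumed; the original's loss is not shown
optimizer = optim.SGD(model.parameters(), lr=0.01)  # assumed optimizer

for images, labels in train_data_loader:
    optimizer.zero_grad()
    loss = criterion(model(images), labels)  # assumes MLP flattens its input internally
    loss.backward()
    optimizer.step()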