Example #1
0
def mnist_fashion(root):
    """A dataset of Zalando's article images consisting of fashion products.
    
    Fashion mnist datasets is a drop-in replacement of the original MNIST dataset
    from https://github.com/zalandoresearch/fashion-mnist.
    Each sample is an gray image (in 3D NDArray) with shape (28, 28, 1).
    
    Attention: if exist dirs `root/mnist_fashion`, api will delete it and create it.
    Data storage directory:
    root = `/user/.../mydata`
    mnist_fashion data: 
    `root/mnist_fashion/train/0/xx.png`
    `root/mnist_fashion/train/2/xx.png`
    `root/mnist_fashion/train/6/xx.png`
    `root/mnist_fashion/test/0/xx.png`
    `root/mnist_fashion/test/2/xx.png`
    `root/mnist_fashion/test/6/xx.png`
    Args:
        root: str, Store the absolute path of the data directory.
              example:if you want data path is `/user/.../mydata/mnist_fashion`,
              root should be `/user/.../mydata`.
    Returns:
        Store the absolute path of the data directory, is `root/mnist_fashion`.
    """
    start = time.time()
    task_path = assert_dirs(root, 'mnist_fashion')
    url_list = ['http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz',
                'http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz',
                'http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz',
                'http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz']
    for url in url_list:
        rq.files(url, gfile.path_join(task_path, url.split('/')[-1]))
    with gzip.open(gfile.path_join(task_path, 'train-labels-idx1-ubyte.gz'), 'rb') as lbpath:
        train_label = np.frombuffer(lbpath.read(), np.uint8, offset=8)

    with gzip.open(gfile.path_join(task_path, 'train-images-idx3-ubyte.gz'), 'rb') as imgpath:
        train = np.frombuffer(imgpath.read(), np.uint8, offset=16).reshape(len(train_label), 28, 28)

    with gzip.open(gfile.path_join(task_path, 't10k-labels-idx1-ubyte.gz'), 'rb') as lbpath:
        test_label = np.frombuffer(lbpath.read(), np.uint8, offset=8)

    with gzip.open(gfile.path_join(task_path, 't10k-images-idx3-ubyte.gz'), 'rb') as imgpath:
        test = np.frombuffer(imgpath.read(), np.uint8, offset=16).reshape(len(test_label), 28, 28)
    
    for i in set(train_label):
        gfile.makedirs(gfile.path_join(task_path, 'train', str(i)))
    for i in set(test_label):
        gfile.makedirs(gfile.path_join(task_path, 'test', str(i)))
    for idx in range(train.shape[0]):
        save_image(gfile.path_join(task_path, 'train', str(train_label[idx]), str(idx)+'.png'), 
                   array_to_image(train[idx].reshape(28, 28, 1)))
    for idx in range(test.shape[0]):
        save_image(gfile.path_join(task_path, 'test', str(test_label[idx]), str(idx)+'.png'), 
                   array_to_image(test[idx].reshape(28, 28, 1)))
    for url in url_list:
        gfile.remove(gfile.path_join(task_path, url.split('/')[-1]))
    print('mnist_fashion dataset download completed, run time %d min %.2f sec' %divmod((time.time()-start), 60))
    return task_path
Example #2
0
def cifar100(root, fine_label=True):
    """CIFAR100 image classification dataset from https://www.cs.toronto.edu/~kriz/cifar.html
    
    Each sample is an image (in 3D NDArray) with shape (32, 32, 3).
    
    Attention: if exist dirs `root/cifar100`, api will delete it and create it.
    Data storage directory:
    root = `/user/.../mydata`
    cifar100 data: 
    `root/cifar100/train/0/xx.png`
    `root/cifar100/train/2/xx.png`
    `root/cifar100/train/6/xx.png`
    `root/cifar100/test/0/xx.png`
    `root/cifar100/test/2/xx.png`
    `root/cifar100/test/6/xx.png`
    Args:
        root: str, Store the absolute path of the data directory.
              example:if you want data path is `/user/.../mydata/cifar100`,
              root should be `/user/.../mydata`.
        fine_label: bool, default False.
                    Whether to load the fine-grained (100 classes) or 
                    coarse-grained (20 super-classes) labels.
    Returns:
        Store the absolute path of the data directory, is `root/cifar100`.
    """
    start = time.time()
    task_path = assert_dirs(root, 'cifar100')
    url = 'https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/cifar100/cifar-100-binary.tar.gz'
    rq.files(url, gfile.path_join(task_path, url.split('/')[-1]))
    with tarfile.open(gfile.path_join(task_path, url.split('/')[-1])) as t:
        t.extractall(task_path)
    noise_flie = gfile.listdir(task_path)
    with open(gfile.path_join(task_path, 'train.bin'), 'rb') as fin:
        data = np.frombuffer(fin.read(), dtype=np.uint8).reshape(-1, 3072 + 2)
        train = data[:, 2:].reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)
        train_label = data[:, 0 + fine_label].astype(np.int32)
    for i in set(train_label):
        gfile.makedirs(gfile.path_join(task_path, 'train', str(i)))
    for idx in range(train.shape[0]):
        save_image(
            gfile.path_join(task_path, 'train', str(train_label[idx]),
                            str(idx) + '.png'), array_to_image(train[idx]))
    with open(gfile.path_join(task_path, 'test.bin'), 'rb') as fin:
        data = np.frombuffer(fin.read(), dtype=np.uint8).reshape(-1, 3072 + 2)
        test = data[:, 2:].reshape(-1, 3, 32, 32).transpose(0, 2, 3, 1)
        test_label = data[:, 0 + fine_label].astype(np.int32)
    for i in set(test_label):
        gfile.makedirs(gfile.path_join(task_path, 'test', str(i)))
    for idx in range(test.shape[0]):
        save_image(
            gfile.path_join(task_path, 'test', str(test_label[idx]),
                            str(idx) + '.png'), array_to_image(test[idx]))
    for file in noise_flie:
        gfile.remove(gfile.path_join(task_path, file))
    print('cifar100 dataset download completed, run time %d min %.2f sec' %
          divmod((time.time() - start), 60))
    return task_path
Example #3
0
def mnist(root):
    """MNIST handwritten digits dataset from http://yann.lecun.com/exdb/mnist
    
    Each sample is an gray image (in 3D NDArray) with shape (28, 28, 1).
    
    Attention: if exist dirs `root/mnist`, api will delete it and create it.
    Data storage directory:
    root = `/user/.../mydata`
    mnist data: 
    `root/mnist/train/0/xx.png`
    `root/mnist/train/2/xx.png`
    `root/mnist/train/6/xx.png`
    `root/mnist/test/0/xx.png`
    `root/mnist/test/2/xx.png`
    `root/mnist/test/6/xx.png`
    Args:
        root: str, Store the absolute path of the data directory.
              example:if you want data path is `/user/.../mydata/mnist`,
              root should be `/user/.../mydata`.
    Returns:
        Store the absolute path of the data directory, is `root/mnist`.
    """
    start = time.time()
    task_path = assert_dirs(root, 'mnist')
    url_list = ['https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/mnist/train-labels-idx1-ubyte.gz',
                'https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/mnist/train-images-idx3-ubyte.gz',
                'https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/mnist/t10k-labels-idx1-ubyte.gz',
                'https://apache-mxnet.s3-accelerate.dualstack.amazonaws.com/gluon/dataset/mnist/t10k-images-idx3-ubyte.gz']
    for url in url_list:
        rq.files(url, gfile.path_join(task_path, url.split('/')[-1]))
    with gzip.open(gfile.path_join(task_path, 'train-labels-idx1-ubyte.gz'), 'rb') as lbpath:
        train_label = np.frombuffer(lbpath.read(), np.uint8, offset=8)

    with gzip.open(gfile.path_join(task_path, 'train-images-idx3-ubyte.gz'), 'rb') as imgpath:
        train = np.frombuffer(imgpath.read(), np.uint8, offset=16).reshape(len(train_label), 28, 28)

    with gzip.open(gfile.path_join(task_path, 't10k-labels-idx1-ubyte.gz'), 'rb') as lbpath:
        test_label = np.frombuffer(lbpath.read(), np.uint8, offset=8)

    with gzip.open(gfile.path_join(task_path, 't10k-images-idx3-ubyte.gz'), 'rb') as imgpath:
        test = np.frombuffer(imgpath.read(), np.uint8, offset=16).reshape(len(test_label), 28, 28)
    
    for i in set(train_label):
        gfile.makedirs(gfile.path_join(task_path, 'train', str(i)))
    for i in set(test_label):
        gfile.makedirs(gfile.path_join(task_path, 'test', str(i)))
    for idx in range(train.shape[0]):
        save_image(gfile.path_join(task_path, 'train', str(train_label[idx]), str(idx)+'.png'), 
                   array_to_image(train[idx].reshape(28, 28, 1)))
    for idx in range(test.shape[0]):
        save_image(gfile.path_join(task_path, 'test', str(test_label[idx]), str(idx)+'.png'), 
                   array_to_image(test[idx].reshape(28, 28, 1)))
    for url in url_list:
        gfile.remove(gfile.path_join(task_path, url.split('/')[-1]))
    print('mnist dataset download completed, run time %d min %.2f sec' %divmod((time.time()-start), 60))
    return task_path
Example #4
0
def mnist_kuzushiji10(root):
    """Kuzushiji-MNIST from https://github.com/rois-codh/kmnist.
    
    Kuzushiji-MNIST is a drop-in replacement for the
    MNIST dataset (28x28 grayscale, 70,000 images), 
    provided in the original MNIST format as well as a NumPy format.
    Since MNIST restricts us to 10 classes, we chose one character to
    represent each of the 10 rows of Hiragana when creating Kuzushiji-MNIST.
    
    Each sample is an gray image (in 3D NDArray) with shape (28, 28, 1).
    
    Attention: if exist dirs `root/mnist_kuzushiji10`, api will delete it and create it.
    Data storage directory:
    root = `/user/.../mydata`
    mnist_kuzushiji10 data: 
    `root/mnist_kuzushiji10/train/0/xx.png`
    `root/mnist_kuzushiji10/train/2/xx.png`
    `root/mnist_kuzushiji10/train/6/xx.png`
    `root/mnist_kuzushiji10/test/0/xx.png`
    `root/mnist_kuzushiji10/test/2/xx.png`
    `root/mnist_kuzushiji10/test/6/xx.png`
    Args:
        root: str, Store the absolute path of the data directory.
              example:if you want data path is `/user/.../mydata/mnist_kuzushiji10`,
              root should be `/user/.../mydata`.
    Returns:
        Store the absolute path of the data directory, is `root/mnist_kuzushiji10`.
    """
    start = time.time()
    task_path = assert_dirs(root, 'mnist_kuzushiji10')
    url_list = ['http://codh.rois.ac.jp/kmnist/dataset/kmnist/kmnist-train-imgs.npz',
                'http://codh.rois.ac.jp/kmnist/dataset/kmnist/kmnist-train-labels.npz',
                'http://codh.rois.ac.jp/kmnist/dataset/kmnist/kmnist-test-imgs.npz',
                'http://codh.rois.ac.jp/kmnist/dataset/kmnist/kmnist-test-labels.npz']
    for url in url_list:
        rq.files(url, gfile.path_join(task_path, url.split('/')[-1]))
    train = np.load(gfile.path_join(task_path, 'kmnist-train-imgs.npz'))['arr_0']
    train_label = np.load(gfile.path_join(task_path, 'kmnist-train-labels.npz'))['arr_0']
    test = np.load(gfile.path_join(task_path, 'kmnist-test-imgs.npz'))['arr_0']
    test_label = np.load(gfile.path_join(task_path, 'kmnist-test-labels.npz'))['arr_0']
    for i in set(train_label):
        gfile.makedirs(gfile.path_join(task_path, 'train', str(i)))
    for i in set(test_label):
        gfile.makedirs(gfile.path_join(task_path, 'test', str(i)))
    for idx in range(train.shape[0]):
        save_image(gfile.path_join(task_path, 'train', str(train_label[idx]), str(idx)+'.png'), 
                   array_to_image(train[idx].reshape(28, 28, 1)))
    for idx in range(test.shape[0]):
        save_image(gfile.path_join(task_path, 'test', str(test_label[idx]), str(idx)+'.png'), 
                   array_to_image(test[idx].reshape(28, 28, 1)))
    for url in url_list:
        gfile.remove(gfile.path_join(task_path, url.split('/')[-1]))
    print('mnist_kuzushiji10 dataset download completed, run time %d min %.2f sec' %divmod((time.time()-start), 60))
    return task_path
Example #5
0
def mnist_kuzushiji49(root):
    """Kuzushiji-49 from https://github.com/rois-codh/kmnist.
    
    Kuzushiji-49, as the name suggests, has 49 classes (28x28 grayscale, 270,912 images),
    is a much larger, but imbalanced dataset containing 48 Hiragana 
    characters and one Hiragana iteration mark.
    
    Each sample is an gray image (in 3D NDArray) with shape (28, 28, 1).
    
    Attention: if exist dirs `root/mnist_kuzushiji49`, api will delete it and create it.
    Data storage directory:
    root = `/user/.../mydata`
    mnist_kuzushiji49 data: 
    `root/mnist_kuzushiji49/train/0/xx.png`
    `root/mnist_kuzushiji49/train/2/xx.png`
    `root/mnist_kuzushiji49/train/6/xx.png`
    `root/mnist_kuzushiji49/test/0/xx.png`
    `root/mnist_kuzushiji49/test/2/xx.png`
    `root/mnist_kuzushiji49/test/6/xx.png`
    Args:
        root: str, Store the absolute path of the data directory.
              example:if you want data path is `/user/.../mydata/mnist_kuzushiji49`,
              root should be `/user/.../mydata`.
    Returns:
        Store the absolute path of the data directory, is `root/mnist_kuzushiji49`.
    """
    start = time.time()
    task_path = assert_dirs(root, 'mnist_kuzushiji49')
    url_list = ['http://codh.rois.ac.jp/kmnist/dataset/k49/k49-train-imgs.npz',
                'http://codh.rois.ac.jp/kmnist/dataset/k49/k49-train-labels.npz',
                'http://codh.rois.ac.jp/kmnist/dataset/k49/k49-test-imgs.npz',
                'http://codh.rois.ac.jp/kmnist/dataset/k49/k49-test-labels.npz']
    for url in url_list:
        rq.files(url, gfile.path_join(task_path, url.split('/')[-1]))
    train = np.load(gfile.path_join(task_path, 'k49-train-imgs.npz'))['arr_0']
    train_label = np.load(gfile.path_join(task_path, 'k49-train-labels.npz'))['arr_0']
    test = np.load(gfile.path_join(task_path, 'k49-test-imgs.npz'))['arr_0']
    test_label = np.load(gfile.path_join(task_path, 'k49-test-labels.npz'))['arr_0']
    for i in set(train_label):
        gfile.makedirs(gfile.path_join(task_path, 'train', str(i)))
    for i in set(test_label):
        gfile.makedirs(gfile.path_join(task_path, 'test', str(i)))
    for idx in range(train.shape[0]):
        save_image(gfile.path_join(task_path, 'train', str(train_label[idx]), str(idx)+'.png'), 
                   array_to_image(train[idx].reshape(28, 28, 1)))
    for idx in range(test.shape[0]):
        save_image(gfile.path_join(task_path, 'test', str(test_label[idx]), str(idx)+'.png'), 
                   array_to_image(test[idx].reshape(28, 28, 1)))
    for url in url_list:
        gfile.remove(gfile.path_join(task_path, url.split('/')[-1]))
    print('mnist_kuzushiji49 dataset download completed, run time %d min %.2f sec' %divmod((time.time()-start), 60))
    return task_path
Example #6
0
def mnist_kannada(root):
    """kannada-MNIST from https://github.com/vinayprabhu/Kannada_MNIST.
    
    The Kannada-MNIST dataset was created an a drop-in substitute for the standard MNIST dataset.
    
    Each sample is an gray image (in 3D NDArray) with shape (28, 28, 1).
    
    Attention: if exist dirs `root/mnist_kannada`, api will delete it and create it.
    Data storage directory:
    root = `/user/.../mydata`
    mnist_kannada data: 
    `root/mnist_kannada/train/0/xx.png`
    `root/mnist_kannada/train/2/xx.png`
    `root/mnist_kannada/train/6/xx.png`
    `root/mnist_kannada/test/0/xx.png`
    `root/mnist_kannada/test/2/xx.png`
    `root/mnist_kannada/test/6/xx.png`
    Args:
        root: str, Store the absolute path of the data directory.
              example:if you want data path is `/user/.../mydata/mnist_kannada`,
              root should be `/user/.../mydata`.
    Returns:
        Store the absolute path of the data directory, is `root/mnist_kannada`.
    """
    start = time.time()
    print('Downloading data from https://github.com/Hourout/datasets/releases/download/0.0.1/kannada_MNIST.zip')
    task_path = assert_dirs(root, 'mnist_kannada')
    zip_path = rq.files('https://github.com/Hourout/datasets/releases/download/0.0.1/kannada_MNIST.zip', task_path+'/kannada_MNIST.zip')
    unzip_path = un_zip(task_path+'/kannada_MNIST.zip')
    train = pd.read_csv(gfile.path_join(task_path, 'kannada_MNIST/kannada_MNIST_train.csv'), header=None, dtype='uint8')
    test = pd.read_csv(gfile.path_join(task_path, 'kannada_MNIST/kannada_MNIST_test.csv'), header=None, dtype='uint8')
    for i in set(train[0]):
        gfile.makedirs(gfile.path_join(task_path, 'train', str(i)))
        gfile.makedirs(gfile.path_join(task_path, 'test', str(i)))
    for i in range(len(train)):
        save_image(gfile.path_join(task_path, 'train', str(train.iat[i, 0]), str(i)+'.png'),
                       array_to_image(train.iloc[i, 1:].values.reshape(28, 28, 1)))
    for i in range(len(test)):
        save_image(gfile.path_join(task_path, 'test', str(test.iat[i, 0]), str(i)+'.png'),
                       array_to_image(test.iloc[i, 1:].values.reshape(28, 28, 1)))
    gfile.remove(zip_path)
    gfile.remove(unzip_path)
    print('mnist_kannada dataset download completed, run time %d min %.2f sec' %divmod((time.time()-start), 60))
    return task_path
Example #7
0
def mnist_tibetan(root):
    """Tibetan-MNIST from https://github.com/bat67/TibetanMNIST.
    
    Tibetan-MNIST is a drop-in replacement for the
    MNIST dataset (28x28 grayscale, 70,000 images), 
    provided in the original MNIST format as well as a NumPy format.
    Since MNIST restricts us to 10 classes, we chose one character to
    represent each of the 10 rows of Hiragana when creating Tibetan-MNIST.
    
    Each sample is an gray image (in 3D NDArray) with shape (28, 28, 1).
    
    Attention: if exist dirs `root/mnist_tibetan`, api will delete it and create it.
    Data storage directory:
    root = `/user/.../mydata`
    mnist_tibetan data: 
    `root/mnist_tibetan/train/0/xx.png`
    `root/mnist_tibetan/train/2/xx.png`
    `root/mnist_tibetan/train/6/xx.png`
    `root/mnist_tibetan/test/0/xx.png`
    `root/mnist_tibetan/test/2/xx.png`
    `root/mnist_tibetan/test/6/xx.png`
    Args:
        root: str, Store the absolute path of the data directory.
              example:if you want data path is `/user/.../mydata/mnist_tibetan`,
              root should be `/user/.../mydata`.
    Returns:
        Store the absolute path of the data directory, is `root/mnist_tibetan`.
    """
    start = time.time()
    print('Downloading data from https://github.com/Hourout/datasets/tree/master/TibetanMNIST')
    task_path = assert_dirs(root, 'mnist_tibetan')
    url_list = ['https://raw.githubusercontent.com/Hourout/datasets/master/TibetanMNIST/TibetanMNIST_28_28_01.csv',
                'https://raw.githubusercontent.com/Hourout/datasets/master/TibetanMNIST/TibetanMNIST_28_28_02.csv']
    data = pd.DataFrame()
    for url in url_list:
        s = requests.get(url).content
        data = pd.concat([data, pd.read_csv(io.StringIO(s.decode('utf-8')),header=None, dtype='uint8')])
    train = data.loc[:, 1:].values.reshape(-1, 28, 28)
    train_label = data.loc[:, 0].values
    for i in set(train_label):
        gfile.makedirs(gfile.path_join(task_path, 'train', str(i)))
    for idx in range(train.shape[0]):
        save_image(gfile.path_join(task_path, 'train', str(train_label[idx]), str(idx)+'.png'),
                   array_to_image(train[idx].reshape(28, 28, 1)))
    print('mnist_tibetan dataset download completed, run time %d min %.2f sec' %divmod((time.time()-start), 60))
    return task_path
Example #8
0
def stl10(root):
    """Stl10 dataset from http://ai.stanford.edu/~acoates/stl10
    
    The STL-10 dataset is an image recognition dataset for developing 
    unsupervised feature learning, deep learning, self-taught learning algorithms.
    It is inspired by the CIFAR-10 dataset but with some modifications. 
    In particular, each class has fewer labeled training examples than in CIFAR-10, 
    but a very large set of unlabeled examples is provided to learn image models 
    prior to supervised training. The primary challenge is to make use of the 
    unlabeled data (which comes from a similar but different 
    distribution from the labeled data) to build a useful prior. 
    We also expect that the higher resolution of this dataset (96x96) 
    will make it a challenging benchmark for developing 
    more scalable unsupervised learning methods.
    
    Attention: if exist dirs `root/stl10`, api will delete it and create it.
    Data storage directory:
    root = `/user/.../mydata`
    stl10 data: 
    `root/stl10/train/1/xx.png`
    `root/stl10/train/4/xx.png`
    `root/stl10/train/8/xx.png`
    `root/stl10/test/1/xx.png`
    `root/stl10/test/4/xx.png`
    `root/stl10/test/8/xx.png`
    Args:
        root: str, Store the absolute path of the data directory.
              example:if you want data path is `/user/.../mydata/stl10`,
              root should be `/user/.../mydata`.
    Returns:
        Store the absolute path of the data directory, is `root/stl10`.
    """
    start = time.time()
    task_path = assert_dirs(root, 'stl10')
    url = "http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz"
    rq.files(url, gfile.path_join(task_path, url.split('/')[-1]))
    un_tar(un_gz(gfile.path_join(task_path, url.split('/')[-1])))
    
    with open(gfile.path_join(task_path, 'stl10_binary/stl10_binary/test_X.bin'), 'rb') as fin:
        data = np.frombuffer(fin.read(), dtype=np.uint8).reshape(-1, 3,96,96).transpose((0, 3, 2, 1))
    with open(gfile.path_join(task_path, 'stl10_binary/stl10_binary/test_y.bin'), 'rb') as fin:
        data_label = np.frombuffer(fin.read(), dtype=np.uint8)
    for i in set(data_label):
        gfile.makedirs(gfile.path_join(task_path, 'test', str(i)))
    for idx in range(data.shape[0]):
        save_image(gfile.path_join(task_path, 'test', str(data_label[idx]), str(idx)+'.png'), array_to_image(data[idx]))
    
    with open(gfile.path_join(task_path, 'stl10_binary/stl10_binary/train_X.bin'), 'rb') as fin:
        data = np.frombuffer(fin.read(), dtype=np.uint8).reshape(-1, 3,96,96).transpose((0, 3, 2, 1))
    with open(gfile.path_join(task_path, 'stl10_binary/stl10_binary/train_y.bin'), 'rb') as fin:
        data_label = np.frombuffer(fin.read(), dtype=np.uint8)
    for i in set(data_label):
        gfile.makedirs(gfile.path_join(task_path, 'train', str(i)))
    for idx in range(data.shape[0]):
        save_image(gfile.path_join(task_path, 'train', str(data_label[idx]), str(idx)+'.png'), array_to_image(data[idx]))

    with open(gfile.path_join(task_path, 'stl10_binary/stl10_binary/unlabeled_X.bin'), 'rb') as fin:
        data = np.frombuffer(fin.read(), dtype=np.uint8).reshape(-1, 3,96,96).transpose((0, 3, 2, 1))
    gfile.makedirs(gfile.path_join(task_path, 'unlabeled'))
    for idx in range(data.shape[0]):
        save_image(gfile.path_join(task_path, 'unlabeled', str(idx)+'.png'), array_to_image(data[idx]))
    
    gfile.remove(gfile.path_join(task_path, 'stl10_binary.tar.gz'))
    gfile.remove(gfile.path_join(task_path, 'stl10_binary.tar'))
    gfile.remove(path_join(task_path, 'stl10_binary'))
    print('stl10 dataset download completed, run time %d min %.2f sec' %divmod((time.time()-start), 60))
    return task_path