def load_data():
    """Download the Fashion MNIST archives and decode them.

    The four gzip-compressed IDX files are fetched on every call and
    stored under their own names relative to the current working
    directory, then decompressed and parsed with `idx2numpy`.

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
    """
    # (url, local file name) pairs, in download and decode order.
    sources = (
        ('http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz',
         'train-images-idx3-ubyte.gz'),
        ('http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz',
         'train-labels-idx1-ubyte.gz'),
        ('http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz',
         't10k-images-idx3-ubyte.gz'),
        ('http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz',
         't10k-labels-idx1-ubyte.gz'),
    )

    # All downloads complete before any file is decoded.
    fetched = [
        urllib.request.urlretrieve(url, local_name)
        for url, local_name in sources
    ]
    # Echo what urlretrieve returned for the training images.
    print(fetched[0])

    decoded = []
    for _, local_name in sources:
        with gzip.open(local_name, 'rb') as stream:
            decoded.append(idx2numpy.convert_from_string(stream.read()))

    x_train, y_train, x_test, y_test = decoded
    return (x_train, y_train), (x_test, y_test)
Example #2
0
    def __parse_file(self, filenames):
        """Decode gzip-compressed IDX files into the four dataset arrays.

        Each file is classified from its base name: a name containing
        "train" belongs to the training split, anything else to the test
        split; "images" or "labels" selects the kind of array.

        Args:
            filenames: iterable of string paths to ``.gz`` IDX files.

        Returns:
            Tuple ``(train_images, train_labels, test_images, test_labels)``.

        Raises:
            IndexError: if a base name contains neither "images" nor
                "labels".
            KeyError: if one of the four expected arrays was not found
                among ``filenames``.
        """
        # Local import keeps this method self-contained.
        import os

        output_ = dict()

        for filename in filenames:
            # Classify on the base name only; matching against the whole
            # path would let a directory such as "images/" or "train/"
            # mislabel the file (e.g. labels stored as images and rotated).
            base_name = os.path.basename(filename)
            pre = "train" if "train" in base_name else "test"
            post = [t for t in ["images", "labels"] if t in base_name][0]
            name = pre + "_" + post

            with gzip.open(filename, "rb") as f:
                array_temp = idx2numpy.convert_from_string(f.read())

            # Only image arrays are re-oriented, and only when requested:
            # rot90 with k=-1 over the last two axes, then the last axis is
            # reversed.
            if post == "images" and self.rotate:
                array_temp = rot90(array_temp, k=-1,
                                   axes=(-2, -1))[..., ::-1]

            output_[name] = array_temp

        return (
            output_["train_images"],
            output_["train_labels"],
            output_["test_images"],
            output_["test_labels"],
        )
def load_data():
    """Read the Fashion MNIST arrays from local gzip-compressed IDX files.

    The four archives are opened from `data/fashion_mnist/`, relative to
    the current working directory; nothing is downloaded.

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
    """

    def read_idx(archive_path):
        # Decompress one archive and parse its IDX payload.
        with gzip.open(archive_path, 'rb') as stream:
            return idx2numpy.convert_from_string(stream.read())

    x_train = read_idx('data/fashion_mnist/train-images-idx3-ubyte.gz')
    y_train = read_idx('data/fashion_mnist/train-labels-idx1-ubyte.gz')
    x_test = read_idx('data/fashion_mnist/t10k-images-idx3-ubyte.gz')
    y_test = read_idx('data/fashion_mnist/t10k-labels-idx1-ubyte.gz')

    return (x_train, y_train), (x_test, y_test)
Example #4
0
    def __parse_file(self, filename):
        """Extract and decode the selected dataset from a zip archive.

        Archive members whose name contains ``"-<self.dataset>-"`` and ends
        with ``.gz`` are extracted next to the archive (unless a file is
        already present at the target path) and parsed as gzip-compressed
        IDX data.

        Args:
            filename: path to the zip archive.

        Returns:
            Tuple ``(train_images, train_labels, test_images, test_labels)``.

        Raises:
            IndexError: if a member's base name lacks a "train"/"test" or an
                "images"/"labels" token.
            KeyError: if one of the four expected arrays was not found in
                the archive.
        """
        output_ = dict()
        dir_name = os.path.dirname(filename)

        with zipfile.ZipFile(filename) as f_in:
            datasets = [
                f for f in f_in.namelist()
                if "-" + self.dataset + "-" in f and f.endswith(".gz")
            ]

            for dataset in datasets:
                dataset_with_full_path = os.path.join(dir_name, dataset)

                # Reuse a previously extracted copy when one exists.
                if not os.path.isfile(dataset_with_full_path):
                    f_in.extract(dataset, dir_name)

                # Classify on the member's base name only. Matching against
                # the full path would let the archive's directory (for
                # example ".../train/..." or ".../images/...") override the
                # tokens in the file name and mislabel the array.
                member_name = os.path.basename(dataset)
                pre = [
                    t for t in ["train", "test"] if t in member_name
                ][0]
                post = [
                    t for t in ["images", "labels"] if t in member_name
                ][0]
                name = pre + "_" + post

                with gzip.open(dataset_with_full_path, "rb") as f:
                    array_temp = idx2numpy.convert_from_string(f.read())

                # Only image arrays are re-oriented, and only when requested:
                # rot90 with k=-1 over the last two axes, then the last axis
                # is reversed.
                if post == "images" and self.rotate:
                    array_temp = rot90(array_temp, k=-1,
                                       axes=(-2, -1))[..., ::-1]

                output_[name] = array_temp

        return (
            output_["train_images"],
            output_["train_labels"],
            output_["test_images"],
            output_["test_labels"],
        )
Example #5
0
    def test_correct(self):
        """Check decoding of well-formed one-dimensional IDX strings.

        One payload per element type is decoded with
        `idx2numpy.convert_from_string`; the number of dimensions, the shape
        and the values of the result are then verified.
        """

        def check(payload, expected):
            # Decode once, then verify rank, shape and contents in turn.
            decoded = idx2numpy.convert_from_string(payload)
            self.assertEqual(np.ndim(decoded), 1)
            self.assertEqual(np.shape(decoded), (len(expected),))
            self.assertSequenceEqual(self._to_list(decoded), expected)

        # Unsigned byte.
        check(
            b'\x00\x00\x08\x01\x00\x00\x00\x03'
            b'\x0A'
            b'\x0B'
            b'\xFF',
            [0x0A, 0x0B, 0xFF])

        # Signed byte.
        check(
            b'\x00\x00\x09\x01\x00\x00\x00\x04'
            b'\xFE'
            b'\xFF'
            b'\x00'
            b'\xAA',
            [-2, -1, 0x00, -86])

        # Short.
        check(
            b'\x00\x00\x0B\x01\x00\x00\x00\x02'
            b'\xF0\x05'
            b'\x00\xFF',
            [-4091, 255])

        # Integer.
        check(
            b'\x00\x00\x0C\x01\x00\x00\x00\x03'
            b'\x00\xFF\x00\xFF'
            b'\x80\x00\x00\x00'
            b'\x00\x00\x00\x00',
            [0x00FF00FF, -0x80000000, 0x00])

        # Float.
        # So far, no tests.

        # Double.
        check(
            b'\x00\x00\x0E\x01\x00\x00\x00\x05'
            b'\x3F\xF0\x00\x00\x00\x00\x00\x00'
            b'\x40\x00\x00\x00\x00\x00\x00\x00'
            b'\xC0\x00\x00\x00\x00\x00\x00\x00'
            b'\x00\x00\x00\x00\x00\x00\x00\x00'
            b'\x80\x00\x00\x00\x00\x00\x00\x00',
            [1.0, 2.0, -2.0, 0.0, -0.0])
Example #6
0
 def extract_files_linux(self, file_path):
     """Decompress a gzip file with the external `gunzip` tool and parse it.

     Runs ``gunzip -c file_path``, captures the decompressed bytes from the
     child's standard output and hands them to
     `idx2numpy.convert_from_string`.

     Args:
         file_path: path to the gzip-compressed IDX file.

     Returns:
         Whatever `idx2numpy.convert_from_string` returns for the
         decompressed data.
     """
     import subprocess
     proc = subprocess.Popen(["gunzip", "-c", file_path],
                             stdout=subprocess.PIPE)
     # communicate() drains stdout, closes the pipe and waits for gunzip to
     # exit, so neither a zombie process nor an open descriptor is left
     # behind (a bare proc.stdout.read() does neither).
     data, _ = proc.communicate()
     return idx2numpy.convert_from_string(data)