def load_data():
    """Download and load the Fashion MNIST dataset.

    The four gzip-compressed IDX archives are fetched over HTTP into the
    current working directory on every call (files of the same name are
    overwritten), then decompressed and decoded with `idx2numpy`.

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
    """
    base_url = 'http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/'
    # Order matters: it defines how the decoded arrays are unpacked below.
    archive_names = (
        'train-images-idx3-ubyte.gz',
        'train-labels-idx1-ubyte.gz',
        't10k-images-idx3-ubyte.gz',
        't10k-labels-idx1-ubyte.gz',
    )

    # Fetch everything first so that a failed download aborts before any
    # decoding work is done.
    for archive_name in archive_names:
        urllib.request.urlretrieve(base_url + archive_name, archive_name)

    arrays = []
    for archive_name in archive_names:
        with gzip.open(archive_name, 'rb') as f:
            arrays.append(idx2numpy.convert_from_string(f.read()))

    x_train, y_train, x_test, y_test = arrays
    return (x_train, y_train), (x_test, y_test)
def __parse_file(self, filenames):
    """Decode a set of gzip-compressed IDX files into arrays.

    Each path is classified by substring: it belongs to the "train" split
    if it contains "train" (otherwise "test"), and holds "images" or
    "labels" depending on which of those words appears in it first in that
    order of preference.

    Arguments:
        filenames: iterable of paths to gzip-compressed IDX files.

    Returns:
        Tuple `(train_images, train_labels, test_images, test_labels)`.

    Raises:
        IndexError: if a path contains neither "images" nor "labels".
        KeyError: if any of the four expected arrays was not produced.
    """
    arrays = {}
    for path in filenames:
        split = "train" if "train" in path else "test"
        kinds_found = [kind for kind in ("images", "labels") if kind in path]
        kind = kinds_found[0]

        with gzip.open(path, "rb") as fh:
            raw = fh.read()
        decoded = idx2numpy.convert_from_string(raw)

        # Only image arrays are re-oriented, and only when requested.
        if kind == "images" and self.rotate:
            decoded = rot90(decoded, k=-1, axes=(-2, -1))[..., ::-1]

        arrays[split + "_" + kind] = decoded

    wanted = ("train_images", "train_labels", "test_images", "test_labels")
    return tuple(arrays[key] for key in wanted)
def load_data(path='data/fashion_mnist'):
    """Load the Fashion MNIST dataset from local gzip-compressed IDX files.

    # Arguments
        path: directory containing the four archives
            `train-images-idx3-ubyte.gz`, `train-labels-idx1-ubyte.gz`,
            `t10k-images-idx3-ubyte.gz` and `t10k-labels-idx1-ubyte.gz`.
            Defaults to `data/fashion_mnist`, resolved against the current
            working directory.

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.

    # Raises
        FileNotFoundError: if one of the archives is missing from `path`.
    """
    import os

    def read_idx(archive_name):
        # Decompress the archive fully in memory and decode the IDX payload.
        with gzip.open(os.path.join(path, archive_name), 'rb') as f:
            return idx2numpy.convert_from_string(f.read())

    x_train = read_idx('train-images-idx3-ubyte.gz')
    y_train = read_idx('train-labels-idx1-ubyte.gz')
    x_test = read_idx('t10k-images-idx3-ubyte.gz')
    y_test = read_idx('t10k-labels-idx1-ubyte.gz')

    return (x_train, y_train), (x_test, y_test)
def __parse_file(self, filename):
    """Extract and decode one dataset's arrays from a zip archive.

    Archive members whose name contains `-<self.dataset>-` and ends in
    `.gz` are extracted next to the zip file (unless already present
    there), decompressed and decoded with `idx2numpy`.

    Arguments:
        filename: path to the zip archive.

    Returns:
        Tuple `(train_images, train_labels, test_images, test_labels)`.

    Raises:
        IndexError: if a selected member name contains neither "train" nor
            "test", or neither "images" nor "labels".
        KeyError: if any of the four expected arrays was not found.
    """
    output_ = dict()
    dir_name = os.path.dirname(filename)
    with zipfile.ZipFile(filename) as f_in:
        datasets = [
            f
            for f in f_in.namelist()
            if "-" + self.dataset + "-" in f and f.endswith(".gz")
        ]
        for dataset in datasets:
            dataset_with_full_path = os.path.join(dir_name, dataset)
            # Reuse a previously extracted copy instead of unpacking again.
            if not os.path.isfile(dataset_with_full_path):
                f_in.extract(dataset, dir_name)

            # Classify on the archive member name only. Matching against
            # the full extraction path would let a parent directory such as
            # ".../training/" or ".../images/" mislabel every member.
            pre = [t for t in ["train", "test"] if t in dataset][0]
            post = [t for t in ["images", "labels"] if t in dataset][0]
            name = pre + "_" + post

            with gzip.open(dataset_with_full_path, "rb") as f:
                array_temp = idx2numpy.convert_from_string(f.read())

            if post == "images" and self.rotate:
                # NOTE(review): assuming `rot90` is `numpy.rot90`, a -90
                # degree rotation followed by a flip of the last axis is a
                # transpose of the last two axes - confirm against imports.
                array_temp = rot90(array_temp, k=-1, axes=(-2, -1))[..., ::-1]

            output_[name] = array_temp

    return (
        output_["train_images"],
        output_["train_labels"],
        output_["test_images"],
        output_["test_labels"],
    )
def test_correct(self):
    """Check decoding of well-formed one-dimensional IDX byte strings.

    Every case is a complete IDX payload (header followed by data) paired
    with the values the decoder is expected to produce.
    """
    cases = [
        # Unsigned byte.
        (
            b'\x00\x00\x08\x01\x00\x00\x00\x03' +
            b'\x0A' +
            b'\x0B' +
            b'\xFF',
            [0x0A, 0x0B, 0xFF],
        ),
        # Signed byte.
        (
            b'\x00\x00\x09\x01\x00\x00\x00\x04' +
            b'\xFE' +
            b'\xFF' +
            b'\x00' +
            b'\xAA',
            [-2, -1, 0x00, -86],
        ),
        # Short.
        (
            b'\x00\x00\x0B\x01\x00\x00\x00\x02' +
            b'\xF0\x05' +
            b'\x00\xFF',
            [-4091, 255],
        ),
        # Integer.
        (
            b'\x00\x00\x0C\x01\x00\x00\x00\x03' +
            b'\x00\xFF\x00\xFF' +
            b'\x80\x00\x00\x00' +
            b'\x00\x00\x00\x00',
            [0x00FF00FF, -0x80000000, 0x00],
        ),
        # Float.
        # So far, no tests.
        # Double.
        (
            b'\x00\x00\x0E\x01\x00\x00\x00\x05' +
            b'\x3F\xF0\x00\x00\x00\x00\x00\x00' +
            b'\x40\x00\x00\x00\x00\x00\x00\x00' +
            b'\xC0\x00\x00\x00\x00\x00\x00\x00' +
            b'\x00\x00\x00\x00\x00\x00\x00\x00' +
            b'\x80\x00\x00\x00\x00\x00\x00\x00',
            [1.0, 2.0, -2.0, 0.0, -0.0],
        ),
    ]

    for payload, expected in cases:
        decoded = idx2numpy.convert_from_string(payload)
        self.assertEqual(np.ndim(decoded), 1)
        # The expected shape is one axis holding exactly the listed values.
        self.assertEqual(np.shape(decoded), (len(expected),))
        self.assertSequenceEqual(self._to_list(decoded), expected)
def extract_files_linux(self, file_path):
    """Decompress a gzip file with the external `gunzip` tool and decode it.

    Arguments:
        file_path: path to the gzip-compressed IDX file.

    Returns:
        The result of `idx2numpy.convert_from_string` applied to the
        decompressed bytes.

    Note:
        The exit status of `gunzip` is not checked; whatever it wrote to
        stdout is handed to the decoder as-is.
    """
    import subprocess

    proc = subprocess.Popen(["gunzip", "-c", file_path], stdout=subprocess.PIPE)
    # communicate() drains stdout, closes the pipe and waits for the child,
    # so neither a zombie process nor an open file descriptor is left behind.
    raw, _ = proc.communicate()
    return idx2numpy.convert_from_string(raw)