def combine_batches(path):
    """Assemble CIFAR-10 train/test arrays from the batch files in `path`.

    Path points to the directory cifar-10-batches-py.

    Code based on:
    https://github.com/tensorflow/tensorflow/blob/r1.13/tensorflow/python/keras/
    datasets/cifar10.py

    Args:
        path: directory containing `data_batch_1` .. `data_batch_5` and
            `test_batch`.

    Returns:
        `(x_train, y_train), (x_test, y_test)`. `x_train` is allocated as a
        (50000, 3, 32, 32) uint8 array; both image arrays are transposed
        with `(0, 2, 3, 1)` when the backend reports `channels_last`.
        Labels are reshaped to `(num_samples, 1)`.
    """
    per_batch = 10000
    total = 50000
    x_train = np.empty((total, 3, 32, 32), dtype='uint8')
    y_train = np.empty((total,), dtype='uint8')

    # Each training batch file fills one contiguous 10000-row slice.
    for batch_no in range(1, 6):
        lo = (batch_no - 1) * per_batch
        hi = batch_no * per_batch
        batch_file = os.path.join(path, 'data_batch_' + str(batch_no))
        images, labels = load_batch(batch_file)
        x_train[lo:hi, :, :, :] = images
        y_train[lo:hi] = labels

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    # Turn the flat label vectors into column vectors.
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        x_train = np.transpose(x_train, (0, 2, 3, 1))
        x_test = np.transpose(x_test, (0, 2, 3, 1))

    return (x_train, y_train), (x_test, y_test)
def load_data(label_mode='fine'):
    """Loads CIFAR100 dataset.

    Arguments:
        label_mode: one of "fine", "coarse". It selects the
            `<label_mode>_labels` key requested from `load_batch`.

    Returns:
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
        Labels are reshaped to `(num_samples, 1)`; images are transposed
        with `(0, 2, 3, 1)` when the backend reports `channels_last`.

    Raises:
        ValueError: in case of invalid `label_mode`.
    """
    if label_mode not in ['fine', 'coarse']:
        raise ValueError('`label_mode` must be one of `"fine"`, `"coarse"`.')

    data_dir = get_file(
        'cifar-100-python',
        origin='https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz',
        untar=True)
    label_key = label_mode + '_labels'

    x_train, y_train = load_batch(os.path.join(data_dir, 'train'),
                                  label_key=label_key)
    x_test, y_test = load_batch(os.path.join(data_dir, 'test'),
                                label_key=label_key)

    # Labels become column vectors.
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        x_train = np.transpose(x_train, (0, 2, 3, 1))
        x_test = np.transpose(x_test, (0, 2, 3, 1))

    return (x_train, y_train), (x_test, y_test)
def get_data(
    data_path: str
) -> Tuple[Tuple[np.ndarray, np.ndarray], Tuple[np.ndarray, np.ndarray]]:
    """Read the CIFAR-10 batch files under `data_path` into NumPy arrays.

    Args:
        data_path: directory containing `data_batch_1` .. `data_batch_5`
            and `test_batch`.

    Returns:
        `(train_data, train_labels), (test_data, test_labels)`. The training
        images are allocated as (50000, 3, 32, 32) uint8; both image arrays
        are transposed with `(0, 2, 3, 1)` when the Keras backend reports
        `channels_last`. Labels are reshaped to `(num_samples, 1)`.
    """
    total = 50000
    train_data = np.empty((total, 3, 32, 32), dtype="uint8")
    train_labels = np.empty((total, ), dtype="uint8")

    # Batch k (0-based) occupies rows [k * 10000, (k + 1) * 10000).
    for k in range(5):
        rows = slice(k * 10000, (k + 1) * 10000)
        batch_file = os.path.join(data_path, "data_batch_" + str(k + 1))
        train_data[rows, :, :, :], train_labels[rows] = load_batch(batch_file)

    test_data, test_labels = load_batch(os.path.join(data_path, "test_batch"))

    train_labels = np.reshape(train_labels, (len(train_labels), 1))
    test_labels = np.reshape(test_labels, (len(test_labels), 1))

    if keras.backend.image_data_format() == "channels_last":
        train_data = np.transpose(train_data, (0, 2, 3, 1))
        test_data = np.transpose(test_data, (0, 2, 3, 1))

    return (train_data, train_labels), (test_data, test_labels)
def load_cifar10_data(path='cifar-10-batches-py'):
    """Read CIFAR-10 batch files from `path` and return channels-last arrays.

    Args:
        path: directory containing `data_batch_1` .. `data_batch_5` and
            `test_batch`.

    Returns:
        `(x_train, y_train), (x_test, y_test)`. Images are always transposed
        with `(0, 2, 3, 1)` (no backend check); labels are reshaped to
        `(num_samples, 1)`. The test arrays are cast to the dtypes of the
        training arrays.
    """
    from tensorflow.python.keras.datasets.cifar import load_batch

    total = 50000
    x_train = np.empty((total, 3, 32, 32), dtype='uint8')
    y_train = np.empty((total, ), dtype='uint8')

    for batch_no in range(1, 6):
        lo, hi = (batch_no - 1) * 10000, batch_no * 10000
        images, labels = load_batch(
            os.path.join(path, 'data_batch_' + str(batch_no)))
        x_train[lo:hi, :, :, :] = images
        y_train[lo:hi] = labels

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    x_train = np.transpose(x_train, (0, 2, 3, 1))
    x_test = np.transpose(x_test, (0, 2, 3, 1))

    # Align the test dtypes with the (uint8) training arrays.
    x_test = x_test.astype(x_train.dtype)
    y_test = y_test.astype(y_train.dtype)

    return (x_train, y_train), (x_test, y_test)
def load_data():
    """Loads CIFAR10 dataset.

    The archive is requested through `get_file` with `cache_dir='.'`.

    # Returns
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
        Labels are reshaped to `(num_samples, 1)`; images are transposed
        with `(0, 2, 3, 1)` when the backend reports `channels_last`.
    """
    path = get_file(
        'cifar-10-batches-py',
        origin='https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz',
        untar=True,
        cache_dir='.')

    total = 50000
    x_train = np.empty((total, 3, 32, 32), dtype='uint8')
    y_train = np.empty((total, ), dtype='uint8')

    # Batch k (0-based) occupies rows [k * 10000, (k + 1) * 10000).
    for k in range(5):
        rows = slice(k * 10000, (k + 1) * 10000)
        batch_file = os.path.join(path, 'data_batch_' + str(k + 1))
        x_train[rows, :, :, :], y_train[rows] = load_batch(batch_file)

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        x_train = np.transpose(x_train, (0, 2, 3, 1))
        x_test = np.transpose(x_test, (0, 2, 3, 1))

    return (x_train, y_train), (x_test, y_test)
def load_data():
    """Load CIFAR-10 from `cifar-10-batches-py` under the working directory.

    Returns:
        `(x_train, y_train), (x_test, y_test)`. Labels are reshaped to
        `(num_samples, 1)`; images are transposed with `(0, 2, 3, 1)` when
        the backend reports `channels_last`. The test arrays are cast to the
        dtypes of the training arrays.
    """
    path = os.getcwd() + '/cifar-10-batches-py'

    total = 50000
    x_train = np.empty((total, 3, 32, 32), dtype='uint8')
    y_train = np.empty((total, ), dtype='uint8')

    for batch_no in range(1, 6):
        lo, hi = (batch_no - 1) * 10000, batch_no * 10000
        images, labels = load_batch(
            os.path.join(path, 'data_batch_' + str(batch_no)))
        x_train[lo:hi, :, :, :] = images
        y_train[lo:hi] = labels

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        x_train = np.transpose(x_train, (0, 2, 3, 1))
        x_test = np.transpose(x_test, (0, 2, 3, 1))

    # Align the test dtypes with the (uint8) training arrays.
    x_test = x_test.astype(x_train.dtype)
    y_test = y_test.astype(y_train.dtype)

    return (x_train, y_train), (x_test, y_test)
def data():
    """Load CIFAR-10 from the relative directory `cifar-10/cifar-10-batches-py`.

    Adapted from
    https://github.com/tensorflow/tensorflow/blob/v2.3.0/tensorflow/python/keras/datasets/cifar10.py

    Returns:
        `(x_train, y_train), (x_test, y_test)`. Labels are reshaped to
        `(num_samples, 1)` and cast to int32; images are transposed with
        `(0, 2, 3, 1)` when the backend reports `channels_last`.
    """
    path = 'cifar-10/cifar-10-batches-py'

    total = 50000
    x_train = np.empty((total, 3, 32, 32), dtype='uint8')
    y_train = np.empty((total, ), dtype='uint8')

    # Batch k (0-based) occupies rows [k * 10000, (k + 1) * 10000).
    for k in range(5):
        rows = slice(k * 10000, (k + 1) * 10000)
        batch_file = os.path.join(path, 'data_batch_' + str(k + 1))
        x_train[rows, :, :, :], y_train[rows] = load_batch(batch_file)

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    train_column = np.reshape(y_train, (len(y_train), 1))
    test_column = np.reshape(y_test, (len(y_test), 1))
    y_train = train_column.astype(np.int32)
    y_test = test_column.astype(np.int32)

    if K.image_data_format() == 'channels_last':
        x_train = np.transpose(x_train, (0, 2, 3, 1))
        x_test = np.transpose(x_test, (0, 2, 3, 1))

    return (x_train, y_train), (x_test, y_test)
def load_data(path="TensorFlow/data/mnist.npz"):
    """Load the MNIST archive or the CIFAR-10 batches selected by `path`.

    Args:
        path: one of two recognised locations:
            "TensorFlow/data/mnist.npz" - an `.npz` archive holding the keys
            `x_train`, `y_train`, `x_test` and `y_test`;
            "TensorFlow/data/cifar-10-batches-py/" - a directory holding
            `data_batch_1` .. `data_batch_5` and `test_batch`.

    Returns:
        `(x_train, y_train), (x_test, y_test)`. For CIFAR-10 the labels are
        reshaped to `(num_samples, 1)`, images are transposed with
        `(0, 2, 3, 1)` when the backend reports `channels_last`, and the
        test arrays are cast to the dtypes of the training arrays.

    Raises:
        ValueError: if `path` is neither of the two recognised locations
            (previously this fell through and returned None).
    """
    if path == "TensorFlow/data/mnist.npz":
        # The context manager closes the archive even when a key is missing.
        with np.load(path) as f:
            x_train, y_train = f['x_train'], f['y_train']
            x_test, y_test = f['x_test'], f['y_test']
        return (x_train, y_train), (x_test, y_test)

    if path == "TensorFlow/data/cifar-10-batches-py/":
        num_train_samples = 50000
        x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
        y_train = np.empty((num_train_samples,), dtype='uint8')

        # Each training batch file fills one contiguous 10000-row slice.
        for i in range(1, 6):
            fpath = os.path.join(path, 'data_batch_' + str(i))
            (x_train[(i - 1) * 10000:i * 10000, :, :, :],
             y_train[(i - 1) * 10000:i * 10000]) = load_batch(fpath)

        fpath = os.path.join(path, 'test_batch')
        x_test, y_test = load_batch(fpath)

        y_train = np.reshape(y_train, (len(y_train), 1))
        y_test = np.reshape(y_test, (len(y_test), 1))

        if K.image_data_format() == 'channels_last':
            x_train = x_train.transpose(0, 2, 3, 1)
            x_test = x_test.transpose(0, 2, 3, 1)

        x_test = x_test.astype(x_train.dtype)
        y_test = y_test.astype(y_train.dtype)

        return (x_train, y_train), (x_test, y_test)

    raise ValueError(
        'Unsupported dataset path: {!r}. Expected '
        '"TensorFlow/data/mnist.npz" or '
        '"TensorFlow/data/cifar-10-batches-py/".'.format(path))
def load_data():
    """Loads CIFAR10 dataset.

    The batch files are read from `/mnt/bb/<USER>/cifar-10-batches-py`,
    where `<USER>` comes from the `USER` environment variable (a KeyError is
    raised if it is unset). No download is attempted.

    Returns:
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
        Labels are reshaped to `(num_samples, 1)`. Images are returned in
        the layout produced by `load_batch`; no transpose is applied here.
    """
    from tensorflow.python.keras.datasets.cifar import load_batch
    from tensorflow.python.keras import backend as K

    dirname = 'cifar-10-batches-py'
    path = '/mnt/bb/%s/%s' % (os.environ['USER'], dirname)

    total = 50000
    x_train = np.empty((total, 3, 32, 32), dtype='uint8')
    y_train = np.empty((total,), dtype='uint8')

    for batch_no in range(1, 6):
        lo, hi = (batch_no - 1) * 10000, batch_no * 10000
        images, labels = load_batch(
            os.path.join(path, 'data_batch_' + str(batch_no)))
        x_train[lo:hi, :, :, :] = images
        y_train[lo:hi] = labels

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    # The channels_last transpose is intentionally disabled in this variant.
    return (x_train, y_train), (x_test, y_test)
def load_data():
    """Loads CIFAR10 dataset.

    The archive is requested through `get_file` with its default cache
    location.

    Returns:
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
        Labels are reshaped to `(num_samples, 1)`; images are transposed
        with `(0, 2, 3, 1)` when the backend reports `channels_last`.
    """
    path = get_file(
        'cifar-10-batches-py',
        origin='https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz',
        untar=True)

    total = 50000
    x_train = np.empty((total, 3, 32, 32), dtype='uint8')
    y_train = np.empty((total,), dtype='uint8')

    # Batch k (0-based) occupies rows [k * 10000, (k + 1) * 10000).
    for k in range(5):
        rows = slice(k * 10000, (k + 1) * 10000)
        batch_file = os.path.join(path, 'data_batch_' + str(k + 1))
        x_train[rows, :, :, :], y_train[rows] = load_batch(batch_file)

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        x_train = np.transpose(x_train, (0, 2, 3, 1))
        x_test = np.transpose(x_test, (0, 2, 3, 1))

    return (x_train, y_train), (x_test, y_test)
def load_data(path='/cifar10/cifar-10-batches-py'):
    """Loads CIFAR10 dataset.

    Args:
        path: directory containing `data_batch_1` .. `data_batch_5` and
            `test_batch`.

    Returns:
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
        Labels are reshaped to `(num_samples, 1)`; images are transposed
        with `(0, 2, 3, 1)` when the backend reports `channels_last`.
    """
    total = 50000
    x_train = np.empty((total, 3, 32, 32), dtype='uint8')
    y_train = np.empty((total, ), dtype='uint8')

    for batch_no in range(1, 6):
        lo, hi = (batch_no - 1) * 10000, batch_no * 10000
        images, labels = load_batch(
            os.path.join(path, 'data_batch_' + str(batch_no)))
        x_train[lo:hi, :, :, :] = images
        y_train[lo:hi] = labels

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        x_train = np.transpose(x_train, (0, 2, 3, 1))
        x_test = np.transpose(x_test, (0, 2, 3, 1))

    return (x_train, y_train), (x_test, y_test)
def load_cifar100(split, path=None):
    """Return an iterator of `(image, label)` pairs for one CIFAR-100 split.

    Args:
        split: split name, compared case-insensitively. "test" reads the
            `test` file. "train" takes the first 45000 entries of a fixed
            permutation of the `train` file; any other value takes the
            remaining entries of that permutation.
        path: directory containing the `train` and `test` files. When None,
            the archive is fetched with `get_file` into `~/.capslayer`.

    Returns:
        A `zip` of float32 image rows of length 3072 (images transposed with
        `(0, 2, 3, 1)` and flattened) and int32 labels (fine labels).
    """
    if path is None:
        cache_path = os.path.join(os.path.expanduser('~'), ".capslayer")
        path = get_file('cifar-100-python',
                        cache_dir=cache_path,
                        file_hash=md5sum,
                        origin=URL,
                        untar=True)

    split = split.lower()
    if split == 'test':
        fpath = os.path.join(path, 'test')
        images, labels = load_batch(fpath, label_key='fine_labels')
    else:
        fpath = os.path.join(path, 'train')
        images, labels = load_batch(fpath, label_key='fine_labels')

        # A private RandomState seeded with 201808 yields the same
        # permutation as seeding the global generator, without resetting
        # the caller's process-wide NumPy random state.
        idx = np.arange(len(images))
        np.random.RandomState(201808).shuffle(idx)

        labels = np.reshape(labels, (-1, ))
        chosen = idx[:45000] if split == "train" else idx[45000:]
        images = images[chosen]
        labels = labels[chosen]

    images = np.reshape(images.transpose(0, 2, 3, 1),
                        (-1, 3072)).astype(np.float32)
    labels = np.reshape(labels, (-1, )).astype(np.int32)
    return zip(images, labels)
def read_cifar_100_data(path_data, subdir, label_mode='fine', STANDARDIZE_BOOL = True):
    """Loads [CIFAR100 dataset](https://www.cs.toronto.edu/~kriz/cifar.html).

    This is a dataset of 50,000 32x32 color training images and
    10,000 test images, labeled over 100 fine-grained classes that are
    grouped into 20 coarse-grained classes. See more info at the
    [CIFAR homepage](https://www.cs.toronto.edu/~kriz/cifar.html).

    Arguments:
        path_data: passed to `get_file` as `cache_dir`.
        subdir: passed to `get_file` as `cache_subdir`.
        label_mode: one of "fine", "coarse". If it is "fine" the category
            labels are the fine-grained labels, if it is "coarse" the output
            labels are the coarse-grained superclasses.
        STANDARDIZE_BOOL: when true, subtract the per-channel mean and
            divide by the per-channel standard deviation, both computed on
            the training images only.

    Returns:
        A dict `{'train': {'input', 'label'}, 'test': {'input', 'label'}}`.
        Inputs are float32 images scaled by 1/255 (and standardized when
        requested), in (num_samples, 3, 32, 32) layout for 'channels_first'
        or (num_samples, 32, 32, 3) for 'channels_last'. Labels are the
        output of `dense_to_one_hot(..., num_classes=100)`.

    Raises:
        ValueError: in case of invalid `label_mode`.
    """
    if label_mode not in ['fine', 'coarse']:
        raise ValueError('`label_mode` must be one of `"fine"`, `"coarse"`.')

    dirname = 'cifar-100-python'
    origin = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz'
    path = get_file(
        dirname,
        origin=origin,
        untar=True,
        file_hash=
        '85cd44d02ba6437773c5bbd22e183051d648de2e7d6b014e1ef29b855ba677a7',
        cache_dir=path_data,
        cache_subdir=subdir)

    fpath = os.path.join(path, 'train')
    x_train, y_train = load_batch(fpath, label_key=label_mode + '_labels')

    fpath = os.path.join(path, 'test')
    x_test, y_test = load_batch(fpath, label_key=label_mode + '_labels')

    y_train = onp.reshape(y_train, (len(y_train), 1))
    y_test = onp.reshape(y_test, (len(y_test), 1))

    channels_last = K.image_data_format() == 'channels_last'
    if channels_last:
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)

    # Pre-processing (normalize)
    train_images = onp.divide(x_train, 255, dtype=onp.float32)
    test_images = onp.divide(x_test, 255, dtype=onp.float32)
    train_labels = dense_to_one_hot(y_train, num_classes=100)
    test_labels = dense_to_one_hot(y_test, num_classes=100)

    if STANDARDIZE_BOOL:
        # Reduce over every axis except the channel axis. The axes depend on
        # the layout; using (0, 1, 2) for channels_first would average over
        # the channel axis instead of keeping it.
        reduce_axes = (0, 1, 2) if channels_last else (0, 2, 3)
        channel_mean = onp.mean(train_images, axis=reduce_axes,
                                dtype=onp.float32, keepdims=True)
        channel_std = onp.std(train_images, axis=reduce_axes,
                              dtype=onp.float32, keepdims=True)
        train_images = (train_images - channel_mean) / channel_std
        test_images = (test_images - channel_mean) / channel_std

    dataset = {
        'train': {'input': train_images, 'label': train_labels},
        'test': {'input': test_images, 'label': test_labels}}

    return dataset
def prep_data():
    """Probe one local image, then load the CIFAR10 dataset.

    The probe builds a `(1, w, h, 3)` uint8 array from `1000_left.jpeg` and
    prints its shape; that array is not part of the return value.

    Returns:
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
        Labels are reshaped to `(num_samples, 1)`; images are transposed
        with `(0, 2, 3, 1)` when the backend reports `channels_last`.
    """
    print("##data_prep called...")

    # --- single-image probe (diagnostic output only) ---
    config = cutil.Config(configid="myConfId", cdir="../../")
    img1 = myimg.myImg(imageid="xx", config=config, ekey='x123',
                       path="1000_left.jpeg")
    img1.printImageProp()

    w, h = img1.getImageDim()
    x_train1 = np.empty((1, w, h, 3), dtype='uint8')
    print(" x_train1 size [{}]".format(x_train1.shape))
    x_train1[0, :, :, :] = img1.getImage()
    print(" x_train1 size [{}]".format(x_train1.shape))

    # --- CIFAR-10 ---
    dirname = 'cifar-10-batches-py'
    origin = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
    # The remote URL is immediately superseded by a machine-local archive.
    # NOTE(review): `file://Users/...` has only two slashes, so "Users" may
    # be parsed as a host name - confirm whether `file:///Users/...` was meant.
    origin = 'file://Users/pankaj.petkar/.keras/datasets/cifar-10-batches-py.tar.gz'
    path = get_file(dirname, origin=origin, untar=True)

    total = 50000
    x_train = np.empty((total, 3, 32, 32), dtype='uint8')
    y_train = np.empty((total,), dtype='uint8')

    for batch_no in range(1, 6):
        lo, hi = (batch_no - 1) * 10000, batch_no * 10000
        images, labels = load_batch(
            os.path.join(path, 'data_batch_' + str(batch_no)))
        x_train[lo:hi, :, :, :] = images
        y_train[lo:hi] = labels

    print(x_train.shape)
    print(y_train.shape)

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        x_train = np.transpose(x_train, (0, 2, 3, 1))
        x_test = np.transpose(x_test, (0, 2, 3, 1))

    print(x_train.shape)
    print(y_train.shape)

    return (x_train, y_train), (x_test, y_test)
def load_data(filename):
    """Loads [CIFAR10 dataset](https://www.cs.toronto.edu/~kriz/cifar.html).

    This is a dataset of 50,000 32x32 color training images and 10,000 test
    images, labeled over 10 categories. See more info at the
    [CIFAR homepage](https://www.cs.toronto.edu/~kriz/cifar.html).

    Args:
        filename: path to a local CIFAR-10 tar archive, or None. When given,
            the archive is extracted into the current directory and the
            batches are read from `./cifar-10-batches-py`. When None, the
            archive is obtained through `get_file`.

    Returns:
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.

        **x_train, x_test**: uint8 arrays of RGB image data with shape
          `(num_samples, 3, 32, 32)` if `tf.keras.backend.image_data_format()`
          is `'channels_first'`, or `(num_samples, 32, 32, 3)` if the data
          format is `'channels_last'`.

        **y_train, y_test**: uint8 arrays of category labels
          (integers in range 0-9) each with shape (num_samples, 1).
    """
    # Imported locally so the block does not depend on a file-level import.
    import tarfile

    dirname = 'cifar-10-batches-py'
    origin = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'

    if filename is not None:
        # The builtin open() returns a plain file object with no
        # extractall(); the archive must be opened with tarfile. The context
        # manager also closes it when extraction fails.
        # NOTE(review): extractall() trusts member paths inside the archive;
        # only pass archives from a trusted source.
        with tarfile.open(filename) as archive:
            archive.extractall(path='./')
        path = dirname
    else:
        path = get_file(
            dirname,
            origin=origin,
            untar=True,
            file_hash=
            '6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce')

    num_train_samples = 50000
    x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
    y_train = np.empty((num_train_samples, ), dtype='uint8')

    # Each training batch file fills one contiguous 10000-row slice.
    for i in range(1, 6):
        fpath = os.path.join(path, 'data_batch_' + str(i))
        (x_train[(i - 1) * 10000:i * 10000, :, :, :],
         y_train[(i - 1) * 10000:i * 10000]) = load_batch(fpath)

    fpath = os.path.join(path, 'test_batch')
    x_test, y_test = load_batch(fpath)

    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)

    x_test = x_test.astype(x_train.dtype)
    y_test = y_test.astype(y_train.dtype)

    return (x_train, y_train), (x_test, y_test)
def load_data(self, rank):
    """Loads CIFAR10 dataset.

    Reads `self.FLAGS.DATA_NAME`, `self.FLAGS.DATA_PATH` and
    `self.FLAGS.subtract_pixel_mean`. `rank` is not used in this method.

    Returns:
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)` when
        `DATA_NAME` is 'CIFAR-10'. Images are float32 scaled by 1/255 (with
        the training mean image subtracted when `subtract_pixel_mean` is
        set); labels are int32 with shape `(num_samples, 1)`.
        For any other `DATA_NAME` an error is printed and None is returned.
    """
    if self.FLAGS.DATA_NAME != 'CIFAR-10':
        print("ERROR: The dataset is not Available...!")
        return None

    path = self.FLAGS.DATA_PATH
    print(path)

    total = 50000
    x_train = np.empty((total, 3, 32, 32), dtype='uint8')
    y_train = np.empty((total, ), dtype='uint8')

    for batch_no in range(1, 6):
        lo, hi = (batch_no - 1) * 10000, batch_no * 10000
        images, labels = load_batch(
            os.path.join(path, 'data_batch_' + str(batch_no)))
        x_train[lo:hi, :, :, :] = images
        y_train[lo:hi] = labels

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        x_train = np.transpose(x_train, (0, 2, 3, 1))
        x_test = np.transpose(x_test, (0, 2, 3, 1))

    x_test = x_test.astype(x_train.dtype)
    y_test = y_test.astype(y_train.dtype)

    # Scale pixels to [0, 1] in float32.
    x_train = x_train.astype("float32")
    x_test = x_test.astype("float32")
    x_train /= 255
    x_test /= 255

    if self.FLAGS.subtract_pixel_mean:
        # The mean image comes from the training set and is applied to both.
        x_train_mean = np.mean(x_train, axis=0)
        x_train -= x_train_mean
        x_test -= x_train_mean

    y_train = np.int32(y_train)
    y_test = np.int32(y_test)

    return (x_train, y_train), (x_test, y_test)
def load_data(label_mode='fine'):
    """Loads [CIFAR100 dataset](https://www.cs.toronto.edu/~kriz/cifar.html).

    This is a dataset of 50,000 32x32 color training images and
    10,000 test images, labeled over 100 fine-grained classes that are
    grouped into 20 coarse-grained classes. See more info at the
    [CIFAR homepage](https://www.cs.toronto.edu/~kriz/cifar.html).

    Args:
        label_mode: one of "fine", "coarse". If it is "fine" the category
            labels are the fine-grained labels, if it is "coarse" the output
            labels are the coarse-grained superclasses.

    Returns:
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.

        **x_train, x_test**: uint8 arrays of RGB image data with shape
          `(num_samples, 3, 32, 32)` if `tf.keras.backend.image_data_format()`
          is `'channels_first'`, or `(num_samples, 32, 32, 3)` if the data
          format is `'channels_last'`.

        **y_train, y_test**: uint8 arrays of category labels with shape
          (num_samples, 1).

    Raises:
        ValueError: in case of invalid `label_mode`.
    """
    if label_mode not in ['fine', 'coarse']:
        raise ValueError('`label_mode` must be one of `"fine"`, `"coarse"`.')

    data_dir = get_file(
        'cifar-100-python',
        origin='https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz',
        untar=True,
        file_hash=
        '85cd44d02ba6437773c5bbd22e183051d648de2e7d6b014e1ef29b855ba677a7')
    label_key = label_mode + '_labels'

    x_train, y_train = load_batch(os.path.join(data_dir, 'train'),
                                  label_key=label_key)
    x_test, y_test = load_batch(os.path.join(data_dir, 'test'),
                                label_key=label_key)

    # Labels become column vectors.
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        x_train = np.transpose(x_train, (0, 2, 3, 1))
        x_test = np.transpose(x_test, (0, 2, 3, 1))

    return (x_train, y_train), (x_test, y_test)
def get_cifar10():
    """Load CIFAR-10 from `../../data/cifar-10-batches-py` next to this file.

    The data directory is resolved relative to the directory of `__file__`.

    Returns:
        `(x_train, y_train), (x_test, y_test)`. Labels are reshaped to
        `(num_samples, 1)`; images are transposed with `(0, 2, 3, 1)` when
        the backend reports `channels_last`. The test arrays are cast to the
        dtypes of the training arrays.
    """
    total = 50000
    x_train = np.empty((total, 3, 32, 32), dtype='uint8')
    y_train = np.empty((total,), dtype='uint8')

    here = os.path.dirname(__file__)
    path = os.path.join(here, "../../data/", "cifar-10-batches-py")

    # Batch k (0-based) occupies rows [k * 10000, (k + 1) * 10000).
    for k in range(5):
        rows = slice(k * 10000, (k + 1) * 10000)
        batch_file = os.path.join(path, 'data_batch_' + str(k + 1))
        x_train[rows, :, :, :], y_train[rows] = load_batch(batch_file)

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        x_train = np.transpose(x_train, (0, 2, 3, 1))
        x_test = np.transpose(x_test, (0, 2, 3, 1))

    # Align the test dtypes with the (uint8) training arrays.
    x_test = x_test.astype(x_train.dtype)
    y_test = y_test.astype(y_train.dtype)

    return (x_train, y_train), (x_test, y_test)
def read_cifar_10_data(path_data, subdir, STANDARDIZE_BOOL = True):
    """Load CIFAR-10 and return scaled (optionally standardized) splits.

    Arguments:
        path_data: passed to `get_file` as `cache_dir`.
        subdir: passed to `get_file` as `cache_subdir`.
        STANDARDIZE_BOOL: when true, subtract the per-channel mean and
            divide by the per-channel standard deviation, both computed on
            the training images only.

    Returns:
        A dict `{'train': {'input', 'label'}, 'test': {'input', 'label'}}`.
        Inputs are float32 images scaled by 1/255 (and standardized when
        requested), transposed with `(0, 2, 3, 1)` when the backend reports
        `channels_last`. Labels are the output of
        `dense_to_one_hot(..., num_classes=10)`.
    """
    dirname = 'cifar-10-batches-py'
    origin = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
    path = get_file(
        dirname,
        origin=origin,
        untar=True,
        file_hash='6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce',
        cache_dir=path_data,
        cache_subdir=subdir)

    num_train_samples = 50000
    x_train = onp.empty((num_train_samples, 3, 32, 32), dtype='uint8')
    y_train = onp.empty((num_train_samples,), dtype='uint8')

    # Each training batch file fills one contiguous 10000-row slice.
    for i in range(1, 6):
        fpath = os.path.join(path, 'data_batch_' + str(i))
        (x_train[(i - 1)*10000: i*10000, :, :, :],
         y_train[(i - 1)*10000: i*10000]) = load_batch(fpath)

    fpath = os.path.join(path, 'test_batch')
    x_test, y_test = load_batch(fpath)

    y_train = onp.reshape(y_train, (len(y_train), 1))
    y_test = onp.reshape(y_test, (len(y_test), 1))

    channels_last = K.image_data_format() == 'channels_last'
    if channels_last:
        x_train = x_train.transpose(0, 2, 3, 1)
        x_test = x_test.transpose(0, 2, 3, 1)

    x_test = x_test.astype(x_train.dtype)
    y_test = y_test.astype(y_train.dtype)

    # Pre-processing (normalize)
    train_images = onp.divide(x_train, 255, dtype=onp.float32)
    test_images = onp.divide(x_test, 255, dtype=onp.float32)
    train_labels = dense_to_one_hot(y_train, num_classes=10)
    test_labels = dense_to_one_hot(y_test, num_classes=10)

    if STANDARDIZE_BOOL:
        # Reduce over every axis except the channel axis. The axes depend on
        # the layout; using (0, 1, 2) for channels_first would average over
        # the channel axis instead of keeping it.
        reduce_axes = (0, 1, 2) if channels_last else (0, 2, 3)
        channel_mean = onp.mean(train_images, axis=reduce_axes,
                                dtype=onp.float32, keepdims=True)
        channel_std = onp.std(train_images, axis=reduce_axes,
                              dtype=onp.float32, keepdims=True)
        train_images = (train_images - channel_mean) / channel_std
        test_images = (test_images - channel_mean) / channel_std

    dataset = {
        'train': {'input': train_images, 'label': train_labels},
        'test': {'input': test_images, 'label': test_labels},
    }

    return dataset
def cifar10(train=True, data_path='cifar10'):
    """Returns CIFAR-10 loaded data.

    Both the training and the test batches are read regardless of `train`;
    the flag only selects which pair is returned.

    Args:
        train: if true use training data, else use testing data
        data_path: directory containing `data_batch_1` .. `data_batch_5`
            and `test_batch`.

    Returns:
        dataset: A Dataset namedtuple containing the generated data and
            labels. Images are transposed with `(0, 2, 3, 1)`, cast to
            float32 and divided by 255; labels are int32 with shape
            `(num_samples, 1)`.
    """
    total = 50000
    x_train = np.empty((total, 3, 32, 32), dtype='uint8')
    y_train = np.empty((total,), dtype='uint8')

    for batch_no in range(1, 6):
        lo, hi = (batch_no - 1) * 10000, batch_no * 10000
        images, batch_labels = load_batch(
            os.path.join(data_path, 'data_batch_' + str(batch_no)))
        x_train[lo:hi, :, :, :] = images
        y_train[lo:hi] = batch_labels

    x_test, y_test = load_batch(os.path.join(data_path, 'test_batch'))

    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    # Always channels-last here; the backend check is intentionally skipped.
    x_train = np.transpose(x_train, (0, 2, 3, 1))
    x_test = np.transpose(x_test, (0, 2, 3, 1))
    print ("image_shape!!!!!!")
    print (x_test.shape)

    x_test = x_test.astype(x_train.dtype)
    y_test = y_test.astype(y_train.dtype)

    x_train = x_train.astype(np.float32) / 255.
    y_train = y_train.astype(np.int32)
    x_test = x_test.astype(np.float32) / 255.
    y_test = y_test.astype(np.int32)

    imgs, labels = (x_train, y_train) if train else (x_test, y_test)
    return Dataset(imgs, labels)
def load_data(dataset_base_path, train=True, label_mode="fine"):
    """Loads one split of the CIFAR100 dataset.

    Parameters
    ----------
    dataset_base_path: str
        Path to create dataset dir, a recommended choice is project root dir
    train: bool
        flag, return training set or test set
    label_mode: str
        one of "fine", "coarse".

    Returns
    -------
    Tuple of Numpy arrays `(x, y)` for the requested split. Images are
    transposed with `(0, 2, 3, 1)` when the backend reports `channels_last`;
    labels are converted with `np.asarray` and not reshaped.

    Raises
    ------
    ValueError: in case of invalid `label_mode`.
    """
    if label_mode not in ['fine', 'coarse']:
        raise ValueError('`label_mode` must be one of `"fine"`, `"coarse"`.')

    path = get_file(
        'cifar-100-python',
        origin='https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz',
        untar=True,
        cache_dir=dataset_base_path)

    # The two splits differ only in the file that is read.
    split_file = 'train' if train else 'test'
    images, labels = load_batch(os.path.join(path, split_file),
                                label_key=label_mode + '_labels')
    labels = np.asarray(labels)

    if K.image_data_format() == 'channels_last':
        images = images.transpose((0, 2, 3, 1))

    return images, labels
def load_cifar_data():
    """Load CIFAR-10 from `datasets/cifar-10-batches-py`, fetching if absent.

    When the relative directory does not exist, `get_file` is called with
    the current working directory as `cache_dir`.
    NOTE(review): the batches are then read from the relative `datasets/...`
    path, which presumably matches where `get_file` unpacks - confirm.

    Returns:
        `(x_train, y_train), (x_test, y_test)`. Labels are reshaped to
        `(num_samples, 1)`; images are transposed with `(0, 2, 3, 1)` when
        the backend reports `channels_last`. The test arrays are cast to the
        dtypes of the training arrays.
    """
    dirname = 'cifar-10-batches-py'
    path = "datasets/" + dirname

    if not os.path.exists(path):
        print("Local data not found. Retrieving from source.")
        get_file(
            dirname,
            origin='https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz',
            untar=True,
            file_hash=
            '6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce',
            cache_dir=os.getcwd())
    else:
        print("Local data found.")

    total = 50000
    x_train = np.empty((total, 3, 32, 32), dtype='uint8')
    y_train = np.empty((total, ), dtype='uint8')

    # Batch k (0-based) occupies rows [k * 10000, (k + 1) * 10000).
    for k in range(5):
        rows = slice(k * 10000, (k + 1) * 10000)
        batch_file = os.path.join(path, 'data_batch_' + str(k + 1))
        x_train[rows, :, :, :], y_train[rows] = load_batch(batch_file)

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        x_train = np.transpose(x_train, (0, 2, 3, 1))
        x_test = np.transpose(x_test, (0, 2, 3, 1))

    x_test = x_test.astype(x_train.dtype)
    y_test = y_test.astype(y_train.dtype)

    return (x_train, y_train), (x_test, y_test)
def load_data(dataset_base_path, train=True):
    """Loads one split of the CIFAR10 dataset.

    Parameters
    ----------
    dataset_base_path: str
        Path to create dataset dir, a recommended choice is project root dir
    train: bool
        flag, return training set or test set

    Returns:
        Tuple of Numpy arrays `(x, y)` for the requested split. Images are
        transposed with `(0, 2, 3, 1)` when the backend reports
        `channels_last`; labels are converted with `np.asarray` and not
        reshaped.
    """
    path = get_file(
        'cifar-10-batches-py',
        origin='https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz',
        untar=True,
        cache_dir=dataset_base_path)
    channels_last_axes = (0, 2, 3, 1)

    if not train:
        x_test, y_test = load_batch(os.path.join(path, 'test_batch'))
        y_test = np.asarray(y_test)
        if K.image_data_format() == 'channels_last':
            x_test = x_test.transpose(channels_last_axes)
        return x_test, y_test

    total = 50000
    x_train = np.empty((total, 3, 32, 32), dtype='uint8')
    y_train = np.empty((total, ), dtype='uint8')

    for batch_no in range(1, 6):
        lo, hi = (batch_no - 1) * 10000, batch_no * 10000
        images, labels = load_batch(
            os.path.join(path, 'data_batch_' + str(batch_no)))
        x_train[lo:hi, :, :, :] = images
        y_train[lo:hi] = labels

    y_train = np.asarray(y_train)
    if K.image_data_format() == 'channels_last':
        x_train = x_train.transpose(channels_last_axes)
    return x_train, y_train
def load_cifar10(path, split):
    """Return an iterator of `(image, label)` pairs for a CIFAR-10 split.

    Args:
        path: directory that contains the `cifar-10-batches-py` folder.
        split: split name, compared case-insensitively. "eval" reads
            `test_batch`; any other value reads the five training batches.

    Returns:
        A `zip` of float32 image rows of length 3072 (images transposed with
        `(0, 2, 3, 1)` and flattened) and int32 labels.
    """
    batch_dir = os.path.join(path, 'cifar-10-batches-py')

    if split.lower() == 'eval':
        images, labels = load_batch(os.path.join(batch_dir, 'test_batch'))
    else:
        total = 50000
        images = np.empty((total, 3, 32, 32), dtype='uint8')
        labels = np.empty((total, ), dtype='uint8')
        # Batch k (0-based) occupies rows [k * 10000, (k + 1) * 10000).
        for k in range(5):
            rows = slice(k * 10000, (k + 1) * 10000)
            batch_file = os.path.join(batch_dir, 'data_batch_' + str(k + 1))
            images[rows, :, :, :], labels[rows] = load_batch(batch_file)

    channels_last = images.transpose(0, 2, 3, 1)
    images = np.reshape(channels_last, (-1, 3072)).astype(np.float32)
    labels = np.reshape(labels, (-1, )).astype(np.int32)

    return zip(images, labels)
def cifar10_load_data(path):
    """Loads CIFAR10 dataset.

    Nothing is downloaded; the batch files are read directly from `path`.

    Args:
        path: directory containing `data_batch_1` .. `data_batch_5` and
            `test_batch`.

    Returns:
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
        Labels are reshaped to `(num_samples, 1)`; images are transposed
        with `(0, 2, 3, 1)` when the backend reports `channels_last`. The
        test arrays are cast to the dtypes of the training arrays.
    """
    total = 50000
    x_train = np.empty((total, 3, 32, 32), dtype='uint8')
    y_train = np.empty((total, ), dtype='uint8')

    for batch_no in range(1, 6):
        lo, hi = (batch_no - 1) * 10000, batch_no * 10000
        images, labels = load_batch(
            os.path.join(path, 'data_batch_' + str(batch_no)))
        x_train[lo:hi, :, :, :] = images
        y_train[lo:hi] = labels

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        x_train = np.transpose(x_train, (0, 2, 3, 1))
        x_test = np.transpose(x_test, (0, 2, 3, 1))

    # Align the test dtypes with the (uint8) training arrays.
    x_test = x_test.astype(x_train.dtype)
    y_test = y_test.astype(y_train.dtype)

    return (x_train, y_train), (x_test, y_test)
def load_data(dirname):
    """Loads a CIFAR dataset stored as single `train` and `test` files.

    The label key is "fine_labels" when the module-level `is_fine_label` is
    truthy and "coarse_labels" otherwise.

    Args:
        dirname: directory containing the `train` and `test` files.

    Returns:
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
        Labels are reshaped to `(num_samples, 1)`; images are transposed
        with `(0, 2, 3, 1)` when the backend reports `channels_last`.
    """
    label_key = "fine_labels" if is_fine_label else "coarse_labels"

    x_train, y_train = load_batch(os.path.join(dirname, 'train'),
                                  label_key=label_key)
    x_test, y_test = load_batch(os.path.join(dirname, 'test'),
                                label_key=label_key)

    # Labels become column vectors.
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        x_train = np.transpose(x_train, (0, 2, 3, 1))
        x_test = np.transpose(x_test, (0, 2, 3, 1))

    return (x_train, y_train), (x_test, y_test)
def load_data(label_mode='fine', data_directory=None):
    """Loads CIFAR100 dataset.

    Reference:
    https://github.com/tensorflow/tensorflow/blob/v2.0.0/tensorflow/python/keras/datasets/cifar100.py

    Arguments:
        label_mode: one of "fine", "coarse".
        data_directory: passed to `get_file` as `cache_dir`.

    Returns:
        Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`.
        Labels are reshaped to `(num_samples, 1)`; images are transposed
        with `(0, 2, 3, 1)` when the backend reports `channels_last`.

    Raises:
        ValueError: in case of invalid `label_mode`.
    """
    if label_mode not in ['fine', 'coarse']:
        raise ValueError('`label_mode` must be one of `"fine"`, `"coarse"`.')

    data_dir = get_file(
        'cifar-100-python',
        origin='https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz',
        untar=True,
        file_hash='85cd44d02ba6437773c5bbd22e183051d648de2e7d6b014e1ef29b8'
                  '55ba677a7',
        cache_dir=data_directory)
    label_key = label_mode + '_labels'

    x_train, y_train = load_batch(os.path.join(data_dir, 'train'),
                                  label_key=label_key)
    x_test, y_test = load_batch(os.path.join(data_dir, 'test'),
                                label_key=label_key)

    # Labels become column vectors.
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        x_train = np.transpose(x_train, (0, 2, 3, 1))
        x_test = np.transpose(x_test, (0, 2, 3, 1))

    return (x_train, y_train), (x_test, y_test)
def load_cifar10(split, path=None):
    """Return an iterator of `(image, label)` pairs for one CIFAR-10 split.

    Args:
        split: split name, compared case-insensitively. "test" reads
            `test_batch`. "train" takes the first 45000 entries of a fixed
            permutation of the five training batches; any other value takes
            the remaining entries of that permutation.
        path: directory containing the batch files. When None, the archive
            is fetched with `get_file` into `~/.capslayer`.

    Returns:
        A `zip` of float32 image rows of length 3072 (images transposed with
        `(0, 2, 3, 1)` and flattened) and int32 labels.
    """
    if path is None:
        cache_path = os.path.join(os.path.expanduser('~'), ".capslayer")
        path = get_file('cifar-10-batches-py',
                        cache_dir=cache_path,
                        file_hash=md5sum,
                        origin=URL,
                        untar=True)

    split = split.lower()
    if split == 'test':
        fpath = os.path.join(path, 'test_batch')
        images, labels = load_batch(fpath)
    else:
        num_samples = 50000
        images = np.empty((num_samples, 3, 32, 32), dtype='uint8')
        labels = np.empty((num_samples, ), dtype='uint8')

        # Each training batch file fills one contiguous 10000-row slice.
        for i in range(1, 6):
            fpath = os.path.join(path, 'data_batch_' + str(i))
            (images[(i - 1) * 10000:i * 10000, :, :, :],
             labels[(i - 1) * 10000:i * 10000]) = load_batch(fpath)

        # A private RandomState seeded with 201808 yields the same
        # permutation as seeding the global generator, without resetting
        # the caller's process-wide NumPy random state.
        idx = np.arange(len(images))
        np.random.RandomState(201808).shuffle(idx)

        chosen = idx[:45000] if split == "train" else idx[45000:]
        images = images[chosen]
        labels = labels[chosen]

    images = np.reshape(images.transpose(0, 2, 3, 1),
                        (-1, 3072)).astype(np.float32)
    labels = np.reshape(labels, (-1, )).astype(np.int32)

    return zip(images, labels)
def load_data():
    """Loads the CIFAR10 dataset.

    This is a dataset of 50,000 32x32 color training images and 10,000 test
    images, labeled over 10 categories. See more info at the
    [CIFAR homepage](https://www.cs.toronto.edu/~kriz/cifar.html).

    The classes are:

    | Label | Description |
    |:-----:|-------------|
    |   0   | airplane    |
    |   1   | automobile  |
    |   2   | bird        |
    |   3   | cat         |
    |   4   | deer        |
    |   5   | dog         |
    |   6   | frog        |
    |   7   | horse       |
    |   8   | ship        |
    |   9   | truck       |

    Returns:
        Tuple of NumPy arrays: `(x_train, y_train), (x_test, y_test)`.

        **x_train**: uint8 NumPy array of color image data with shape
        `(50000, 32, 32, 3)` when the backend image data format is
        `channels_last` (otherwise `(50000, 3, 32, 32)`), containing the
        training data.

        **y_train**: uint8 NumPy array of labels with shape `(50000, 1)`
        for the training data.

        **x_test**: image data for the test split, cast to the dtype of
        `x_train` and laid out the same way.

        **y_test**: labels for the test split as a column vector, cast to
        the dtype of `y_train`.

    Example:

    ```python
    (x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
    assert x_train.shape == (50000, 32, 32, 3)
    assert x_test.shape == (10000, 32, 32, 3)
    assert y_train.shape == (50000, 1)
    assert y_test.shape == (10000, 1)
    ```
    """
    path = get_file(
        'cifar-10-batches-py',
        origin='https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz',
        untar=True,
        file_hash=
        '6d958be074577803d12ecdefd02955f39262c83c16fe9348329d7fe0b5c001ce')

    # The training set is stored as five files of 10,000 samples each.
    num_batches = 5
    batch_size = 10000
    num_train_samples = num_batches * batch_size

    x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8')
    y_train = np.empty((num_train_samples,), dtype='uint8')

    for batch_index in range(num_batches):
        start = batch_index * batch_size
        stop = start + batch_size
        # Batch files are numbered from 1.
        batch_file = os.path.join(path, 'data_batch_' + str(batch_index + 1))
        (x_train[start:stop, :, :, :],
         y_train[start:stop]) = load_batch(batch_file)

    x_test, y_test = load_batch(os.path.join(path, 'test_batch'))

    # Labels become (num_samples, 1) column vectors.
    y_train = np.reshape(y_train, (len(y_train), 1))
    y_test = np.reshape(y_test, (len(y_test), 1))

    if K.image_data_format() == 'channels_last':
        # Move the channel axis from position 1 to the end.
        channels_last_axes = (0, 2, 3, 1)
        x_train = x_train.transpose(channels_last_axes)
        x_test = x_test.transpose(channels_last_axes)

    # Make the test arrays share the training arrays' dtypes.
    x_test = x_test.astype(x_train.dtype)
    y_test = y_test.astype(y_train.dtype)

    return (x_train, y_train), (x_test, y_test)
) num_train_samples = 50000 # empty(shape[, dtype, order]) x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8') y_train = np.empty((num_train_samples, ), dtype='uint8') for i in range(1, 6): # 如果参数中某个部分是绝对路径,则绝对路径前的路径都将被丢弃,并从绝对路径部分开始连接。 # load_batch return: # data = data.reshape(data.shape[0], 3, 32, 32) # labels = d[label_key] fpath = os.path.join(path, 'data_batch_' + str(i)) (x_train[(i - 1) * 10000:i * 10000, :, :, :], y_train[(i - 1) * 10000:i * 10000]) = load_batch(fpath) print('x_train.type:', type(x_train)) print('y_train.type:', type(y_train)) print('x_train.shape:', x_train.shape) print('y_train.shape:', np.shape(y_train)) fpath = os.path.join(path, 'test_batch') x_test, y_test = load_batch(fpath) print('x_test.shape:', x_test.shape) print('y_test.shape:', np.shape(y_test)) y_train = np.reshape(y_train, (len(y_train), 1)) y_test = np.reshape(y_test, (len(y_test), 1)) print('y_train.shape:', y_train.shape) print('y_test.shape:', y_test.shape)
def load_data(label_mode='fine'):
    """Loads the CIFAR100 dataset.

    This is a dataset of 50,000 32x32 color training images and
    10,000 test images, labeled over 100 fine-grained classes that are
    grouped into 20 coarse-grained classes. See more info at the
    [CIFAR homepage](https://www.cs.toronto.edu/~kriz/cifar.html).

    Args:
        label_mode: one of "fine", "coarse". With "fine" the returned labels
            are the fine-grained classes; with "coarse" they are the
            coarse-grained superclasses.

    Returns:
        Tuple of NumPy arrays: `(x_train, y_train), (x_test, y_test)`.

        **x_train**, **x_test**: color image data for the training and test
        splits, in channels-last layout (e.g. `(50000, 32, 32, 3)`) when the
        backend image data format is `channels_last`.

        **y_train**, **y_test**: integer labels as column vectors of shape
        `(num_samples, 1)`.

    Raises:
        ValueError: in case of invalid `label_mode`.

    Example:

    ```python
    (x_train, y_train), (x_test, y_test) = keras.datasets.cifar100.load_data()
    assert x_train.shape == (50000, 32, 32, 3)
    assert x_test.shape == (10000, 32, 32, 3)
    assert y_train.shape == (50000, 1)
    assert y_test.shape == (10000, 1)
    ```
    """
    valid_modes = ('fine', 'coarse')
    if label_mode not in valid_modes:
        raise ValueError('`label_mode` must be one of `"fine"`, `"coarse"`.')

    def _as_column(labels):
        # Reshape a flat label sequence into a (num_samples, 1) array.
        return np.reshape(labels, (len(labels), 1))

    path = get_file(
        'cifar-100-python',
        origin='https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz',
        untar=True,
        file_hash=
        '85cd44d02ba6437773c5bbd22e183051d648de2e7d6b014e1ef29b855ba677a7')

    train_file = os.path.join(path, 'train')
    x_train, y_train = load_batch(train_file, label_key=label_mode + '_labels')

    test_file = os.path.join(path, 'test')
    x_test, y_test = load_batch(test_file, label_key=label_mode + '_labels')

    y_train = _as_column(y_train)
    y_test = _as_column(y_test)

    if K.image_data_format() != 'channels_last':
        return (x_train, y_train), (x_test, y_test)

    # Move the channel axis from position 1 to the end.
    x_train = x_train.transpose(0, 2, 3, 1)
    x_test = x_test.transpose(0, 2, 3, 1)
    return (x_train, y_train), (x_test, y_test)