def data():
    """Download CIFAR-10 and return ((Xtr, ytr), (Xva, yva), (Xte, yte)).

    Pixel values are scaled to [0, 1] as float32. The first four pickled
    batches form the training set, the fifth the validation set, and the
    archive's test batch the test set.
    """
    fname = _download('http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz')

    def _load_batch(tar, member):
        # Unpickle one batch out of the open tar archive and scale it.
        with tar.extractfile(member) as fp:
            batch = _pickle.load(fp, encoding='latin1')
        X = _np.array(batch['data'], dtype=_np.float32)
        X /= 255
        return X, _np.array(batch['labels'])

    with _taropen(fname, 'r') as f:
        # The first four batches are used as training set...
        parts = [_load_batch(f, 'cifar-10-batches-py/data_batch_' + str(i))
                 for i in range(1, 5)]
        Xtr = _np.concatenate([X for X, _ in parts])
        ytr = _np.concatenate([y for _, y in parts])

        # ... and the fifth as validation set as described in cuda-convnet:
        # https://code.google.com/p/cuda-convnet/wiki/Methodology
        Xva, yva = _load_batch(f, 'cifar-10-batches-py/data_batch_5')

        Xte, yte = _load_batch(f, 'cifar-10-batches-py/test_batch')

    return (Xtr, ytr), (Xva, yva), (Xte, yte)
def data():
    """Download MNIST and return the pickled (train, valid, test) splits.

    Each split is whatever the upstream ``mnist.pkl.gz`` pickle contains
    (an (images, labels) pair per split).
    """
    fname = _download('http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz')
    with _gzip.open(fname, 'rb') as f:
        # The file was pickled by Python 2; on Python 3 we must decode
        # its byte strings as latin1 to load it.
        if _sys.version_info[0] < 3:
            return _pickle.load(f)
        return _pickle.load(f, encoding='latin1')
def data():
    """Fetch the MNIST pickle and return its (train, valid, test) tuples."""
    fname = _download(
        'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz')
    with _gzip.open(fname, 'rb') as f:
        # Python-2 pickle: Python 3 needs an explicit latin1 encoding.
        kwargs = {'encoding': 'latin1'} if _sys.version_info[0] == 3 else {}
        return _pickle.load(f, **kwargs)
def data():
    """Download CIFAR-100 and return train/valid/test splits plus label names.

    Returns ``(Xtr, ytr_c, ytr_f), (Xva, yva_c, yva_f), (Xte, yte_c, yte_f),
    (le_c, le_f)`` where ``_c``/``_f`` are coarse/fine labels and ``le_*`` are
    sklearn ``LabelEncoder`` objects (or plain name arrays if sklearn is
    unavailable). Pixels are float32 in [0, 1].
    """
    fname = _download(
        'http://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz')
    with _taropen(fname, 'r') as f:
        with f.extractfile('cifar-100-python/train') as fp:
            tr = _pickle.load(fp, encoding='latin1')
        Xtr = _np.array(tr['data'], dtype=_np.float32)
        Xtr /= 255
        ytr_c = _np.array(tr['coarse_labels'])
        ytr_f = _np.array(tr['fine_labels'])

        # There is no "official" validation set here that I know of!
        # But the maxout paper uses the last 10k samples as validation.
        Xtr, Xva = Xtr[:-10000], Xtr[-10000:]
        ytr_c, yva_c = ytr_c[:-10000], ytr_c[-10000:]
        ytr_f, yva_f = ytr_f[:-10000], ytr_f[-10000:]

        with f.extractfile('cifar-100-python/test') as fp:
            te = _pickle.load(fp, encoding='latin1')
        Xte = _np.array(te['data'], dtype=_np.float32)
        Xte /= 255
        yte_c = _np.array(te['coarse_labels'])
        yte_f = _np.array(te['fine_labels'])

        # Get the label names additionally.
        with f.extractfile('cifar-100-python/meta') as fp:
            meta = _pickle.load(fp, encoding='latin1')

    try:
        from sklearn.preprocessing import LabelEncoder
        le_c = LabelEncoder()
        le_c.classes_ = _np.array(meta['coarse_label_names'])
        le_f = LabelEncoder()
        le_f.classes_ = _np.array(meta['fine_label_names'])
    except ImportError:
        # Best-effort fallback: just hand back the raw name arrays.
        le_c = _np.array(meta['coarse_label_names'])
        le_f = _np.array(meta['fine_label_names'])

    return (Xtr, ytr_c, ytr_f), (Xva, yva_c, yva_f), (Xte, yte_c, yte_f), (le_c, le_f)