def _get_data(training=True):
    """Load one MNIST split as an array of ``(features, label)`` pairs.

    Args:
        training: When true, read ``TRAIN_IMAGES`` / ``TRAIN_LABELS``;
            otherwise read ``TEST_IMAGES`` / ``TEST_LABELS``.

    Returns:
        A numpy object array built from one ``(feature_vector, label)`` pair
        per label, where each feature vector is the flattened, normalized
        sample.
    """

    def _normalize_features(features):
        """Flatten every sample, then center and scale the whole matrix."""
        features_matrix = numpy.array([f.flatten() for f in features])
        # The mean is taken over the entire matrix (not per feature) and the
        # centered values are divided by 255.
        return (features_matrix - features_matrix.mean()) / 255.

    image_path = TRAIN_IMAGES if training else TEST_IMAGES
    label_path = TRAIN_LABELS if training else TEST_LABELS

    with open(image_path, 'rb') as f:
        features = _normalize_features(mnist.parse_idx(f))
    with open(label_path, 'rb') as f:
        labels = mnist.parse_idx(f)

    # ``range`` instead of the Python 2-only ``xrange`` so the function also
    # runs on Python 3.
    # ``dtype=object`` is explicit because each pair mixes a 1-D vector with a
    # scalar label; recent NumPy versions refuse to infer an object array from
    # such ragged input.
    return numpy.array(
        [(features[i, :], labels[i]) for i in range(len(labels))],
        dtype=object,
    )
def test_file_with_int_type_returns_correct_values(self):
    """A single four-byte int payload decodes to a one-element array."""
    header = b'\x00\x00\x0c\x01'
    dimension_size = b'\x00\x00\x00\x01'
    payload = b'\x00\x00\x00\xff'  # big-endian two's complement of 255
    stream = io.BytesIO(header + dimension_size + payload)

    parsed = mnist.parse_idx(stream)

    self.assertIsInstance(parsed, numpy.ndarray)
    self.assertEqual([255], parsed.tolist())
def test_file_with_negative_int_returns_correct_values(self):
    """An all-ones four-byte int payload decodes to -1."""
    header = b'\x00\x00\x0c\x01'
    dimension_size = b'\x00\x00\x00\x01'
    payload = b'\xff\xff\xff\xff'  # two's complement of -1
    stream = io.BytesIO(header + dimension_size + payload)

    parsed = mnist.parse_idx(stream)

    self.assertIsInstance(parsed, np.ndarray)
    self.assertEqual([-1], parsed.tolist())
def test_file_with_one_dimension_returns_correct_values(self):
    """A one-dimensional file with two single-byte items is decoded in order."""
    header = b'\x00\x00\x08\x01'
    dimension_size = b'\x00\x00\x00\x02'
    payload = b'\xff\x00'
    stream = io.BytesIO(header + dimension_size + payload)

    parsed = mnist.parse_idx(stream)

    self.assertIsInstance(parsed, np.ndarray)
    self.assertEqual([255, 0], parsed.tolist())
def test_file_with_two_dimensions_returns_correct_values(self):
    """A 2 x 3 file of single-byte items is decoded into nested rows."""
    header = b'\x00\x00\x08\x02'
    row_count = b'\x00\x00\x00\x02'
    column_count = b'\x00\x00\x00\x03'
    payload = b'\x00\x01\x02\x03\x04\x05'
    stream = io.BytesIO(header + row_count + column_count + payload)

    parsed = mnist.parse_idx(stream)

    self.assertIsInstance(parsed, np.ndarray)
    self.assertEqual([[0, 1, 2], [3, 4, 5]], parsed.tolist())
def load_mnist(dataset: str, path: str):
    """
    Retrieve data from the MNIST data files.

    Args:
        dataset (str): dataset type; options are "train images",
            "train labels", "test images", "test labels"
        path (str): path to the folder containing the MNIST files.

    Returns:
        numpy array of the corresponding data, as produced by
        ``mnist.parse_idx``.

    Raises:
        KeyError: if ``dataset`` is not one of the options listed above.
    """
    import os  # local import so the function does not rely on file-level imports

    filename_pointer = {
        "train images": "train-images.idx3-ubyte",
        "train labels": "train-labels.idx1-ubyte",
        "test images": "t10k-images.idx3-ubyte",
        "test labels": "t10k-labels.idx1-ubyte",
    }
    # os.path.join makes the folder path work with or without a trailing
    # separator; plain string concatenation produced a wrong filename when the
    # separator was missing.
    filename = os.path.join(path, filename_pointer[dataset])
    with open(filename, 'rb') as idx_file:
        return mnist.parse_idx(idx_file)
return (sum( (np.argmax(x, axis=1) == np.argmax(y_true, axis=1))) / len(y_true)) def one_hot_vector(j): e = np.zeros(10) e[j] = 1.0 return e from sklearn import datasets import sklearn.metrics import mnist np.random.seed(1) trainx = mnist.parse_idx('train-images-idx3-ubyte') x = np.array([x.reshape(784) for x in trainx]) / 255 trainy = mnist.parse_idx('train-labels-idx1-ubyte') y = np.array([one_hot_vector(j) for j in trainy]) testx = mnist.parse_idx('t10k-images-idx3-ubyte') testx = Xs = [x.reshape(784) for x in testx] testy = mnist.parse_idx('t10k-labels-idx1-ubyte') testy = Ys = [one_hot_vector(j) for j in testy] nn = Network((784, 128, 64, 10), (Relu, Relu, Softmax), "1.bin") nn.fit( x, y,
def test_unexpected_items_raises_error(self):
    """Three payload bytes after a header declaring two items are rejected."""
    header = b'\x00\x00\x08\x01'
    dimension_size = b'\x00\x00\x00\x02'
    payload = b'\x00\x01\x02'  # one byte more than the declared size
    stream = io.BytesIO(header + dimension_size + payload)

    self.assertRaises(mnist.IdxDecodeError, mnist.parse_idx, stream)
def test_unknown_data_type_raises_error(self):
    """A header whose third byte is 0xff is rejected as an unknown data type."""
    stream = io.BytesIO(b'\x00\x00\xff\x00')

    self.assertRaises(mnist.IdxDecodeError, mnist.parse_idx, stream)
def test_missing_initial_zeros_raises_exception(self):
    """A header that does not start with two zero bytes is rejected."""
    stream = io.BytesIO(b'\xff\xff\x08\x00')

    self.assertRaises(mnist.IdxDecodeError, mnist.parse_idx, stream)
def test_missing_header_raises_exception(self):
    """A stream holding only a single byte is rejected."""
    stream = io.BytesIO(b'\x00')

    self.assertRaises(mnist.IdxDecodeError, mnist.parse_idx, stream)
def test_empty_file_raises_exception(self):
    """A stream with no bytes at all is rejected."""
    stream = io.BytesIO(b'')

    self.assertRaises(mnist.IdxDecodeError, mnist.parse_idx, stream)
def parse_mnist_file(fname):
    """Parse the file at *fname* with ``mnist.parse_idx``.

    The file is opened through ``gzip.open`` when its extension is exactly
    ``.gz`` and through the built-in ``open`` otherwise.
    """
    _, extension = os.path.splitext(fname)
    if extension == '.gz':
        opener = gzip.open
    else:
        opener = open

    with opener(fname, 'rb') as stream:
        parsed = mnist.parse_idx(stream)
    return parsed