def __init__(self, array, batch_size=None):
    """Load one MNIST partition and prepare it for mini-batch access.

    The pixel vectors of the selected partition are converted to
    ``float32`` and divided by 255, the labels are expanded into a
    one-hot ``float32`` matrix with 10 columns, and both arrays are
    handed to ``gpuarray.to_gpu`` using ``memory_pool.allocate`` as
    the allocator.

    :param array: {'train', 'val', 'test'}
        Which official MNIST split to load.
    :param batch_size: Size of the mini-batches. ``None`` means the
        whole partition is used as a single batch.
    :raises ValueError: If ``array`` is not one of the known
        partition names.
    """
    # The module has been seen under two names; try both.
    # NOTE(review): presumably this differs between skdata releases.
    try:
        from skdata.mnist.view import OfficialVectorClassification
    except ImportError:
        from skdata.mnist.views import OfficialVectorClassification

    self.mnist = OfficialVectorClassification()

    self.train_idx = self.mnist.fit_idxs
    self.val_idx = self.mnist.val_idxs
    self.test_idx = self.mnist.tst_idxs

    self.N_train = self.train_idx.shape[0]
    self.N_val = self.val_idx.shape[0]
    self.N_test = self.test_idx.shape[0]
    self.D = self.mnist.all_vectors.shape[1]

    # Pick the index set of the requested partition; everything below
    # is identical for all three partitions.
    if array == 'train':
        idx = self.train_idx
    elif array == 'val':
        idx = self.val_idx
    elif array == 'test':
        idx = self.test_idx
    else:
        raise ValueError('Unknown partition "%s"' % array)

    self.N = idx.shape[0]

    self.data = gpuarray.to_gpu(
        self.mnist.all_vectors[idx].astype(np.float32) / 255.,
        allocator=memory_pool.allocate)

    # One-hot encode the labels. The matrix is sized from the selected
    # partition (self.N); the previous code sized the validation
    # targets with N_train, producing a matrix with the wrong number
    # of rows.
    targets = self.mnist.all_labels[idx]
    labels_soft = np.zeros((self.N, 10), dtype=np.float32)
    labels_soft[range(self.N), targets] = 1.
    self.targets = gpuarray.to_gpu(labels_soft,
                                   allocator=memory_pool.allocate)

    self.batch_size = batch_size if batch_size is not None else self.N
    self.i = 0
    # _make_batches is not defined in this method; it is expected to be
    # provided by the enclosing class or one of its bases.
    self._make_batches()
**input_shapes) # The above executor computes gradients. When evaluating test data we don't need this. # We want this executor to share weights with the above one, so we will use bind # (instead of simple_bind) and use the other executor's arguments. executor_test = cost_classification.bind(ctx=mx.cpu(0), grad_req='null', args=executor.arg_arrays) # initialize the weights for r in executor.arg_arrays: r[:] = np.random.randn(*r.shape) * 0.02 # Using skdata to get mnist data. This is for portability. Can sub in any data loading you like. from skdata.mnist.view import OfficialVectorClassification data = OfficialVectorClassification() trIdx = data.sel_idxs[:] teIdx = data.val_idxs[:] for epoch in range(20): np.random.shuffle(trIdx) for x in range(0, len(trIdx), batch_size): # extract a batch from mnist batchX = data.all_vectors[trIdx[x:x + batch_size]] batchY = data.all_labels[trIdx[x:x + batch_size]] # our executor was bound to 128 size. Throw out non matching batches. if batchX.shape[0] != batch_size: continue # Store batch in executor 'data' executor.arg_dict['data'][:] = batchX / 255.
class MNISTDataProvider(MiniBatchDataProvider):
    """``DataProvider`` that automatically provides data from the
    `MNIST <http://yann.lecun.com/exdb/mnist/>`_ data set of
    hand-written digits.

    Depends on the `skdata <http://jaberg.github.io/skdata/>`_ package.

    The data set view is created once, when the class body is
    executed, and is shared by all instances through the ``mnist``
    class attribute.

    :param array: {'train', 'val', 'test'}
        Whether to use the official training, validation, or test
        data split of MNIST.
    :param batch_size: The size of mini-batches. ``None`` means the
        whole partition is used as a single batch.
    """

    # The module has been seen under two names; try both.
    # NOTE(review): presumably this differs between skdata releases.
    try:
        from skdata.mnist.view import OfficialVectorClassification
    except ImportError:
        from skdata.mnist.views import OfficialVectorClassification

    mnist = OfficialVectorClassification()

    def __init__(self, array, batch_size=None):
        """Load the requested partition and build the mini-batches.

        Pixel vectors are converted to ``float32`` and divided by 255;
        labels are expanded into a one-hot ``float32`` matrix with 10
        columns. Both arrays are passed to ``gpuarray.to_gpu``.

        :raises ValueError: If ``array`` is not one of the known
            partition names.
        """
        self.train_idx = self.mnist.fit_idxs
        self.val_idx = self.mnist.val_idxs
        self.test_idx = self.mnist.tst_idxs

        self.N_train = self.train_idx.shape[0]
        self.N_val = self.val_idx.shape[0]
        self.N_test = self.test_idx.shape[0]
        self.D = self.mnist.all_vectors.shape[1]

        # Pick the index set of the requested partition; everything
        # below is identical for all three partitions.
        if array == 'train':
            idx = self.train_idx
        elif array == 'val':
            idx = self.val_idx
        elif array == 'test':
            idx = self.test_idx
        else:
            raise ValueError('Unknown partition "%s"' % array)

        self.N = idx.shape[0]

        self.data = gpuarray.to_gpu(
            self.mnist.all_vectors[idx].astype(np.float32) / 255.)

        # One-hot encode the labels. The matrix is sized from the
        # selected partition (self.N); the previous code sized the
        # validation targets with N_train, producing a matrix with the
        # wrong number of rows.
        targets = self.mnist.all_labels[idx]
        labels_soft = np.zeros((self.N, 10), dtype=np.float32)
        labels_soft[range(self.N), targets] = 1.
        self.targets = gpuarray.to_gpu(labels_soft)

        self.batch_size = batch_size if batch_size is not None else self.N
        self.i = 0
        # _make_batches is not defined in this class body; it is
        # expected to come from the base class.
        self._make_batches()
class MNISTDataProvider(DataProvider):
    """``DataProvider`` that automatically provides data from the
    `MNIST <http://yann.lecun.com/exdb/mnist/>`_ data set of
    hand-written digits.

    Depends on the `skdata <http://jaberg.github.io/skdata/>`_ package.

    The data set view is created once, when the class body is
    executed, and is shared by all instances through the ``mnist``
    class attribute.

    :param array: {'train', 'val', 'test'}
        Whether to use the official training, validation, or test
        data split of MNIST.
    :param batch_size: The size of mini-batches. ``None`` means the
        whole partition is used as a single batch.
    """

    from skdata.mnist.view import OfficialVectorClassification

    mnist = OfficialVectorClassification()

    def __init__(self, array, batch_size=None):
        """Load the requested partition.

        Pixel vectors are converted to ``float32`` and divided by 255;
        labels are expanded into a one-hot ``float32`` matrix with 10
        columns. Both arrays are passed to ``gpuarray.to_gpu``.

        :raises ValueError: If ``array`` is not one of the known
            partition names.
        """
        self.train_idx = self.mnist.fit_idxs
        self.val_idx = self.mnist.val_idxs
        self.test_idx = self.mnist.tst_idxs

        self.N_train = self.train_idx.shape[0]
        self.N_val = self.val_idx.shape[0]
        self.N_test = self.test_idx.shape[0]
        self.D = self.mnist.all_vectors.shape[1]

        # Pick the index set of the requested partition; everything
        # below is identical for all three partitions.
        if array == 'train':
            idx = self.train_idx
        elif array == 'val':
            idx = self.val_idx
        elif array == 'test':
            idx = self.test_idx
        else:
            raise ValueError('Unknown partition "%s"' % array)

        self.N = idx.shape[0]

        self.data = gpuarray.to_gpu(
            self.mnist.all_vectors[idx].astype(np.float32) / 255.)

        # One-hot encode the labels. The matrix is sized from the
        # selected partition (self.N); the previous code sized the
        # validation targets with N_train, producing a matrix with the
        # wrong number of rows.
        targets = self.mnist.all_labels[idx]
        labels_soft = np.zeros((self.N, 10), dtype=np.float32)
        labels_soft[range(self.N), targets] = 1.
        self.targets = gpuarray.to_gpu(labels_soft)

        self.batch_size = batch_size if batch_size is not None else self.N
        self.i = 0

    def __getitem__(self, batch_idx):
        """Return the ``(data, targets)`` pair of mini-batch ``batch_idx``.

        :param batch_idx: Zero-based index of the mini-batch.
        :raises ValueError: If ``batch_size`` is ``None`` and
            ``batch_idx`` is not 0.
        """
        # __init__ always replaces a None batch_size with self.N, so
        # this branch is only reached if the attribute is reset to
        # None after construction.
        if self.batch_size is None:
            if batch_idx == 0:
                return self.data, self.targets
            else:
                raise ValueError("batch_idx out of bounds")
        else:
            # No explicit bounds check is done here; the rows are
            # simply sliced out of the stored arrays.
            start = batch_idx * self.batch_size
            stop = (batch_idx + 1) * self.batch_size
            minibatch_data = self.data[start:stop]
            minibatch_targets = self.targets[start:stop]
            return minibatch_data, minibatch_targets

    def next(self):
        """Return the next ``(data, targets)`` mini-batch.

        Once the read position has reached the end of the partition
        it is reset to 0 and ``StopIteration`` is raised, so a later
        call starts again from the first mini-batch.

        :raises StopIteration: When all samples have been returned.
        """
        if self.i >= self.N:
            self.i = 0
            raise StopIteration

        if self.batch_size is None:
            # Whole partition as one batch; mark it as consumed.
            self.i += self.N
            return self.data, self.targets
        else:
            minibatch_data = self.data[self.i:self.i + self.batch_size]
            minibatch_targets = self.targets[self.i:self.i + self.batch_size]
            self.i += self.batch_size
            return minibatch_data, minibatch_targets