def __init__(self, root, mode, is_gray=False):
    """Loads the STL dataset.

    Args:
        root: directory holding 'train.mat', 'test.mat' and
            'unlabeled.mat'.
        mode: either 'train', 'test', or 'unlabeled'.
        is_gray: if True, the data is averaged over its last axis and
            the dataset reports a single channel.

    Raises:
        ValueError: if mode is not one of the three recognized values.
    """
    if mode == 'train':
        self._data, self._label = \
            STL10Dataset.get_data(os.path.join(root, 'train.mat'))
    elif mode == 'test':
        self._data, self._label = \
            STL10Dataset.get_data(os.path.join(root, 'test.mat'))
    elif mode == 'unlabeled':
        # h5py allows us to directly read part of the matrix, so each
        # node only loads its own segment of the columns. The file is
        # opened in a with-block so the handle is released once the slice
        # has been copied into memory.
        with h5py.File(os.path.join(root, 'unlabeled.mat'), 'r') as matdata:
            segments = mpi.get_segments(matdata['X'].shape[1])
            X = matdata['X'][:, segments[mpi.RANK]:segments[mpi.RANK + 1]]
        # Reinterpret the flat rows using the reversed image dimensions,
        # then transpose so that the sample index becomes the first axis.
        X.resize(STL10Dataset._image_dim[::-1] + (X.shape[1],))
        self._data = np.ascontiguousarray(np.transpose(X))
        self._label = None
    else:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError("Unrecognized mode.")
    if is_gray:
        self._data = self._data.mean(axis=-1)
        self._dim = STL10Dataset._image_dim[:2]
        self._channels = 1
    else:
        self._dim = STL10Dataset._image_dim
        self._channels = STL10Dataset._num_channels
    self._prefetch = True
def testGetSegments(self):
    """Checks the boundaries and the inverse map from mpi.get_segments.

    The boundary list must have mpi.SIZE + 1 entries, start at 0 and end
    at the total count; the inverse map must place every index inside the
    half-open range of the segment it names.
    """
    num_items = 100
    bounds, owner = mpi.get_segments(num_items, True)

    # Shape of the two returned sequences.
    self.assertEqual(len(bounds), mpi.SIZE + 1)
    self.assertEqual(bounds[0], 0)
    self.assertEqual(bounds[-1], num_items)
    self.assertEqual(len(owner), num_items)

    # Every index lies in [bounds[k], bounds[k + 1]) for its segment k.
    for idx in range(num_items):
        lower = bounds[owner[idx]]
        self.assertGreaterEqual(idx, lower)
        upper = bounds[owner[idx] + 1]
        self.assertLess(idx, upper)
def __init__(self, root, mode, is_gray=False, target_size=None):
    """Loads the STL dataset.

    Args:
        root: directory holding 'train.mat', 'test.mat' and
            'unlabeled.mat'.
        mode: either 'train', 'test', or 'unlabeled'.
        is_gray: if True, the data is averaged over its last axis and
            the dataset reports a single channel.
        target_size: optional int; when given, every image is resized to
            target_size x target_size with skimage.transform.resize.

    Raises:
        ValueError: if mode is not one of the three recognized values.
        TypeError: if target_size is given but is not an int.
    """
    if mode == 'train':
        self._data, self._label = \
            STL10Dataset.get_data(os.path.join(root, 'train.mat'))
    elif mode == 'test':
        self._data, self._label = \
            STL10Dataset.get_data(os.path.join(root, 'test.mat'))
    elif mode == 'unlabeled':
        # h5py allows us to directly read part of the matrix, so each
        # node only loads its own segment of the columns. The file is
        # opened in a with-block so the handle is released once the slice
        # has been copied into memory.
        with h5py.File(os.path.join(root, 'unlabeled.mat'), 'r') as matdata:
            segments = mpi.get_segments(matdata['X'].shape[1])
            X = matdata['X'][:, segments[mpi.RANK]:segments[mpi.RANK + 1]]
        # Reinterpret the flat rows using the reversed image dimensions,
        # then transpose so that the sample index becomes the first axis.
        X.resize(STL10Dataset._image_dim[::-1] + (X.shape[1],))
        self._data = np.ascontiguousarray(np.transpose(X))
        self._label = None
    else:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError("Unrecognized mode.")
    if is_gray:
        self._data = self._data.mean(axis=-1)
        self._dim = STL10Dataset._image_dim[:2]
        self._channels = 1
    else:
        self._dim = STL10Dataset._image_dim
        self._channels = STL10Dataset._num_channels
    if target_size is not None:
        # we often want to resize the STL dataset to some other sizes
        # bool is a subclass of int, so it is rejected explicitly.
        if isinstance(target_size, bool) or \
                not isinstance(target_size, int):
            raise TypeError("The input target_size should be an int!")
        # Replace only the two spatial entries; any trailing entry (the
        # channel count for colour data) is kept so that _dim still
        # matches the shape of one resized image.
        self._dim = (target_size, target_size) + tuple(self._dim[2:])
        old_data = self._data
        # Same sample count and trailing axes, new spatial extent.
        new_shape = (old_data.shape[0], target_size, target_size) \
            + tuple(old_data.shape[3:])
        self._data = np.empty(new_shape)
        for i in range(self._data.shape[0]):
            self._data[i] = skimage.transform.resize(
                old_data[i], (target_size, target_size), mode='nearest')
    self._prefetch = True
def __init__(self, root, mode, is_gray=False, target_size=None):
    """Loads the STL dataset.

    Args:
        root: directory holding 'train.mat', 'test.mat' and
            'unlabeled.mat'.
        mode: either 'train', 'test', or 'unlabeled'.
        is_gray: if True, the data is averaged over its last axis and
            the dataset reports a single channel.
        target_size: optional int; when given, every image is resized to
            target_size x target_size with skimage.transform.resize.

    Raises:
        ValueError: if mode is not one of the three recognized values.
        TypeError: if target_size is given but is not an int.
    """
    if mode == 'train':
        self._data, self._label = \
            STL10Dataset.get_data(os.path.join(root, 'train.mat'))
    elif mode == 'test':
        self._data, self._label = \
            STL10Dataset.get_data(os.path.join(root, 'test.mat'))
    elif mode == 'unlabeled':
        # h5py allows us to directly read part of the matrix, so each
        # node only loads its own segment of the columns. Using the file
        # as a context manager closes the handle as soon as the slice
        # is in memory.
        unlabeled_path = os.path.join(root, 'unlabeled.mat')
        with h5py.File(unlabeled_path, 'r') as matdata:
            segments = mpi.get_segments(matdata['X'].shape[1])
            X = matdata['X'][:, segments[mpi.RANK]:segments[mpi.RANK + 1]]
        # Reinterpret the flat rows using the reversed image dimensions,
        # then transpose so that the sample index becomes the first axis.
        X.resize(STL10Dataset._image_dim[::-1] + (X.shape[1], ))
        self._data = np.ascontiguousarray(np.transpose(X))
        self._label = None
    else:
        # Call form of raise: valid on both Python 2 and Python 3.
        raise ValueError("Unrecognized mode.")
    if is_gray:
        self._data = self._data.mean(axis=-1)
        self._dim = STL10Dataset._image_dim[:2]
        self._channels = 1
    else:
        self._dim = STL10Dataset._image_dim
        self._channels = STL10Dataset._num_channels
    if target_size is not None:
        # we often want to resize the STL dataset to some other sizes
        # bool is a subclass of int, so it is rejected explicitly.
        if isinstance(target_size, bool) or \
                not isinstance(target_size, int):
            raise TypeError("The input target_size should be an int!")
        # Replace only the two spatial entries; any trailing entry (the
        # channel count for colour data) is kept so that _dim still
        # matches the shape of one resized image.
        self._dim = (target_size, target_size) + tuple(self._dim[2:])
        old_data = self._data
        # Same sample count and trailing axes, new spatial extent.
        new_shape = (old_data.shape[0], target_size, target_size) \
            + tuple(old_data.shape[3:])
        self._data = np.empty(new_shape)
        for i in range(self._data.shape[0]):
            self._data[i] = skimage.transform.resize(
                old_data[i], (target_size, target_size), mode='nearest')
    self._prefetch = True