def __init__(self, dataset='mnist', data_path='..\\..\\..\\MNIST\\', file_sample='train-images.idx3-ubyte', file_label='train-labels.idx1-ubyte', is_normalize=True, par_pool=None):
    """Load an IDX-format image/label dataset (MNIST by default) into memory.

    Args:
        dataset: Name tag for the dataset (stored, not interpreted here).
        data_path: Directory containing the IDX files.
            MNIST training files:  'train-images.idx3-ubyte', 'train-labels.idx1-ubyte'
            MNIST test files:      't10k-images.idx3-ubyte',  't10k-labels.idx1-ubyte'
        file_sample: IDX3 file with the image samples.
        file_label: IDX1 file with the labels, or None for an unlabeled set.
        is_normalize: If True, scale raw pixel values into [0, 1).
        par_pool: Requested worker-pool size (pool itself is created lazily).
    """
    self.dataset = dataset
    self.dataInfo = dict()
    # Persist the load configuration so load_data() can re-read or reload
    # later; load_data() depends on every one of these attributes.
    self.data_path = data_path
    self.data_samples = path.join(data_path, file_sample)
    self.is_there_labels = file_label is not None
    self.data_labels = path.join(data_path, file_label) if self.is_there_labels else None
    self.is_normalize_data = is_normalize
    self.images = bf.decode_idx3_ubyte(self.data_samples)
    self.labels = bf.decode_idx1_ubyte(self.data_labels) if self.is_there_labels else None
    self.length = self.images.shape[0]
    if is_normalize:
        # NOTE(review): divides by 256 while load_data() effectively divides
        # by 255 — the two paths yield slightly different scales; confirm
        # which divisor is intended.
        self.images /= 256
    self.analyse_dataset()
    self.tmp = None
    self.nPool = par_pool
    self.parPool = None  # actual pool is created on demand, not here
    if is_debug:
        self.check_consistency()
def load_data(self, data_path=None, file_sample=None, file_label=None, is_normalize=None):
    """(Re)load the dataset, optionally switching files or normalization.

    Any argument left as None keeps the value configured at construction.
    MNIST training files:  'train-images.idx3-ubyte', 'train-labels.idx1-ubyte'
    MNIST test files:      't10k-images.idx3-ubyte',  't10k-labels.idx1-ubyte'

    Args:
        data_path: Directory containing the IDX files (defaults to the stored path).
        file_sample: New IDX3 image file to load, or None to reuse the current one.
        file_label: New IDX1 label file, or None to reuse the current one.
        is_normalize: Override the normalization flag, or None to keep it.
    """
    if data_path is None:
        data_path = self.data_path
    # Bug fix: the sample path was gated on `file_label is not None`, so a
    # new file_sample alone was silently ignored and a lone file_label
    # crashed on path.join(data_path, None). Guard each on its own argument.
    if file_sample is not None:
        self.data_samples = path.join(data_path, file_sample)
    if file_label is not None and self.is_there_labels:
        self.data_labels = path.join(data_path, file_label)
    if is_normalize is not None:
        self.is_normalize_data = is_normalize
    self.images = bf.decode_idx3_ubyte(self.data_samples)
    if self.is_there_labels:
        self.labels = bf.decode_idx1_ubyte(self.data_labels)
    self.length = self.images.shape[0]
    if self.is_normalize_data:
        # Divide by 255 only when the data still looks like raw 0..255
        # pixels (max > 2); already-normalized data is divided by 1,
        # i.e. left untouched: 254 * True + 1 == 255, 254 * False + 1 == 1.
        self.images /= (254 * (self.images.max() > 2) + 1)