def load_images(self, source, ext):
    im_list = list_dir(source, ext)
    nb_imgs = len(im_list)
    # one row per image, one column per pixel of the flattened 28x28 plane
    self.ImagesR = np.zeros((nb_imgs, 784))
    self.ImagesG = np.zeros((nb_imgs, 784))
    self.ImagesB = np.zeros((nb_imgs, 784))
    print('[classify_mnist_v2.py -- load_images] load test images from %s' % (source))
    print('[classify_mnist_v2.py -- load_images] total images: %d' % (nb_imgs))
    for k in range(nb_imgs):
        img = imread(im_list[k])
        # split the RGB channels, flatten each one and scale to [0, 1]
        self.ImagesR[k, :] = np.reshape(img[:, :, 0], (1, 784)) / 255.0
        self.ImagesG[k, :] = np.reshape(img[:, :, 1], (1, 784)) / 255.0
        self.ImagesB[k, :] = np.reshape(img[:, :, 2], (1, 784)) / 255.0
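
# A minimal sketch (not part of the original file) showing how the per-channel
# rows filled by load_images() can be recombined into a 28x28x3 image for visual
# inspection. `loader` stands for any object on which load_images() has already
# been called; the helper name and the index default are illustrative assumptions,
# and numpy is assumed to be imported as np as in the rest of the file.
def recombine_channels(loader, k=0):
    """Rebuild image k from the flattened R/G/B rows stored by load_images()."""
    r = np.reshape(loader.ImagesR[k, :], (28, 28))
    g = np.reshape(loader.ImagesG[k, :], (28, 28))
    b = np.reshape(loader.ImagesB[k, :], (28, 28))
    # stack the channels back into H x W x C and undo the [0, 1] scaling
    return np.uint8(np.stack([r, g, b], axis=-1) * 255.0)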

def datablock_preprocess(source, nb_splits=32):
    # load block
    start = time.time()
    im_list = list_dir(source, 'png')
    nb_imgs = len(im_list)
    # integer division so that range() below receives integer bounds
    step = nb_imgs // nb_splits
    for nb_block in range(nb_splits):
        X_block = []
        for k in range(step * nb_block, step * (nb_block + 1)):
            img = imread(im_list[k])
            print('[dbutils.py -- datablock_preprocess] loading: {}'.format(im_list[k]))
            # scale to [0, 1] and flatten H x W x C into a single vector
            img = img / 255.0
            X_block.append(
                np.reshape(img, (np.shape(img)[0] * np.shape(img)[1] * np.shape(img)[2])))
        X_block = np.array(X_block)
        np.save(source + "batch_" + str(nb_block) + ".npy", X_block)
    print("[dbutils.py -- datablock_preprocess] Time to load images",
          time.time() - start)
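
# A minimal sketch (not part of the original file) of how a block written by
# datablock_preprocess() could be read back and checked. The helper name is an
# illustrative assumption; np.load simply mirrors the np.save call above.
def load_block(source, nb_block):
    """Load one preprocessed block of flattened images from disk."""
    X_block = np.load(source + "batch_" + str(nb_block) + ".npy")
    print("[dbutils.py -- load_block] block %d shape: %s" % (nb_block, str(X_block.shape)))
    return X_block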

def random_mini_batches(self, X, Y, mini_batch_size=64, seed=0):
    if self.name in ['mnist', 'cifar10', 'cifar100']:
        m = X.shape[1]  # number of training examples
        mini_batches = []
        mini_labels = []
        # Step 1: Shuffle (X, Y)
        permutation = list(np.random.permutation(m))
        shuffled_X = X[:, permutation]
        shuffled_Y = Y[:, permutation]
        # Step 2: Partition (shuffled_X, shuffled_Y). Minus the end case.
        num_complete_minibatches = int(
            math.floor(m / self.batch_size)
        )  # number of mini-batches of size batch_size in the partitioning
        for k in range(0, num_complete_minibatches):
            mini_batch_X = shuffled_X[:, k * self.batch_size:(k + 1) * self.batch_size]
            mini_batches.append(mini_batch_X)
            mini_batch_Y = shuffled_Y[:, k * self.batch_size:(k + 1) * self.batch_size]
            mini_labels.append(mini_batch_Y)
        # Handling the end case (last mini-batch < mini_batch_size)
        #if m % mini_batch_size != 0:
        #    mini_batch_X = shuffled_X[:, num_complete_minibatches * self.batch_size : m]
        #    mini_batches.append(mini_batch_X)
        return mini_batches, mini_labels
    elif self.name in ['celeba', 'stl10']:
        #print('[dataset.py - random_mini_batches] count = %d' % (self.count))
        if self.count == 0:
            # start of a new epoch: reshuffle the image indices
            self.permutation = list(np.random.permutation(self.nb_imgs))
            cur_batch = self.permutation[self.count * self.batch_size:(self.count + 1) * self.batch_size]
        elif 0 < self.count < self.nb_compl_batches:
            cur_batch = self.permutation[self.count * self.batch_size:(self.count + 1) * self.batch_size]
        elif self.count >= self.nb_compl_batches:
            # all complete mini-batches consumed: drop the partial remainder,
            # reshuffle and restart from the first mini-batch (also covers the
            # case where the dataset divides evenly into batches)
            # cur_batch = self.permutation[self.nb_compl_batches * self.batch_size : self.nb_imgs]
            self.count = 0
            self.permutation = list(np.random.permutation(self.nb_imgs))
            cur_batch = self.permutation[self.count * self.batch_size:(self.count + 1) * self.batch_size]
        else:
            print('[dataset.py - random_mini_batches] something is wrong with mini-batches')
        mini_batches = []
        #print('cur_batch: {}'.format(cur_batch))
        # load, preprocess and flatten every image of the current mini-batch
        for k in cur_batch:
            img = imread(self.im_list[k])
            if self.name == 'celeba':
                img = preprocess(img)
            if self.color_space == 'YUV':
                img = RGB2YUV(img)
            img = img / 255.0
            mini_batches.append(
                np.reshape(img, (1, np.shape(img)[0] * np.shape(img)[1] * np.shape(img)[2])))
        #print('merging shape', np.shape(mini_batches))
        mini_batches = np.concatenate(mini_batches, axis=0)
        self.count = self.count + 1
        return mini_batches
    elif self.name in ['imagenet_32']:
        # pick one preprocessed .npy block at random and split it into mini-batches
        batch_index = np.random.randint(self.nb_splits, size=1)[0]
        np_file = self.source + "/batch_" + str(batch_index) + ".npy"
        print('[dataset.py -- random_mini_batches] processing block: %s for mini-batches generation' % (np_file))
        X_1 = np.load(np_file)
        m = X_1.shape[0]  # number of training examples
        mini_batches = []
        permutation = list(np.random.permutation(m))
        shuffled_X = X_1[permutation]
        self.num_complete_minibatches = int(
            math.floor(m / self.batch_size)
        )  # number of mini-batches of size batch_size in the partitioning
        for k in range(0, self.num_complete_minibatches):
            mini_batch_X = shuffled_X[k * self.batch_size:(k + 1) * self.batch_size]
            mini_batch_X = np.reshape(mini_batch_X, (self.batch_size, 3 * 32 * 32))
            # for img in mini_batch_X:
            #     imwrite(np.reshape(img, (32, 32, 3)), "./image.jpg")
            #     exit()
            mini_batches.append(mini_batch_X)
        return mini_batches
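
# A minimal sketch (not part of the original file) of a loop driving
# random_mini_batches() in its streaming branch. `dataset` stands for an
# already-constructed object whose name is 'celeba' or 'stl10'; the helper name
# and the iteration count are illustrative assumptions. X and Y are unused in
# that branch, so None is passed for both.
def iterate_minibatches(dataset, nb_iters=10):
    for it in range(nb_iters):
        X_mb = dataset.random_mini_batches(None, None)
        # each call returns one (batch_size, H*W*C) array for the streaming datasets
        print("iteration %d, mini-batch shape: %s" % (it, str(np.shape(X_mb))))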