def test_minibatch_iterator():
    '''
    minibatch_iterator(X, Y, minibatch_size, randomise=False, balanced=False,
                       x_preprocesser=lambda x: x,
                       stitching_function=lambda x: np.array(x),
                       threading=False, num_cached=128)
    '''
    # One batch spanning the whole dataset must hand back X and Y
    # unchanged and in correspondence.
    X = [-1, -2, -3, -4, -5, -6]
    Y = [0, 1, 2, 3, 4, 5]
    batch_x, batch_y = next(mbg.minibatch_iterator(X, Y, len(X)))
    assert all(batch_x == X)
    assert all(batch_y == Y)

    # Splitting into smaller batches must still yield every item, in order,
    # with x/y pairing preserved across the batch boundary.
    seen_x = []
    seen_y = []
    for batch_x, batch_y in mbg.minibatch_iterator(X, Y, 5):
        seen_x.extend(batch_x)
        seen_y.extend(batch_y)
    assert seen_x == X
    assert seen_y == Y
def test_threading(): import time X = [-1] * 50 Y = [0] * 50 delay = 0.05 def augmenter(x): ''' Augmenter function which actually just delays for half a second. Imagine this is doing some kind of computationally expensive pre-processing or data augmentation. ''' time.sleep(delay) return x # doing without threading: tic = time.time() for tmp_x, tmp_y in mbg.minibatch_iterator( X, Y, 1, x_preprocesser=augmenter, threading=False): time.sleep(delay) no_thread_time = time.time() - tic print "Without threading", no_thread_time # doing with threading: tic = time.time() for tmp_x, tmp_y in mbg.minibatch_iterator( X, Y, 1, x_preprocesser=augmenter, threading=True): time.sleep(delay) thread_time = time.time() - tic print "With threading", time.time() - tic ratio = no_thread_time / thread_time print ratio print np.abs(ratio - 2.0) assert np.abs(ratio - 2.0) < 0.05
def __iter__(self):
    '''
    Yield minibatches of spectrogram excerpts and their labels.

    Each iteration produces, per batch:
      - if self.learn_log: ({'input': X, 'input_med': X_medians}, y)
        where X is (bs, channels, height, 2*hww_x) float32 and X_medians is
        (bs, channels, height) float32;
      - otherwise: (X transposed to (bs, height, 2*hww_x, channels), y)
        with 3 extra derived channels appended (see below).
    y is int32 of shape (bs,).

    Locations are drawn from all positions whose label is >= 0, optionally
    randomised/balanced via mbg.minibatch_iterator.
    '''
    ##, num_per_class, seed=None
    #num_samples = num_per_class * 2
    # Base channel count comes from the raw spectrogram stack; when not
    # learning the log transform, three derived normalisation channels
    # are appended (indices 1-3 below).
    # NOTE(review): the X[count] = self.specs[...] assignment below
    # broadcasts the spec slice across all `channels` planes — presumably
    # self.specs.shape[0] == 1 here; confirm against the constructor.
    channels = self.specs.shape[0]
    if not self.learn_log:
        channels += 3
    height = self.specs.shape[1]
    # Optional determinism for the sampling below.
    if self.seed is not None:
        np.random.seed(self.seed)
    # Only positions with a valid (non-negative) label are candidates.
    idxs = np.where(self.labels >= 0)[0]
    for sampled_locs, y in mbg.minibatch_iterator(idxs, self.labels[idxs],
                                                  self.batch_size,
                                                  randomise=self.randomise,
                                                  balanced=self.balanced,
                                                  class_size='smallest'):
        # extract the specs
        bs = y.shape[0]  # avoid using self.batch_size as last batch may be smaller
        X = np.zeros((bs, channels, height, self.hww_x * 2), np.float32)
        # Re-bind y as a NaN-filled buffer to be overwritten per-location;
        # NaN makes any unfilled slot obvious downstream.
        y = np.zeros(bs) * np.nan
        if self.learn_log:
            X_medians = np.zeros((bs, channels, height), np.float32)
        count = 0
        for loc in sampled_locs:
            # which_spec maps a global location to the source spectrogram
            # whose per-spec median is used for normalisation.
            which = self.which_spec[loc]
            # Raw excerpt: a window of half-width hww_x centred on loc.
            X[count] = self.specs[:, :, (loc - self.hww_x):(loc + self.hww_x)]
            if not self.learn_log:
                # Channel 1: median-subtracted raw excerpt. Order matters:
                # channels 0, 2 and 3 are all derived from channel 1, so it
                # must be written first.
                X[count, 1] = X[count, 0] - self.medians[which][:, None]
                # X[count, 0] = (X[count, 0] - X[count, 0].mean()) / X[count, 0].std()
                # Channel 0: per-frequency-row standardisation (mean/std over
                # time, axis 1); +0.001 guards against zero std.
                X[count, 0] = (X[count, 1] -
                               X[count, 1].mean(1, keepdims=True)) / \
                              (X[count, 1].std(1, keepdims=True) + 0.001)
                # Channel 2: global standardisation of the excerpt.
                X[count, 2] = (X[count, 1] - X[count, 1].mean()) / X[count, 1].std()
                # Channel 3: max-normalised excerpt.
                X[count, 3] = X[count, 1] / X[count, 1].max()
            # Label: max label within a (possibly different) half-width
            # hww_y window around the same location.
            y[count] = self.labels[(loc - self.hww_y):(loc + self.hww_y)].max()
            if self.learn_log:
                which = self.which_spec[loc]
                X_medians[count] = self.medians[which]
            count += 1

        # doing augmentation
        if self.do_aug:
            if self.learn_log:
                # Multiplicative jitter only, clipped to a sane range.
                mult = (1.0 + np.random.randn(bs, 1, 1, 1) * 0.1)
                mult = np.clip(mult, 0.1, 200)
                X *= mult
            else:
                # Multiplicative and additive jitter; occasionally (10% of
                # batches) mix in a shifted copy of the batch itself.
                X *= (1.0 + np.random.randn(bs, 1, 1, 1) * 0.1)
                X += np.random.randn(bs, 1, 1, 1) * 0.1
                if np.random.rand() > 0.9:
                    X += np.roll(X, 1, axis=0) * np.random.randn()

        if self.learn_log:
            xb = {
                'input': X.astype(np.float32),
                'input_med': X_medians.astype(np.float32)
            }
            yield xb, y.astype(np.int32)
        else:
            # Move channels last for the non-log-learning consumer.
            yield X.astype(np.float32).transpose(0, 2, 3, 1), y.astype(np.int32)
def test_minibatch_iterator2(): X = [-1, -2, -3, -4, -5, -6, -7] Y = [0, 1, 1, 2, 2, 2, 5] for tmp_x, tmp_y in mbg.minibatch_iterator(X, Y, 5, balanced=True): print tmp_x, tmp_y