def test_minibatch_iterator():
    """Check minibatch_iterator yields X/Y pairs in matching order.

    Signature under test:
    minibatch_iterator(X, Y, minibatch_size, randomise=False, balanced=False,
            x_preprocesser=lambda x:x, stitching_function=lambda x: np.array(x),
            threading=False, num_cached=128):
    """
    X = [-1, -2, -3, -4, -5, -6]
    Y = [0, 1, 2, 3, 4, 5]

    # A single batch covering the whole dataset must reproduce the inputs.
    batch_x, batch_y = next(mbg.minibatch_iterator(X, Y, len(X)))
    assert all(batch_x == X)
    assert all(batch_y == Y)

    # Concatenating several smaller batches must also reproduce the inputs.
    collected_x = []
    collected_y = []

    for part_x, part_y in mbg.minibatch_iterator(X, Y, 5):
        collected_x.extend(list(part_x))
        collected_y.extend(list(part_y))

    assert collected_x == X
    assert collected_y == Y
def test_threading():
    import time

    X = [-1] * 50
    Y = [0] * 50
    delay = 0.05

    def augmenter(x):
        '''
        Augmenter function which actually just delays for half a second.
        Imagine this is doing some kind of computationally expensive
        pre-processing or data augmentation.
        '''
        time.sleep(delay)
        return x

    # doing without threading:
    tic = time.time()
    for tmp_x, tmp_y in mbg.minibatch_iterator(
            X, Y, 1, x_preprocesser=augmenter, threading=False):
        time.sleep(delay)

    no_thread_time = time.time() - tic
    print "Without threading", no_thread_time

    # doing with threading:
    tic = time.time()
    for tmp_x, tmp_y in mbg.minibatch_iterator(
            X, Y, 1, x_preprocesser=augmenter, threading=True):
        time.sleep(delay)

    thread_time = time.time() - tic
    print "With threading", time.time() - tic

    ratio = no_thread_time / thread_time
    print ratio
    print np.abs(ratio - 2.0)
    assert np.abs(ratio - 2.0) < 0.05
# Example #3
    def __iter__(self):  ##, num_per_class, seed=None
        """Yield minibatches of spectrogram windows and their labels.

        Yields either ``(X, y)`` with X in channels-last layout, or — when
        ``self.learn_log`` is set — ``({'input': X, 'input_med': X_medians},
        y)`` in channels-first layout.  Each sample is a window of width
        ``2 * self.hww_x`` cut from ``self.specs``; its label is the max of
        ``self.labels`` over a window of width ``2 * self.hww_y``.
        NOTE(review): indexing below assumes self.specs is laid out as
        (channels, height, time) — confirm against the producer of specs.
        """
        #num_samples = num_per_class * 2
        channels = self.specs.shape[0]
        # Without the learned log transform, three derived channels are
        # appended (filled in per-sample in the loop below).
        if not self.learn_log:
            channels += 3
        height = self.specs.shape[1]

        # Optional fixed seed for reproducible sampling/augmentation.
        if self.seed is not None:
            np.random.seed(self.seed)

        # Only locations with a valid (non-negative) label are candidates.
        idxs = np.where(self.labels >= 0)[0]
        for sampled_locs, y in mbg.minibatch_iterator(idxs,
                                                      self.labels[idxs],
                                                      self.batch_size,
                                                      randomise=self.randomise,
                                                      balanced=self.balanced,
                                                      class_size='smallest'):

            # extract the specs
            bs = y.shape[
                0]  # avoid using self.batch_size as last batch may be smaller
            X = np.zeros((bs, channels, height, self.hww_x * 2), np.float32)
            # The iterator's y is only used for its length; labels are
            # recomputed below from a window around each sampled location.
            y = np.zeros(bs) * np.nan
            if self.learn_log:
                X_medians = np.zeros((bs, channels, height), np.float32)
            count = 0

            for loc in sampled_locs:
                # Which source spectrogram this location came from.
                which = self.which_spec[loc]

                # Window of width 2*hww_x centred on loc.
                X[count] = self.specs[:, :,
                                      (loc - self.hww_x):(loc + self.hww_x)]

                if not self.learn_log:
                    # Channel 1: median-subtracted spectrogram.
                    X[count, 1] = X[count, 0] - self.medians[which][:, None]
                    # X[count, 0] = (X[count, 0] - X[count, 0].mean()) / X[count, 0].std()
                    # Channel 0: per-row standardisation of channel 1
                    # (+0.001 guards against zero std).
                    X[count,
                      0] = (X[count, 1] - X[count, 1].mean(1, keepdims=True)
                            ) / (X[count, 1].std(1, keepdims=True) + 0.001)

                    # Channel 2: global standardisation of channel 1.
                    X[count,
                      2] = (X[count, 1] - X[count, 1].mean()) / X[count,
                                                                  1].std()
                    # Channel 3: channel 1 scaled by its max.
                    X[count, 3] = X[count, 1] / X[count, 1].max()

                # Label: max label in a window of width 2*hww_y around loc.
                y[count] = self.labels[(loc - self.hww_y):(loc +
                                                           self.hww_y)].max()
                if self.learn_log:
                    which = self.which_spec[loc]
                    X_medians[count] = self.medians[which]

                count += 1

            # doing augmentation
            if self.do_aug:
                if self.learn_log:
                    # Multiplicative noise only, clipped to stay positive.
                    mult = (1.0 + np.random.randn(bs, 1, 1, 1) * 0.1)
                    mult = np.clip(mult, 0.1, 200)
                    X *= mult
                else:
                    # Multiplicative plus additive noise; occasionally mix
                    # in a batch-rolled copy (10% of batches).
                    X *= (1.0 + np.random.randn(bs, 1, 1, 1) * 0.1)
                    X += np.random.randn(bs, 1, 1, 1) * 0.1
                    if np.random.rand() > 0.9:
                        X += np.roll(X, 1, axis=0) * np.random.randn()

            if self.learn_log:
                xb = {
                    'input': X.astype(np.float32),
                    'input_med': X_medians.astype(np.float32)
                }
                yield xb, y.astype(np.int32)

            else:
                # Channels-last layout for the non-log network.
                yield X.astype(np.float32).transpose(0, 2, 3,
                                                     1), y.astype(np.int32)
def test_minibatch_iterator2():
    X = [-1, -2, -3, -4, -5, -6, -7]
    Y = [0, 1, 1, 2, 2, 2, 5]
    for tmp_x, tmp_y in mbg.minibatch_iterator(X, Y, 5, balanced=True):
        print tmp_x, tmp_y