def check_padding(axes):

    padding = 3
    ddata = DummyDataset()
    topo = ddata.get_topological_view()

    wf_cls = WindowAndFlip

    wf = wf_cls(window_shape=(5, 5), randomize=[ddata],
                pad_randomized=padding)
    wf.setup(None, None, None)
    new_topo = ddata.get_topological_view()
    assert_equal(topo.shape, new_topo.shape)
    saw_padding = dict([((direction, amount), False) for direction, amount
                        in itertools.product(['l', 'b', 'r', 't'],
                                             xrange(padding))])
    iters = 0
    while not all(saw_padding.values()) and iters < 50:
        for image in new_topo.swapaxes(0, 3):
            for i in xrange(padding):
                if (image[:i] == 0).all():
                    saw_padding['t', i] = True
                if (image[-i:] == 0).all():
                    saw_padding['b', i] = True
                if (image[:, -i:] == 0).all():
                    saw_padding['r', i] = True
                if (image[:, :i] == 0).all():
                    saw_padding['l', i] = True
        wf.on_monitor(None, None, None)
        new_topo = ddata.get_topological_view()
        iters += 1
Example #2
    def test_mean_H_given_V(self):
        tol = 1e-6

        # P(h_1 | v) / P(h_2 | v) = a
        # => exp(-E(v, h_1)) / exp(-E(v,h_2)) = a
        # => exp(E(v,h_2)-E(v,h_1)) = a
        # => E(v,h_2) - E(v,h_1) = log(a)
        # also log P(h_1 | v) - log P(h_2 | v) = log(a)

        rng = N.random.RandomState([1, 2, 3])

        m = 5

        Vv = as_floatX(N.zeros((m, self.nv)) + rng.randn(self.nv))

        Hv = as_floatX(rng.randn(m, self.nh) > 0.)

        log_Pv = self.log_P_H_given_V_func(Hv, Vv)

        Ev = self.E_func(Vv, Hv)

        for i in xrange(m):
            for j in xrange(i + 1, m):
                log_a = log_Pv[i] - log_Pv[j]
                e = Ev[j] - Ev[i]

                assert abs(e-log_a) < tol
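
    def illustrate_log_ratio_identity(self):
        # Hedged numpy-only sketch (not part of the original test) of the
        # identity used above: for p_i = exp(-E_i) / Z with any normalizer Z,
        # log p_i - log p_j = E_j - E_i. The numbers are illustrative only.
        E = N.array([0.3, -1.2, 2.0])
        log_p = -E - N.log(N.exp(-E).sum())
        assert abs((log_p[0] - log_p[1]) - (E[1] - E[0])) < 1e-12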
Example #3
 def outer(self, Y, Y_hat):
     if self._requires_reshape:
         if self._requires_unmask:
             try:
                 Y, Y_mask = Y
                 Y_hat, Y_hat_mask = Y_hat
             except (TypeError, ValueError):
                 log.warning("Lost the mask when wrapping cost. This "
                             "can happen if this function is called "
                             "from within another wrapped function. "
                             "Most likely this won't cause any problem")
                 return cost(self, Y, Y_hat)
         input_shape = ([Y.shape[0] * Y.shape[1]] +
                        [Y.shape[i] for i in xrange(2, Y.ndim)])
         reshaped_Y = Y.reshape(input_shape)
         if isinstance(Y_hat, tuple):
             input_shape = ([[Y_hat[j].shape[0] * Y_hat[j].shape[1]] +
                             [Y_hat[j].shape[i]
                             for i in xrange(2, Y_hat[j].ndim)]
                             for j in xrange(len(Y_hat))])
             reshaped_Y_hat = []
             for i in xrange(len(Y_hat)):
                 reshaped_Y_hat.append(Y_hat[i].reshape(input_shape[i]))
             reshaped_Y_hat = tuple(reshaped_Y_hat)
         else:
             reshaped_Y_hat = Y_hat.reshape(input_shape)
         # Here we need to take the indices of only the unmasked data
         if self._requires_unmask:
             return cost(self, reshaped_Y[Y_mask.flatten().nonzero()],
                         reshaped_Y_hat[Y_mask.flatten().nonzero()])
         return cost(self, reshaped_Y, reshaped_Y_hat)
     else:  # Not RNN-friendly, but not requiring reshape
         return cost(self, Y, Y_hat)
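
# Hedged numpy sketch (illustrative names only, not part of the wrapper
# above) of the reshape-and-unmask trick it implements: a (time, batch, dim)
# target tensor is flattened to (time * batch, dim) and only the rows whose
# mask entry is nonzero are kept.
def unmask_reshape_demo():
    import numpy
    time_steps, batch, dim = 3, 2, 4
    Y = numpy.arange(time_steps * batch * dim,
                     dtype='float32').reshape((time_steps, batch, dim))
    Y_mask = numpy.array([[1, 1], [1, 0], [0, 0]], dtype='float32')
    reshaped_Y = Y.reshape((time_steps * batch, dim))
    kept = reshaped_Y[Y_mask.flatten().nonzero()]
    assert kept.shape == (3, dim)  # three unmasked (time, batch) positions
    return kept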
Example #4
def stochastic_max_pool_bc01(bc01,
                             pool_shape,
                             pool_stride,
                             image_shape,
                             rng=None):
    """
    .. todo::

        WRITEME properly

    Stochastic max pooling for training as defined in:

    Stochastic Pooling for Regularization of Deep Convolutional Neural Networks
    Matthew D. Zeiler, Rob Fergus

    Parameters
    ----------
    bc01 : theano 4-tensor
        in format (batch size, channels, rows, cols),
        IMPORTANT: All values should be positive
    pool_shape : tuple
        shape of the pool region (rows, cols)
    pool_stride : tuple
        strides between pooling regions (row stride, col stride)
    image_shape : tuple
        avoid doing some of the arithmetic in theano
    rng : theano random stream
    """
    r, c = image_shape
    pr, pc = pool_shape
    rs, cs = pool_stride

    batch = bc01.shape[0]
    channel = bc01.shape[1]

    rng = make_theano_rng(rng, 2022, which_method='multinomial')

    # Compute index in pooled space of last needed pool
    # (needed = each input pixel must appear in at least one pool)
    def last_pool(im_shp, p_shp, p_strd):
        rval = int(numpy.ceil(float(im_shp - p_shp) / p_strd))
        assert p_strd * rval + p_shp >= im_shp
        assert p_strd * (rval - 1) + p_shp < im_shp
        return rval

    # Compute starting row of the last pool
    last_pool_r = last_pool(image_shape[0], pool_shape[0],
                            pool_stride[0]) * pool_stride[0]
    # Compute number of rows needed in image for all indexes to work out
    required_r = last_pool_r + pr

    last_pool_c = last_pool(image_shape[1], pool_shape[1],
                            pool_stride[1]) * pool_stride[1]
    required_c = last_pool_c + pc

    # final result shape
    res_r = int(numpy.floor(last_pool_r / rs)) + 1
    res_c = int(numpy.floor(last_pool_c / cs)) + 1

    for bc01v in get_debug_values(bc01):
        assert not contains_inf(bc01v)
        assert bc01v.shape[2] == image_shape[0]
        assert bc01v.shape[3] == image_shape[1]

    # padding
    padded = tensor.alloc(0.0, batch, channel, required_r, required_c)
    name = bc01.name
    if name is None:
        name = 'anon_bc01'
    bc01 = tensor.set_subtensor(padded[:, :, 0:r, 0:c], bc01)
    bc01.name = 'zero_padded_' + name

    # unraveling
    window = tensor.alloc(0.0, batch, channel, res_r, res_c, pr, pc)
    window.name = 'unravelled_windows_' + name

    for row_within_pool in xrange(pool_shape[0]):
        row_stop = last_pool_r + row_within_pool + 1
        for col_within_pool in xrange(pool_shape[1]):
            col_stop = last_pool_c + col_within_pool + 1
            win_cell = bc01[:, :, row_within_pool:row_stop:rs,
                            col_within_pool:col_stop:cs]
            window = tensor.set_subtensor(
                window[:, :, :, :, row_within_pool, col_within_pool], win_cell)

    # find the norm
    norm = window.sum(axis=[4, 5])
    norm = tensor.switch(tensor.eq(norm, 0.0), 1.0, norm)
    norm = window / norm.dimshuffle(0, 1, 2, 3, 'x', 'x')
    # get prob
    prob = rng.multinomial(pvals=norm.reshape(
        (batch * channel * res_r * res_c, pr * pc)),
                           dtype='float32')
    # select
    res = (window * prob.reshape(
        (batch, channel, res_r, res_c, pr, pc))).max(axis=5).max(axis=4)
    res.name = 'pooled_' + name

    return tensor.cast(res, theano.config.floatX)
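
# Hedged usage sketch for stochastic_max_pool_bc01, assuming the theano /
# numpy imports already used in this module (theano, tensor, numpy). The
# shapes, pool size, and stride below are illustrative only.
def stochastic_pool_demo():
    x = tensor.tensor4('x')  # (batch, channel, rows, cols), positive values
    pooled = stochastic_max_pool_bc01(x,
                                      pool_shape=(2, 2),
                                      pool_stride=(2, 2),
                                      image_shape=(6, 6))
    f = theano.function([x], pooled)
    batch = numpy.random.uniform(
        0.1, 1.0, (5, 3, 6, 6)).astype(theano.config.floatX)
    return f(batch)  # pooled output of shape (5, 3, 3, 3)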
Example #5
def tile_raster_images(X, img_shape, tile_shape=None, tile_spacing=(1, 1),
                       scale_rows_to_unit_interval=True,
                       output_pixel_vals=True,
                       min_dynamic_range=1e-4):
    """
    Transform an array with one flattened image per row into an array in
    which the images are reshaped and laid out like tiles on a floor.

    This function is useful for visualizing datasets whose rows are images,
    and also the columns of matrices that transform those rows (such as the
    first layer of a neural net).

    Parameters
    ----------
    X : numpy.ndarray or tuple of 4 channels or None
        A 2-D array in which every row is a flattened image.
    img_shape : tuple
        The original shape of each image
    tile_shape : tuple
        The number of images to tile (rows, cols). Defaults to a square-ish
        shape with the right area for the number of images.
    min_dynamic_range : float, positive
        Dynamic range of each image is used in scaling to the unit interval,
        but images with less dynamic range than this will be scaled as if
        this were the dynamic range.

    Returns
    -------
    out_array : 2D array with same dtype as X
        Array suitable for viewing as an image (See:`PIL.Image.fromarray`).
    """
    # This is premature when tile_slices_to_image is not documented at all yet,
    # but ultimately true:
    #print >> sys.stderr, "WARN: tile_raster_images sucks, use tile_slices_to_image"
    if len(img_shape)==3 and img_shape[2]==3:
        # make this save an rgb image
        if scale_rows_to_unit_interval:
            logger.warning("tile_raster_images' scaling routine "
                           "messes up colour - try tile_slices_to_image")
        return tile_raster_images(
                (X[:,0::3], X[:,1::3], X[:,2::3], None),
                img_shape=img_shape[:2],
                tile_shape=tile_shape,
                tile_spacing=tile_spacing,
                scale_rows_to_unit_interval=scale_rows_to_unit_interval,
                output_pixel_vals=output_pixel_vals,
                min_dynamic_range=min_dynamic_range)

    if isinstance(X, tuple):
        n_images_in_x = X[0].shape[0]
    else:
        n_images_in_x = X.shape[0]

    if tile_shape is None:
        tile_shape = most_square_shape(n_images_in_x)

    assert len(img_shape) == 2
    assert len(tile_shape) == 2
    assert len(tile_spacing) == 2

    #out_shape is the shape in pixels of the returned image array
    out_shape = [(ishp + tsp) * tshp - tsp for ishp, tshp, tsp
        in zip(img_shape, tile_shape, tile_spacing)]

    if isinstance(X, tuple):
        if scale_rows_to_unit_interval:
            raise NotImplementedError()
        assert len(X) == 4
        if output_pixel_vals:
            out_array = numpy.zeros((out_shape[0], out_shape[1], 4), dtype='uint8')
        else:
            out_array = numpy.zeros((out_shape[0], out_shape[1], 4), dtype=X.dtype)

        #colors default to 0, alpha defaults to 1 (opaque)
        if output_pixel_vals:
            channel_defaults = [0,0,0,255]
        else:
            channel_defaults = [0.,0.,0.,1.]

        for i in xrange(4):
            if X[i] is None:
                out_array[:,:,i] = numpy.zeros(out_shape,
                        dtype='uint8' if output_pixel_vals else out_array.dtype
                        )+channel_defaults[i]
            else:
                out_array[:,:,i] = tile_raster_images(X[i], img_shape, tile_shape, tile_spacing, scale_rows_to_unit_interval, output_pixel_vals)
        return out_array

    else:
        H, W = img_shape
        Hs, Ws = tile_spacing

        out_scaling = 1
        if output_pixel_vals and str(X.dtype).startswith('float'):
            out_scaling = 255

        out_array = numpy.zeros(out_shape, dtype='uint8' if output_pixel_vals else X.dtype)
        for tile_row in xrange(tile_shape[0]):
            for tile_col in xrange(tile_shape[1]):
                if tile_row * tile_shape[1] + tile_col < X.shape[0]:
                    if scale_rows_to_unit_interval:
                        try:
                            this_img = scale_to_unit_interval(
                                    X[tile_row * tile_shape[1] + tile_col].reshape(img_shape),
                                    eps=min_dynamic_range)
                        except ValueError:
                            raise ValueError('Failed to reshape array of shape %s to shape %s'
                                    % (
                                        X[tile_row*tile_shape[1] + tile_col].shape
                                        , img_shape
                                        ))
                    else:
                        this_img = X[tile_row * tile_shape[1] + tile_col].reshape(img_shape)
                    out_array[
                        tile_row * (H+Hs):tile_row*(H+Hs)+H,
                        tile_col * (W+Ws):tile_col*(W+Ws)+W
                        ] \
                        = this_img * out_scaling
        return out_array
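
# Hedged usage sketch for tile_raster_images, assuming the numpy import and
# the scale_to_unit_interval helper already present in this module. The data
# below is random and purely illustrative.
def tile_demo():
    X = numpy.random.rand(20, 28 * 28)  # 20 flattened 28x28 images
    out = tile_raster_images(X, img_shape=(28, 28), tile_shape=(4, 5),
                             tile_spacing=(1, 1))
    # 4 rows and 5 columns of 28x28 tiles separated by 1-pixel gaps
    assert out.shape == ((28 + 1) * 4 - 1, (28 + 1) * 5 - 1)
    return out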
Example #6
    def __init__(self, which_set, center=False, example_range=None):
        """
        .. todo::

            WRITEME
        """
        if which_set == 'train':
            train = load('${PYLEARN2_DATA_PATH}/stl10/stl10_matlab/train.mat')

            # Load the class names
            self.class_names = [
                array[0].encode('utf-8') for array in train['class_names'][0]
            ]

            # Load the fold indices
            fold_indices = train['fold_indices']
            assert fold_indices.shape == (1, 10)
            self.fold_indices = np.zeros((10, 1000), dtype='uint16')
            for i in xrange(10):
                indices = fold_indices[0, i]
                assert indices.shape == (1000, 1)
                assert indices.dtype == 'uint16'
                self.fold_indices[i, :] = indices[:, 0]

            # The data is stored as uint8
            # If we leave it as uint8, it will cause the CAE to silently fail
            # since theano will treat derivatives wrt X as 0
            X = np.cast['float32'](train['X'])

            assert X.shape == (5000, 96 * 96 * 3)

            if example_range is not None:
                X = X[example_range[0]:example_range[1], :]

            # this is uint8
            y = train['y'][:, 0]
            assert y.shape == (5000, )
        elif which_set == 'test':
            test = load('${PYLEARN2_DATA_PATH}/stl10/stl10_matlab/test.mat')

            # Load the class names
            self.class_names = [
                array[0].encode('utf-8') for array in test['class_names'][0]
            ]

            # The data is stored as uint8
            # If we leave it as uint8, it will cause the CAE to silently fail
            # since theano will treat derivatives wrt X as 0

            X = np.cast['float32'](test['X'])
            assert X.shape == (8000, 96 * 96 * 3)

            if example_range is not None:
                X = X[example_range[0]:example_range[1], :]

            # this is uint8
            y = test['y'][:, 0]
            assert y.shape == (8000, )

        elif which_set == 'unlabeled':
            unlabeled = load('${PYLEARN2_DATA_PATH}/stl10/stl10_matlab/'
                             'unlabeled.mat')

            X = unlabeled['X']

            # this file is stored in HDF format, which transposes everything
            assert X.shape == (96 * 96 * 3, 100000)
            assert X.dtype == 'uint8'

            if example_range is None:
                X = X.value
            else:
                X = X.value[:, example_range[0]:example_range[1]]
            X = np.cast['float32'](X.T)

            unlabeled.close()

            y = None

        else:
            raise ValueError('"' + which_set + '" is not an STL10 dataset. '
                             'Recognized values are "train", "test", and '
                             '"unlabeled".')
        if center:
            X -= 127.5

        view_converter = dense_design_matrix.DefaultViewConverter((96, 96, 3))

        super(STL10, self).__init__(X=X,
                                    y=y,
                                    y_labels=10,
                                    view_converter=view_converter)

        for i in xrange(self.X.shape[0]):
            mat = X[i:i + 1, :]
            topo = self.get_topological_view(mat)
            for j in xrange(topo.shape[3]):
                temp = topo[0, :, :, j].T.copy()
                topo[0, :, :, j] = temp
            mat = self.get_design_matrix(topo)
            X[i:i + 1, :] = mat

        assert not contains_nan(self.X)
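
    # Hedged usage sketch: constructing this dataset requires the STL-10
    # matlab files under ${PYLEARN2_DATA_PATH}/stl10/stl10_matlab/. The
    # calls below are illustrative only.
    #
    #     stl10_train = STL10(which_set='train', center=True)
    #     stl10_unlabeled = STL10(which_set='unlabeled',
    #                             example_range=(0, 10000))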
Example #7
    def __init__(self,
                 datasets,
                 which_set,
                 sequence=1,
                 dropout=True,
                 normalise=[],
                 labels=[],
                 shuffle=False,
                 start=None,
                 stop=None):

        self.args = locals()

        assert which_set in ['train', 'valid']
        assert len(normalise) == len(datasets)

        vector_spaces = tuple()
        data = tuple()
        self.mean = list()
        self.std = list()
        z = list()
        for dataset, n in zip(datasets, normalise):
            if dataset[-3:] == 'npz':
                mode_data = N.load(dataset)['arr_0']
            else:
                mode_data = N.load(dataset)

            z.append((mode_data == 0).all(axis=1))

            r, c = mode_data.shape

            if start is not None:
                assert stop is not None
                assert start >= 0
                assert stop > start
                if stop > mode_data.shape[0]:
                    raise ValueError('stop=' + str(stop) + '>' + 'm=' +
                                     str(mode_data.shape[0]))
                mode_data = mode_data[start:stop, :]
                if mode_data.shape[0] != stop - start:
                    raise ValueError("data.shape[0]: %d. start: %d stop: %d" %
                                     (mode_data.shape[0], start, stop))

            cuts = (mode_data == 0).all(axis=1).nonzero()
            mean = 1
            std = 1

            if n == 1:
                mean = mode_data.mean()
                std = mode_data.std()
            elif n == 2:
                mean = mode_data.mean(axis=0)
                std = mode_data.std(axis=0)

            mode_data = (mode_data - mean) / std
            self.mean.append(mean)
            self.std.append(std)

            if sequence != 1:
                temp = mode_data
                mode_data = np.zeros([temp.shape[0], sequence * temp.shape[1]])
                for i in range(0, temp.shape[0] - sequence):
                    mode_data[i, :] = temp[i:i + sequence, :].reshape(
                        sequence * temp.shape[1])
                del temp

            vector_spaces = vector_spaces + (VectorSpace(mode_data.shape[1]), )
            data = data + (mode_data, )

        # # remove changes between datasets if stacked
        # b = N.logical_and(z[0],z[1])
        # for a in z[2:]:
        #     b = N.logical_and(b,a)
        # for ii in range(b.shape[0]):
        #     if b[ii]:
        #         for i in range(ii-sequence+1,ii):
        #             b[i] = True
        # ind = N.logical_not(b).nonzero()
        # for ii in range(len(data)):
        #     data[ii] = data[ii][ind,:]
        #
        # assert data[0].shape[0] == data[1].shape[0]
        # assert data[2].shape[0] == data[1].shape[0]
        # assert data[2].shape[0] == data[0].shape[0]

        ground_truth = data[0].copy()
        for ii in data[1:]:
            ground_truth = N.concatenate((ground_truth, ii), axis=1)

        # Modal Dropout - drop only 1 mode at a time
        if dropout:

            data = list(data)
            seq = N.ones((1, 3))
            for ii in range(len(datasets)):
                seq2 = seq.copy()
                seq2[:, ii] = N.zeros((seq.shape[0]))
                seq = N.concatenate((seq, seq2), axis=0)
            seq = seq[1:-1, :]

            cases = N.where(seq.sum(axis=1) != len(datasets) - 1)
            seq = N.delete(seq, cases, axis=0)
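
            # For reference: with three modes the construction above yields
            #     seq == [[0, 1, 1],
            #             [1, 0, 1],
            #             [1, 1, 0]]
            # i.e. one extra copy of the data per mode, with exactly that
            # mode zeroed out, while the concatenated ground truth target
            # stays intact.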

            all_data = list(data)
            all_data.append(ground_truth)

            for ii in all_data:
                assert not N.isnan(N.sum(ii))
            for i, ii in enumerate(seq):
                for j, jj in enumerate(ii):
                    if jj:
                        all_data[j] = N.concatenate((all_data[j], data[j]),
                                                    axis=0)
                    else:
                        all_data[j] = N.concatenate(
                            (all_data[j], N.zeros(data[j].shape)), axis=0)
                all_data[-1] = N.concatenate((all_data[-1], ground_truth),
                                             axis=0)

            data = tuple(all_data)
            del all_data
        else:
            data = list(data)
            data.append(ground_truth)
            data = tuple(data)
        vector_spaces = vector_spaces + (VectorSpace(data[-1].shape[1]), )

        if shuffle:
            self.shuffle_rng = make_np_rng(None, [1, 2, 3],
                                           which_method="shuffle")
            for ii in xrange(data[0].shape[0]):
                jj = self.shuffle_rng.randint(data[0].shape[0])
                # Copy ensures that memory is not aliased.
                for d in data:
                    tmp = d[ii, :].copy()
                    d[ii, :] = d[jj, :]
                    d[jj, :] = tmp

        # copy to avoid mutating the (mutable) default argument
        labels = list(labels)
        if len(labels) == 0:
            for ii in range(len(datasets)):
                labels.append('dataset_%i' % ii)
        labels.append('targets')
        data_specs = (CompositeSpace(vector_spaces), tuple(labels))
        super(CustomMMLoaderDropout, self).__init__(data=data,
                                                    data_specs=data_specs)
Example #8
    def __init__(self,
                 models,
                 datasets,
                 normalise,
                 which_set,
                 batch_size,
                 shuffle=False,
                 start=None,
                 stop=None,
                 length=None,
                 axes=['b', 0, 1, 'c']):

        self.args = locals()
        existing_data_path = os.environ['MMDAEdata'] + splitext(
            basename(models[0]))[0] + '_' + splitext(basename(
                datasets[0]))[0] + '_' + splitext(
                    basename(models[1]))[0] + '_' + splitext(
                        basename(datasets[1]))[0] + '_' + which_set + '.npy'
        assert which_set in ['train', 'valid']
        assert type(models) is list
        assert type(datasets) is list
        assert len(models) == len(datasets)
        assert len(models) == len(normalise)

        def dimshuffle(b01c):
            default = ('b', 0, 1, 'c')
            return b01c.transpose(*[default.index(axis) for axis in axes])

        # only process if it hasn't been done already
        if not os.path.exists(existing_data_path):
            for ii in range(len(models)):
                # Load single mode model for first layer
                model = serial.load(models[ii])
                # Load the single mode data
                data = N.load(datasets[ii])

                if normalise[ii] == 1:
                    data_mean = data.mean()
                    data_std = data.std()
                elif normalise[ii] == 2:
                    data_mean = data.mean(axis=0)
                    data_std = data.std(axis=0)
                else:
                    # no normalisation requested for this mode
                    data_mean = 0.
                    data_std = 1.

                data = (data - data_mean) / data_std

                sequence = model.dataset_yaml_src.split('sequence: ')
                if len(sequence) > 1:
                    sequence = int(sequence[1].split(',')[0])
                else:
                    sequence = 1
                if 'sequence_old' in locals():
                    assert sequence == sequence_old
                sequence_old = sequence
                if sequence != 1:
                    temp = data
                    data = np.zeros([temp.shape[0], sequence * temp.shape[1]])
                    for i in range(0, temp.shape[0] - sequence):
                        data[i, :] = temp[i:i + sequence, :].reshape(
                            1, sequence * temp.shape[1])
                    del temp
                if start is not None:
                    assert stop is not None
                    assert start >= 0
                    assert stop > start
                    assert ((stop - start) % batch_size) == 0
                    if stop > data.shape[0]:
                        raise ValueError('stop=' + str(stop) + '>' + 'm=' +
                                         str(data.shape[0]))
                    data = data[start:stop, :]
                    if data.shape[0] != stop - start:
                        raise ValueError("data.shape[0]: %d. start: %d stop: %d"
                                         % (data.shape[0], start, stop))

                # Process data to get hidden representation from first layer
                data = theano.shared(data)
                data = model.mf(data)
                data = data[0]
                data = data[0]
                data = data.eval()

                if 'topo_view' not in locals():
                    topo_view = data.reshape(data.shape[0], 1, data.shape[1])
                else:
                    topo_view = N.append(topo_view,
                                         data.reshape(data.shape[0], 1,
                                                      data.shape[1]),
                                         axis=2)

            m, r, c = topo_view.shape

            topo_view = topo_view.reshape(m, r, c, 1)

            # save the data to avoid reprocessing later
            N.save(existing_data_path, topo_view)

        else:
            topo_view = N.load(existing_data_path)
            m = topo_view.shape[0]

        if shuffle:
            self.shuffle_rng = make_np_rng(None, [1, 2, 3],
                                           which_method="shuffle")
            for i in xrange(topo_view.shape[0]):
                j = self.shuffle_rng.randint(m)
                # Copy ensures that memory is not aliased.
                tmp = topo_view[i, :, :, :].copy()
                topo_view[i, :, :, :] = topo_view[j, :, :, :]
                topo_view[j, :, :, :] = tmp

        super(CustomMMPosterior,
              self).__init__(topo_view=dimshuffle(topo_view))

        assert not N.any(N.isnan(self.X))
def check_sample_correctishness_c01b(f):
    batch_size = 5
    rows = 32
    cols = 30
    channels = 3
    pool_rows = 2
    pool_cols = 3
    rng = np.random.RandomState([2012, 9, 26])
    zv = rng.randn(channels, rows, cols,
                   batch_size).astype(config.floatX) * 2. - 3.
    top_down_v = rng.randn(channels, rows / pool_rows, cols / pool_cols,
                           batch_size).astype(config.floatX)

    z_th = T.TensorType(broadcastable=(False, False, False, False),
                        dtype = config.floatX)()
    z_th.name = 'z_th'
    z_th.tag.test_value = zv

    top_down_th = T.TensorType(broadcastable=(False, False, False, False),
                               dtype = config.floatX)()
    top_down_th.name = 'top_down_th'
    top_down_th.tag.test_value = top_down_v

    theano_rng = MRG_RandomStreams(rng.randint(2147462579))
    p_th, h_th, p_sth, h_sth = f(z_th, (pool_rows, pool_cols), top_down_th,
                                 theano_rng)

    prob_func = function([z_th, top_down_th], [p_th, h_th])
    pv, hv = prob_func(zv, top_down_v)

    sample_func = function([z_th, top_down_th], [p_sth, h_sth])

    acc_p = 0. * pv
    acc_h = 0. * hv

    # make sure the test gets good coverage, ie, that it includes
    # many different activation probs for both detector and pooling layer
    buckets = 10
    bucket_width = 1. / float(buckets)
    for i in xrange(buckets):
        lower_lim = i * bucket_width
        upper_lim = (i+1) * bucket_width

        assert np.any((pv >= lower_lim) * (pv < upper_lim))
        assert np.any((hv >= lower_lim) * (hv < upper_lim))

    assert upper_lim == 1.

    for i in xrange(10000):
        ps, hs = sample_func(zv, top_down_v)

        assert ps.shape == pv.shape
        assert hs.shape == hv.shape

        acc_p += ps
        acc_h += hs

    est_p = acc_p / float(i+1)
    est_h = acc_h / float(i+1)

    pd = np.abs(est_p-pv)
    hd = np.abs(est_h-hv)

    # don't really know how tight this should be
    # but you can try to pose an equivalent problem
    # and implement it in another way
    # using a numpy implementation in softmax_acc.py
    # I got a max error of .17
    assert max(pd.max(), hd.max()) < .17

    # Do exhaustive checks on just the last sample
    assert np.all((ps == 0) + (ps == 1))
    assert np.all((hs == 0) + (hs == 1))

    for k in xrange(batch_size):
        for i in xrange(ps.shape[1]):
            for j in xrange(ps.shape[2]):
                for l in xrange(channels):
                    p = ps[l, i, j, k]
                    h = hs[l, i*pool_rows:(i+1)*pool_rows,
                           j*pool_cols:(j+1)*pool_cols, k]
                    assert h.shape == (pool_rows, pool_cols)
                    assert p == h.max()

    """ If you made it to here, it's correctish
Example #10
    def __init__(self, which_set, center=False, custom_path=None):
        assert which_set in ['train', 'test', 'unlabeled', 'custom']

        path = "${PYLEARN2_DATA_PATH}/TLChallenge"

        if which_set == 'train':
            path += '/training/training-data.dat'
        elif which_set == 'test':
            path += '/test/test-data.dat'
        elif which_set == 'unlabeled':
            path += '/unlabelled_tiny.dat'
        elif which_set == 'custom':
            path = custom_path

        remote_path = preprocess(path)

        path = cache.datasetCache.cache_file(remote_path)
        X = N.fromfile(path, dtype=N.uint8, sep=' ')

        X = X.reshape(X.shape[0] / (32 * 32 * 3), 32 * 32 * 3, order='F')

        assert X.max() == 255
        assert X.min() == 0

        X = N.cast['float32'](X)
        y = None

        if center:
            X -= 127.5

        view_converter = dense_design_matrix.DefaultViewConverter((32, 32, 3))

        X = view_converter.design_mat_to_topo_view(X)

        X = N.transpose(X, (0, 2, 1, 3))

        X = view_converter.topo_view_to_design_mat(X)

        super(TL_Challenge, self).__init__(
            X=X,
            y=y,
            y_labels=None if y is None else N.max(y) + 1,
            view_converter=view_converter)

        assert not N.any(N.isnan(self.X))

        if which_set == 'train' or which_set == 'test':
            labels_path = remote_path[:-8] + 'labels.dat'
            labels_path = cache.datasetCache.cache_file(labels_path)
            self.y_fine = N.fromfile(labels_path, dtype=N.uint8, sep=' ')
            assert len(self.y_fine.shape) == 1
            assert self.y_fine.shape[0] == X.shape[0]
            # 0 :  aquatic_mammals
            # 1 :  fish
            # 2 :  flowers
            FOOD_CONTAINER = 3
            FRUIT = 4
            # 5 :  household_electrical_devices
            FURNITURE = 6
            INSECTS = 7
            # 8 :  large_carnivores
            # 9 :  large_man-made_outdoor_things
            # 10 :  large_natural_outdoor_scenes
            LARGE_OMNIVORES_HERBIVORES = 11
            MEDIUM_MAMMAL = 12
            # 13 :  non-insect_invertebrates
            # 14 :  people
            # 15 :  reptiles
            # 16 :  small_mammals
            # 17 :  trees
            # 18 :  vehicles_1
            # 19 :  vehicles_2

            self.y_coarse = self.y_fine.copy()
            self.y_coarse[self.y_coarse == 100] = INSECTS
            self.y_coarse[self.y_coarse == 101] = LARGE_OMNIVORES_HERBIVORES
            self.y_coarse[self.y_coarse == 102] = LARGE_OMNIVORES_HERBIVORES
            self.y_coarse[self.y_coarse == 103] = LARGE_OMNIVORES_HERBIVORES
            self.y_coarse[self.y_coarse == 104] = FRUIT
            self.y_coarse[self.y_coarse == 105] = FOOD_CONTAINER
            self.y_coarse[self.y_coarse == 106] = FRUIT
            self.y_coarse[self.y_coarse == 107] = MEDIUM_MAMMAL
            self.y_coarse[self.y_coarse == 108] = FRUIT
            self.y_coarse[self.y_coarse == 109] = FURNITURE

            assert self.y_coarse.min() == 3
            assert self.y_coarse.max() == 12

            for i in xrange(120):
                if self.y_coarse[i] == FRUIT:

                    assert self.y_fine[i] in [104, 106, 108]
Example #11
def test_softmax_mf_energy_consistent():

    # A test of the Softmax class
    # Verifies that the mean field update is consistent with
    # the energy function

    # Since a Softmax layer contains only one random variable
    # (with n_classes possible values) the mean field assumption
    # does not impose any restriction so mf_update simply gives
    # the true expected value of h given v.
    # We also know P(h |  v)
    #  = P(h, v) / P( v)
    #  = P(h, v) / sum_h P(h, v)
    #  = exp(-E(h, v)) / sum_h exp(-E(h, v))
    # So we can check that computing P(h | v) with both
    # methods works the same way

    rng = np.random.RandomState([2012, 11, 1, 1131])

    # Make DBM
    num_vis = rng.randint(1, 11)
    n_classes = rng.randint(1, 11)

    v = BinaryVector(num_vis)
    v.set_biases(rng.uniform(-1., 1., (num_vis, )).astype(config.floatX))

    y = Softmax(n_classes=n_classes, layer_name='y', irange=1.)
    y.set_biases(rng.uniform(-1., 1., (n_classes, )).astype(config.floatX))

    dbm = DBM(visible_layer=v, hidden_layers=[y], batch_size=1, niter=50)

    # Randomly pick a v to condition on
    # (Random numbers are generated via dbm.rng)
    layer_to_state = dbm.make_layer_to_state(1)
    v_state = layer_to_state[v]
    y_state = layer_to_state[y]

    # Infer P(y | v) using mean field
    expected_y = y.mf_update(state_below=v.upward_state(v_state))

    expected_y = expected_y[0, :]

    expected_y = expected_y.eval()

    # Infer P(y | v) using the energy function
    energy = dbm.energy(V=v_state, hidden=[y_state])
    unnormalized_prob = T.exp(-energy)
    assert unnormalized_prob.ndim == 1
    unnormalized_prob = unnormalized_prob[0]
    unnormalized_prob = function([], unnormalized_prob)

    def compute_unnormalized_prob(which):
        write_y = np.zeros((n_classes, ))
        write_y[which] = 1.

        y_value = y_state.get_value()

        y_value[0, :] = write_y

        y_state.set_value(y_value)

        return unnormalized_prob()

    probs = [compute_unnormalized_prob(idx) for idx in xrange(n_classes)]
    denom = sum(probs)
    probs = [on_prob / denom for on_prob in probs]

    # np.asarray(probs) doesn't make a numpy vector, so I do it manually
    wtf_numpy = np.zeros((n_classes, ))
    for i in xrange(n_classes):
        wtf_numpy[i] = probs[i]
    probs = wtf_numpy

    if not np.allclose(expected_y, probs):
        print('mean field expectation of h:', expected_y)
        print('expectation of h based on enumerating energy function values:',
              probs)
        assert False
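
# Hedged numpy-only sketch of the enumeration step used in the test above:
# for a single softmax variable, normalizing exp(-E) over all of its states
# gives P(y | v). The energy values below are illustrative only.
def enumerate_softmax_demo():
    energies = np.array([1.3, -0.2, 0.7])  # E(y = i, v) for each class i
    unnormalized = np.exp(-energies)
    probs = unnormalized / unnormalized.sum()
    assert np.allclose(probs.sum(), 1.)
    return probs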
def check_sample_correctishness_b01c(f):
    batch_size = 5
    rows = 32
    cols = 30
    channels = 3
    pool_rows = 2
    pool_cols = 3
    rng = np.random.RandomState([2012, 9, 26])
    zv = rng.randn(batch_size, rows, cols,
                   channels).astype(config.floatX) * 2. - 3.
    top_down_v = rng.randn(batch_size, rows / pool_rows, cols / pool_cols,
                           channels).astype(config.floatX)

    z_th = T.TensorType(broadcastable=(False, False, False, False),
                        dtype = config.floatX)()
    z_th.name = 'z_th'

    top_down_th = T.TensorType(broadcastable=(False, False, False, False),
                               dtype = config.floatX)()
    top_down_th.name = 'top_down_th'

    theano_rng = MRG_RandomStreams(rng.randint(2147462579))
    p_th, h_th, p_sth, h_sth = f(z_th, (pool_rows, pool_cols), top_down_th,
                                 theano_rng)

    prob_func = function([z_th, top_down_th], [p_th, h_th])
    pv, hv = prob_func(zv, top_down_v)

    sample_func = function([z_th, top_down_th], [p_sth, h_sth])

    acc_p = 0. * pv
    acc_h = 0. * hv

    # make sure the test gets good coverage, ie, that it includes many
    # different activation probs for both detector and pooling layer
    buckets = 10
    bucket_width = 1. / float(buckets)
    for i in xrange(buckets):
        lower_lim = i * bucket_width
        upper_lim = (i+1) * bucket_width

        assert np.any((pv >= lower_lim) * (pv < upper_lim))
        assert np.any((hv >= lower_lim) * (hv < upper_lim))

    assert upper_lim == 1.

    for i in xrange(10000):
        ps, hs = sample_func(zv, top_down_v)

        assert ps.shape == pv.shape
        assert hs.shape == hv.shape

        acc_p += ps
        acc_h += hs

    est_p = acc_p / float(i+1)
    est_h = acc_h / float(i+1)

    pd = np.abs(est_p-pv)
    hd = np.abs(est_h-hv)

    """
    # plot maps of the estimation error; this is to see if it has some
    # spatial pattern, which is useful for detecting bugs like not
    # handling the border correctly, etc.
    from pylearn2.gui.patch_viewer import PatchViewer

    pv = PatchViewer((pd.shape[0], pd.shape[3]), (pd.shape[1], pd.shape[2]),
                     is_color=False)
    for i in xrange(pd.shape[0]):
        for j in xrange(pd.shape[3]):
            pv.add_patch((pd[i, :, :, j] / pd.max()) * 2.0 - 1.0,
                         rescale=False)
    pv.show()

    pv = PatchViewer((hd.shape[0], hd.shape[3]), (hd.shape[1], hd.shape[2]),
                     is_color=False)
    for i in xrange(hd.shape[0]):
        for j in xrange(hd.shape[3]):
            pv.add_patch((hd[i, :, :, j] / hd.max()) * 2.0 - 1.0,
                         rescale=False)
    pv.show()
    """

    """
    plot expectation to estimate versus error in estimation
    expect bigger errors for values closer to 0.5

    from matplotlib import pyplot as plt

    #nelem = reduce( lambda x, y : x*y, pd.shape)
    #plt.scatter( pv.reshape(nelem), pd.reshape(nelem))
    #plt.show()

    nelem = reduce( lambda x, y : x*y, hd.shape)
    plt.scatter( hv.reshape(nelem), hd.reshape(nelem))
    plt.show()
    """

    # don't really know how tight this should be
    # but you can try to pose an equivalent problem
    # and implement it in another way
    # using a numpy implementation in softmax_acc.py
    # I got a max error of .17
    assert max(pd.max(), hd.max()) < .17

    # Do exhaustive checks on just the last sample
    assert np.all((ps == 0) + (ps == 1))
    assert np.all((hs == 0) + (hs == 1))

    for k in xrange(batch_size):
        for i in xrange(ps.shape[1]):
            for j in xrange(ps.shape[2]):
                for l in xrange(channels):
                    p = ps[k, i, j, l]
                    h = hs[k, i*pool_rows:(i+1)*pool_rows,
                           j*pool_cols:(j+1)*pool_cols, l]
                    assert h.shape == (pool_rows, pool_cols)
                    assert p == h.max()

    """ If you made it to here, it's correctish
Example #13
    def do_test(pool_size_1):

        # Make DBM and read out its pieces
        dbm = make_random_basic_binary_dbm(
            rng=rng,
            pool_size_1=pool_size_1,
            pool_size_2=1,  # centering is only updated for pool size 1
            center=True)

        v = dbm.visible_layer
        h1, h2 = dbm.hidden_layers

        num_p = h1.get_output_space().dim

        # Choose which unit we will test
        p_idx = rng.randint(num_p)

        # Randomly pick a v, h1[-p_idx], and h2 to condition on
        # (Random numbers are generated via dbm.rng)
        layer_to_state = dbm.make_layer_to_state(1)
        v_state = layer_to_state[v]
        h1_state = layer_to_state[h1]
        h2_state = layer_to_state[h2]

        # Debugging checks
        num_h = h1.detector_layer_dim
        assert num_p * pool_size_1 == num_h
        pv, hv = h1_state
        assert pv.get_value().shape == (1, num_p)
        assert hv.get_value().shape == (1, num_h)

        # Infer P(h1[i] | h2, v) using mean field
        expected_p, expected_h = h1.mf_update(
            state_below=v.upward_state(v_state),
            state_above=h2.downward_state(h2_state),
            layer_above=h2)

        expected_p = expected_p[0, p_idx]
        expected_h = expected_h[0,
                                p_idx * pool_size_1:(p_idx + 1) * pool_size_1]

        expected_p, expected_h = function([], [expected_p, expected_h])()

        # Infer P(h1[i] | h2, v) using the energy function
        energy = dbm.energy(V=v_state, hidden=[h1_state, h2_state])
        unnormalized_prob = T.exp(-energy)
        assert unnormalized_prob.ndim == 1
        unnormalized_prob = unnormalized_prob[0]
        unnormalized_prob = function([], unnormalized_prob)

        p_state, h_state = h1_state

        def compute_unnormalized_prob(which_detector):
            write_h = np.zeros((pool_size_1, ))
            if which_detector is None:
                write_p = 0.
            else:
                write_p = 1.
                write_h[which_detector] = 1.

            h_value = h_state.get_value()
            p_value = p_state.get_value()

            h_value[0, p_idx * pool_size_1:(p_idx + 1) * pool_size_1] = write_h
            p_value[0, p_idx] = write_p

            h_state.set_value(h_value)
            p_state.set_value(p_value)

            return unnormalized_prob()

        off_prob = compute_unnormalized_prob(None)
        on_probs = [
            compute_unnormalized_prob(idx) for idx in xrange(pool_size_1)
        ]
        denom = off_prob + sum(on_probs)
        off_prob /= denom
        on_probs = [on_prob / denom for on_prob in on_probs]
        assert np.allclose(1., off_prob + sum(on_probs))

        # np.asarray(on_probs) doesn't make a numpy vector, so I do it manually
        wtf_numpy = np.zeros((pool_size_1, ))
        for i in xrange(pool_size_1):
            wtf_numpy[i] = on_probs[i]
        on_probs = wtf_numpy

        # Check that they match
        if not np.allclose(expected_p, 1. - off_prob):
            print('mean field expectation of p:', expected_p)
            print(
                'expectation of p based on enumerating energy function values:',
                1. - off_prob)
            print('pool_size_1:', pool_size_1)

            assert False
        if not np.allclose(expected_h, on_probs):
            print('mean field expectation of h:', expected_h)
            print(
                'expectation of h based on enumerating energy function values:',
                on_probs)
            assert False
Example #14
def check_bvmp_samples(value, num_samples, n, pool_size, mean, tol):
    """
    bvmp=BinaryVectorMaxPool
    value: a tuple giving (pooled batch, detector batch)   (all made with same params)
    num_samples: number of samples there should be in the batch
    n: detector layer dimension
    pool_size: size of each pool region
    mean: (expected value of pool unit, expected value of detector units)
    tol: amount the emprical mean is allowed to deviate from the analytical expectation

    checks that:
        1) all values are binary
        2) detector layer units are mutually exclusive
        3) pooled unit is max of the detector units
        4) correct number of samples is present
        5) variables are of the right shapes
        6) samples converge to the right expected value
    """

    pv, hv = value

    assert n % pool_size == 0
    num_pools = n // pool_size

    assert pv.ndim == 2
    assert pv.shape[0] == num_samples
    assert pv.shape[1] == num_pools

    assert hv.ndim == 2
    assert hv.shape[0] == num_samples
    assert hv.shape[1] == n

    assert is_binary(pv)
    assert is_binary(hv)

    for i in xrange(num_pools):
        sub_p = pv[:, i]
        assert sub_p.shape == (num_samples, )
        sub_h = hv[:, i * pool_size:(i + 1) * pool_size]
        assert sub_h.shape == (num_samples, pool_size)
        if not np.all(sub_p == sub_h.max(axis=1)):
            for j in xrange(num_samples):
                print(sub_p[j], sub_h[j, :])
                assert sub_p[j] == sub_h[j, :].max()
            assert False
        assert np.max(sub_h.sum(axis=1)) == 1

    p, h = mean
    assert p.ndim == 1
    assert h.ndim == 1
    emp_p = pv.mean(axis=0)
    emp_h = hv.mean(axis=0)

    max_diff = np.abs(p - emp_p).max()
    if max_diff > tol:
        print('expected value of pooling units: ', p)
        print('empirical expectation: ', emp_p)
        print('maximum difference: ', max_diff)
        raise ValueError("Pooling unit samples have an unlikely mean.")
    max_diff = np.abs(h - emp_h).max()
    if max_diff > tol:
        assert False
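
# Hedged usage sketch for check_bvmp_samples: a tiny hand-made batch that
# satisfies every invariant (binary values, mutually exclusive detector
# units within a pool, pooled unit equal to the pool max, empirical mean
# equal to the stated mean). Assumes np and is_binary from this module;
# all numbers are illustrative only.
def check_bvmp_samples_demo():
    hv = np.array([[1, 0, 0, 1],
                   [0, 1, 1, 0],
                   [1, 0, 1, 0],
                   [0, 0, 0, 1]], dtype='float32')
    pv = np.array([[1, 1],
                   [1, 1],
                   [1, 1],
                   [0, 1]], dtype='float32')
    mean = (pv.mean(axis=0), hv.mean(axis=0))
    check_bvmp_samples((pv, hv), num_samples=4, n=4, pool_size=2,
                       mean=mean, tol=1e-6)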
Example #15
        def split_train_valid(path, num_valid_train=400, num_valid_extra=200):
            """
            Extract a class-balanced number of samples from the train and
            extra sets for validation, and regard the remainder as the new
            train set.

            Parameters
            ----------
            path : str
                Directory containing the SVHN .mat files
            num_valid_train : int, optional
                Number of samples per class taken from train
            num_valid_extra : int, optional
                Number of samples per class taken from extra
            """

            # load difficult train
            data = load("{0}train_32x32.mat".format(path))
            valid_index = []
            for i in xrange(1, 11):
                index = numpy.nonzero(data['y'] == i)[0]
                index.flags.writeable = 1
                rng.shuffle(index)
                valid_index.append(index[:num_valid_train])

            valid_index = set(numpy.concatenate(valid_index))
            train_index = set(numpy.arange(data['X'].shape[3])) - valid_index
            valid_index = list(valid_index)
            train_index = list(train_index)

            train_x = data['X'][:, :, :, train_index]
            train_y = data['y'][train_index, :]
            valid_x = data['X'][:, :, :, valid_index]
            valid_y = data['y'][valid_index, :]

            train_size = data['X'].shape[3]
            assert train_x.shape[3] == train_size - num_valid_train * 10
            assert train_y.shape[0] == train_size - num_valid_train * 10
            assert valid_x.shape[3] == num_valid_train * 10
            assert valid_y.shape[0] == num_valid_train * 10
            del data
            gc.collect()

            # load extra train
            data = load("{0}extra_32x32.mat".format(path))
            valid_index = []
            for i in xrange(1, 11):
                index = numpy.nonzero(data['y'] == i)[0]
                index.flags.writeable = 1
                rng.shuffle(index)
                valid_index.append(index[:num_valid_extra])

            valid_index = set(numpy.concatenate(valid_index))
            train_index = set(numpy.arange(data['X'].shape[3])) - valid_index
            valid_index = list(valid_index)
            train_index = list(train_index)

            train_x = numpy.concatenate(
                (train_x, data['X'][:, :, :, train_index]), axis=3)
            train_y = numpy.concatenate((train_y, data['y'][train_index, :]))
            valid_x = numpy.concatenate(
                (valid_x, data['X'][:, :, :, valid_index]), axis=3)
            valid_y = numpy.concatenate((valid_y, data['y'][valid_index, :]))

            extra_size = data['X'].shape[3]
            sizes['valid'] = (num_valid_train + num_valid_extra) * 10
            sizes['splitted_train'] = train_size + extra_size - sizes['valid']
            assert train_x.shape[3] == sizes['splitted_train']
            assert train_y.shape[0] == sizes['splitted_train']
            assert valid_x.shape[3] == sizes['valid']
            assert valid_y.shape[0] == sizes['valid']
            del data
            gc.collect()

            train_x = numpy.cast[config.floatX](train_x)
            valid_x = numpy.cast[config.floatX](valid_x)
            return design_matrix_view(train_x), train_y,\
                design_matrix_view(valid_x), valid_y
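
        # Hedged usage sketch (the path must point at the directory holding
        # the SVHN *_32x32.mat files); illustrative only:
        #
        #     train_X, train_y, valid_X, valid_y = split_train_valid(
        #         path, num_valid_train=400, num_valid_extra=200)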
Example #16
def estimate_likelihood(W_list,
                        b_list,
                        trainset,
                        testset,
                        free_energy_fn=None,
                        batch_size=100,
                        large_ais=False,
                        log_z=None,
                        pos_mf_steps=50,
                        pos_sample_steps=0):
    """
    Compute estimate of log-partition function and likelihood of trainset and
    testset

    Parameters
    ----------
    W_list : array-like object of theano shared variables
        Weights of the DBM
    b_list : array-like object of theano shared variables
        Biases of the DBM
    trainset : pylearn2.datasets.dataset.Dataset
        Training set
    testset : pylearn2.datasets.dataset.Dataset
        Test set
    free_energy_fn : theano.function
        Function which, given temperature beta_k, computes the free energy
        of the samples stored in model.samples. This function should return
        a symbolic vector.
    batch_size : integer
        Size of a batch of examples
    large_ais : boolean
        If True, will use 3e5 chains, instead of 3e4
    log_z : scalar, optional
        The log-partition function, if precomputed
    pos_mf_steps : int
        Number of fixed-point iterations used for approximate inference
    pos_sample_steps : int
        Plays the same role as pos_mf_steps; when both pos_mf_steps > 0 and
        pos_sample_steps > 0, pos_mf_steps takes priority

    Returns
    -------
    nll : scalar
        Negative log-likelihood of data.X under `model`.
    logz : scalar
        Estimate of log-partition function of `model`.
    """

    warnings.warn("This is garanteed to work only for DBMs with a " +
                  "BinaryVector visible layer and BinaryVectorMaxPool " +
                  "hidden layers with pool sizes of 1.")

    # Add a dummy placeholder for visible layer's weights in W_list
    W_list = [None] + W_list

    # Depth of the DBM
    depth = len(b_list)

    # Initialize samples
    psamples = []
    nsamples = []
    for i, b in enumerate(b_list):
        psamples += [
            utils.sharedX(rng.rand(batch_size,
                                   b.get_value().shape[0]),
                          name='psamples%i' % i)
        ]
        nsamples += [
            utils.sharedX(rng.rand(batch_size,
                                   b.get_value().shape[0]),
                          name='nsamples%i' % i)
        ]
    psamples[0] = T.matrix('psamples0')

    ##########################
    ## BUILD THEANO FUNCTIONS
    ##########################
    beta = T.scalar()

    # For an even number of layers, we marginalize the odd layers
    # (and vice-versa)
    marginalize_odd = (depth % 2) == 0

    # Build function to retrieve energy.
    E = -T.dot(nsamples[0], b_list[0]) * beta
    for i in xrange(1, depth):
        E -= T.sum(T.dot(nsamples[i - 1], W_list[i] * beta) * nsamples[i],
                   axis=1)
        E -= T.dot(nsamples[i], b_list[i] * beta)
    energy_fn = theano.function([beta], E)

    # Build inference function.
    assert (pos_mf_steps or pos_sample_steps)
    pos_steps = pos_mf_steps if pos_mf_steps else pos_sample_steps
    new_psamples = _e_step(psamples, W_list, b_list, n_steps=pos_steps)
    ups = OrderedDict()
    for psample, new_psample in zip(psamples[1:], new_psamples[1:]):
        ups[psample] = new_psample
    temp = numpy.asarray(trainset.X, dtype=floatX)
    mean_train = numpy.mean(temp, axis=0)
    inference_fn = theano.function(inputs=[psamples[0]],
                                   outputs=[],
                                   updates=ups)

    # Configure baserate bias for (h0 if `marginalize_odd` else h1)
    inference_fn(numpy.tile(mean_train, (batch_size, 1)))
    numpy_psamples = [mean_train[None, :]] + \
                     [psample.get_value() for psample in psamples[1:]]
    mean_pos = numpy.minimum(numpy_psamples[not marginalize_odd], 1 - 1e-5)
    mean_pos = numpy.maximum(mean_pos, 1e-5)
    pa_bias = -numpy.log(1. / mean_pos[0] - 1.)

    # Build Theano function to sample from interpolating distributions.
    updates = OrderedDict()
    new_nsamples = neg_sampling(W_list,
                                b_list,
                                nsamples,
                                beta=beta,
                                pa_bias=pa_bias,
                                marginalize_odd=marginalize_odd,
                                theano_rng=theano_rng)
    for (nsample, new_nsample) in zip(nsamples, new_nsamples):
        updates[nsample] = new_nsample
    sample_fn = theano.function([beta], [],
                                updates=updates,
                                name='sample_func')

    # Build function to compute free-energy of p_k(h1).
    fe_bp_h1 = free_energy_at_beta(W_list,
                                   b_list,
                                   nsamples,
                                   beta,
                                   pa_bias,
                                   marginalize_odd=marginalize_odd)
    free_energy_fn = theano.function([beta], fe_bp_h1)

    ###########
    ## RUN AIS
    ###########

    # Generate exact sample for the base model.
    for i, nsample_i in enumerate(nsamples):
        bias = pa_bias if i == 1 else b_list[i].get_value()
        hi_mean_vec = 1. / (1. + numpy.exp(-bias))
        hi_mean = numpy.tile(hi_mean_vec, (batch_size, 1))
        r = rng.random_sample(hi_mean.shape)
        hi_sample = numpy.array(hi_mean > r, dtype=floatX)
        nsample_i.set_value(hi_sample)

    # Default configuration for interpolating distributions
    if large_ais:
        betas = numpy.cast[floatX](numpy.hstack(
            (numpy.linspace(0, 0.5, 1e5 + 1)[:-1],
             numpy.linspace(0.5, 0.9,
                            1e5 + 1)[:-1], numpy.linspace(0.9, 1.0, 1e5))))
    else:
        betas = numpy.cast[floatX](numpy.hstack(
            (numpy.linspace(0, 0.5, 1e4 + 1)[:-1],
             numpy.linspace(0.5, 0.9,
                            1e4 + 1)[:-1], numpy.linspace(0.9, 1.0, 1e4))))

    if log_z is None:
        log_ais_w = compute_log_ais_weights(batch_size, free_energy_fn,
                                            sample_fn, betas)
        dlogz, var_dlogz = estimate_from_weights(log_ais_w)
        log_za = compute_log_za(b_list, pa_bias, marginalize_odd)
        log_z = log_za + dlogz
        logging.info('log_z = %f' % log_z)
        logging.info('log_za = %f' % log_za)
        logging.info('dlogz = %f' % dlogz)
        logging.info('var_dlogz = %f' % var_dlogz)

    train_ll = compute_likelihood_given_logz(nsamples, psamples, batch_size,
                                             energy_fn, inference_fn, log_z,
                                             trainset.X)
    logging.info('Training likelihood = %f' % train_ll)
    test_ll = compute_likelihood_given_logz(nsamples, psamples, batch_size,
                                            energy_fn, inference_fn, log_z,
                                            testset.X)
    logging.info('Test likelihood = %f' % test_ll)

    return (train_ll, test_ll, log_z)
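
# Hedged usage sketch for estimate_likelihood: W_list / b_list would be the
# trained DBM's weight and bias shared variables (here a two-hidden-layer
# DBM), and trainset / testset are pylearn2 Dataset objects. Illustrative
# only.
#
#     train_ll, test_ll, log_z = estimate_likelihood(
#         W_list=[W1, W2], b_list=[b_vis, b_h1, b_h2],
#         trainset=trainset, testset=testset,
#         batch_size=100, pos_mf_steps=50)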
def check_sample_correctishness_channelwise(f):
    """
    Tests that the sample mean converges to the conditional expectation given
    by the function Tests that p really is the max of the samples tests that
    at most one h in a group is on
    """

    batch_size = 27
    pool_size = 4
    n = pool_size * 21

    rng = np.random.RandomState([2012, 9, 26])
    zv = rng.randn(batch_size, n).astype(config.floatX) * 3.5 - 5.
    top_down_v = rng.randn(batch_size, n / pool_size).astype(config.floatX)

    z_th = T.matrix()
    z_th.tag.test_value = zv
    z_th.name = 'z_th'

    top_down_th = T.matrix()
    top_down_th.tag.test_value = top_down_v
    top_down_th.name = 'top_down_th'

    theano_rng = MRG_RandomStreams(rng.randint(2147462579))
    p_th, h_th, p_sth, h_sth = f(z_th, pool_size, top_down_th, theano_rng)

    prob_func = function([z_th, top_down_th], [p_th, h_th])
    pv, hv = prob_func(zv, top_down_v)

    sample_func = function([z_th, top_down_th], [p_sth, h_sth])

    acc_p = 0. * pv
    acc_h = 0. * hv

    # make sure the test gets good coverage, ie, that it includes
    # many different activation probs for both detector and pooling layer
    buckets = 10
    bucket_width = 1. / float(buckets)
    print(pv.min(), pv.max())
    print(hv.min(), hv.max())
    for i in xrange(buckets):
        lower_lim = i * bucket_width
        upper_lim = (i+1) * bucket_width
        print(lower_lim, upper_lim)

        assert np.any((pv >= lower_lim) * (pv < upper_lim))
        assert np.any((hv >= lower_lim) * (hv < upper_lim))

    assert upper_lim == 1.

    for i in xrange(10000):
        ps, hs = sample_func(zv, top_down_v)

        assert ps.shape == pv.shape
        assert hs.shape == hv.shape

        acc_p += ps
        acc_h += hs

    est_p = acc_p / float(i+1)
    est_h = acc_h / float(i+1)

    pd = np.abs(est_p-pv)
    hd = np.abs(est_h-hv)

    """
    # plot maps of the estimation error; this is to see if it has some
    # spatial pattern, which is useful for detecting bugs like not handling
    # the border correctly, etc.
    # from pylearn2.gui.patch_viewer import PatchViewer

    pv = PatchViewer((pd.shape[0], pd.shape[3]), (pd.shape[1], pd.shape[2]),
                     is_color=False)
    for i in xrange(pd.shape[0]):
        for j in xrange(pd.shape[3]):
            pv.add_patch((pd[i, :, :, j] / pd.max()) * 2.0 - 1.0,
                         rescale=False)
    pv.show()

    pv = PatchViewer((hd.shape[0], hd.shape[3]), (hd.shape[1], hd.shape[2]),
                     is_color=False)
    for i in xrange(hd.shape[0]):
        for j in xrange(hd.shape[3]):
            pv.add_patch((hd[i, :, :, j] / hd.max()) * 2.0 - 1.0,
                         rescale=False)
    pv.show()
    """

    """
    plot the expectation to estimate versus the error in the estimation;
    expect bigger errors for values closer to 0.5

    from matplotlib import pyplot as plt

    #nelem = reduce( lambda x, y : x*y, pd.shape)
    #plt.scatter( pv.reshape(nelem), pd.reshape(nelem))
    #plt.show()

    nelem = reduce( lambda x, y : x*y, hd.shape)
    plt.scatter( hv.reshape(nelem), hd.reshape(nelem))
    plt.show()
    """

    # don't really know how tight this should be
    # but you can try to pose an equivalent problem
    # and implement it in another way
    # using a numpy implementation in softmax_acc.py
    # I got a max error of .17
    assert max(pd.max(), hd.max()) < .17

    # Do exhaustive checks on just the last sample
    assert np.all((ps == 0) + (ps == 1))
    assert np.all((hs == 0) + (hs == 1))

    for k in xrange(batch_size):
        for i in xrange(ps.shape[1]):
            p = ps[k, i]
            h = hs[k, i*pool_size:(i+1)*pool_size]
            assert h.shape == (pool_size,)
            assert p == h.max()
            assert h.sum() <= 1

    """ If you made it to here, it's correctish
Example #18
0
import numpy as np

print(W2.shape)

prod = np.dot(W1, W2)
pv = make_viewer(prod.T)
if out_prefix is None:
    pv.show()
else:
    pv.save(out_prefix + "_prod.png")

print('Sorting so largest-norm layer 2 weights are plotted at the top')
norms = np.square(W2).sum(axis=0)
idxs = [elem[1] for elem in sorted(zip(-norms, range(norms.shape[0])))]

new = W2.copy()

for i in xrange(len(idxs)):
    new[:, i] = W2[:, idxs[i]]
W2 = new

dataset_yaml_src = model.dataset_yaml_src
dataset = yaml_parse.load(dataset_yaml_src)

imgs = dataset.get_weights_view(W1.T)

N1 = W1.shape[1]
N = W2.shape[1]

N = min(N, 100)
Example #19
0
assert dataset.X.shape[0] % batch_size == 0

X = model.get_input_space().make_batch_theano()
Y = model.fprop(X)

from theano import tensor as T

y = T.argmax(Y, axis=1)

from theano import function

f = function([X], y)

y = []

for i in xrange(int(dataset.X.shape[0] / batch_size)):
    x_arg = dataset.X[i * batch_size:(i + 1) * batch_size, :]
    if X.ndim > 2:
        x_arg = dataset.get_topological_view(x_arg)
    y.append(f(x_arg.astype(X.dtype)))

y = np.concatenate(y)
assert y.ndim == 1
assert y.shape[0] == dataset.X.shape[0]
# discard any zero-padding that was used to give the batches uniform size
y = y[:m]

out = open(out_path, 'w')
for i in xrange(y.shape[0]):
    out.write('%d\n' % y[i])
out.close()
Example #20
0
def pooling_matrix(groups, per_group, strides=None, dtype=None, sparse=None):
    """
    Construct a pooling matrix, optionally with overlapping pools
    arranged in a 1 or 2D topology.

    Parameters
    ----------
    groups : int or tuple
        The grid dimensions of a 1- or 2-dimensional pooling grid.
    per_group : int or tuple
        The grid dimensions of a single 1- or 2-dimensional feature
        pool. Must be same length as `groups`.
    strides : int or tuple, optional
        The stride of the pools along each dimension. A value of `None`
        is equivalent to setting it equal to `per_group`, i.e. no
        overlapping pools.
    dtype : dtype object or str, optional
        The dtype of the resulting pooling matrix.
    sparse : str, optional
        If `None`, the function will return a dense matrix (a rank-2
        `numpy.ndarray`). Specifying 'csc' or 'csr' in this argument will
        cause the function to return a `scipy.sparse.csc_matrix` or a
        `scipy.sparse.csr_matrix`, instead.

    Returns
    -------
    pools : ndarray or sparse matrix
        Either a dense 2-dimensional NumPy array or one of
        `scipy.sparse.csc_matrix` or `scipy.sparse.csr_matrix`, depending
        on the value of the `sparse` argument. In any case, the shape is
        `(n_pools, n_filters)` and the value of `pools[i, j]` is 1 if
        feature `j` is in pool `i`, and 0 otherwise.
    """

    # Error-check arguments and fill in row_stride and col_stride
    # if either argument is absent.
    def _validate_shape(shape, param_name):
        try:
            shape = tuple(shape)
            [int(val) for val in shape]
        except (ValueError, TypeError):
            try:
                shape = (int(shape), )
            except TypeError:
                reraise_as(
                    TypeError("%s must be int or int tuple" % param_name))
        return shape

    groups = _validate_shape(groups, 'groups')
    per_group = _validate_shape(per_group, 'per_group')
    if strides is not None:
        strides = _validate_shape(strides, 'strides')
    else:
        strides = per_group
    if len(groups) != len(per_group):
        raise ValueError('groups and per_group must have the same length')
    elif len(per_group) != len(strides):
        raise ValueError('per_group and strides must have the same length')
    if len(groups) > 2 or len(per_group) > 2:
        raise ValueError('only <= 2-dimensional pooling grids are supported')
    if not all(stride <= dim for stride, dim in izip(strides, per_group)):
        raise ValueError('strides must each be <= per_group dimensions')
    try:
        group_rows, group_cols = groups
        rows_per_group, cols_per_group = per_group
        row_stride, col_stride = strides
    except ValueError:
        group_rows, group_cols = groups[0], 1
        rows_per_group, cols_per_group = per_group[0], 1
        row_stride, col_stride = strides[0], 1
    if sparse is not None and sparse not in ('csc', 'csr'):
        raise ValueError("sparse must be one of (None, 'csr', 'csc')")
    # The total number of filters along either dimension is the
    # number of groups times the stride, plus whatever dangles
    # off the last filter (the added term is zero if there are no
    # overlapping pools).
    filter_rows = group_rows * row_stride + (rows_per_group - row_stride)
    filter_cols = group_cols * col_stride + (cols_per_group - col_stride)
    if dtype is None:
        dtype = theano.config.floatX
    # If the return type is dense we can treat it as a 4-tensor and
    # then reshape. If not, we'll need to do some index math below.
    shape = (group_rows, group_cols, filter_rows, filter_cols)
    matrix_shape = group_rows * group_cols, filter_rows * filter_cols
    if sparse is not None:
        # Use a dictionary-of-keys matrix at construction time,
        # since they are efficient for arbitrary assignment.
        # TODO: I think CSC/CSR are fast to construct if you know the total
        # number of elements, which should be easy to calculate.
        pools = scipy.sparse.dok_matrix(matrix_shape, dtype=dtype)
    else:
        pools = np.zeros(shape, dtype=dtype)
    for g_row in xrange(group_rows):
        for g_col in xrange(group_cols):
            # The start and end points of the contiguous block of 1's.
            row_start = row_stride * g_row
            row_end = row_start + rows_per_group
            col_start = col_stride * g_col
            col_end = col_start + cols_per_group
            if sparse is not None:
                for f_row in xrange(row_start, row_end):
                    matrix_cols = slice(f_row * shape[3] + col_start,
                                        f_row * shape[3] + col_end)
                    # The group to which this belongs.
                    matrix_row = g_row * shape[1] + g_col
                    pools[matrix_row, matrix_cols] = 1.
            else:
                # If the matrix is a dense 4-tensor then we can get
                # away with doing an entire pool in one assignment.
                pools[g_row, g_col, row_start:row_end, col_start:col_end] = 1
    if sparse is not None:
        # Call either .tocsr() or .tocsc()
        pools = getattr(pools, 'to' + sparse)()
    else:
        pools = pools.reshape(matrix_shape)
    return pools
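
# A minimal usage sketch (an added illustration, assuming the pooling_matrix
# defined above and its theano/scipy dependencies are importable): four
# non-overlapping 1-D pools of three features each give a 4x12 indicator
# matrix with exactly three ones per row; with stride 2 the pools overlap
# and only nine features are needed.
import numpy as np

pools = pooling_matrix(groups=4, per_group=3)
print(pools.shape)         # (4, 12)
print(pools.sum(axis=1))   # three ones per pool: [ 3.  3.  3.  3.]

overlapping = pooling_matrix(groups=4, per_group=3, strides=2)
print(overlapping.shape)   # (4, 9)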
Example #21
0
import sys
from matplotlib import pyplot
from theano.compat.six.moves import xrange
# pyplot.hold(True) is not needed here: holding is the default behaviour,
# and pyplot.hold was removed in recent matplotlib releases.

from pylearn2.utils import serial

model_paths = sys.argv[1:]

smoothing = 1
try:
    smoothing = int(model_paths[0])
    model_paths = model_paths[1:]
except Exception:
    pass

count = 0
style = '-'
for model_path in model_paths:
    model = serial.load(model_path)
    smoothed_reward_record = []
    count += 1
    if count > 7:
        style = '+'
    for i in xrange(smoothing - 1, len(model.reward_record)):
        smoothed_reward_record.append(
            sum(model.reward_record[i - smoothing + 1:i + 1]) /
            float(smoothing))
    pyplot.plot(smoothed_reward_record, style, label=model_path)
pyplot.legend()
pyplot.show()
Example #22
0
    def __init__(self,
                 data_file,
                 which_set,
                 batch_size,
                 sequence=1,
                 normalise=None,
                 shuffle=False,
                 start=None,
                 stop=None,
                 axes=['b', 0, 1, 'c']):

        self.args = locals()

        assert which_set in ['train', 'valid']

        def dimshuffle(b01c):
            default = ('b', 0, 1, 'c')
            return b01c.transpose(*[default.index(axis) for axis in axes])

        topo_view = N.load(data_file)

        if start is not None:
            assert stop is not None
            assert start >= 0
            assert stop > start
            assert ((stop - start) % batch_size) == 0
            if stop > topo_view.shape[0]:
                raise ValueError('stop=' + str(stop) + '>' + 'm=' +
                                 str(topo_view.shape[0]))
            topo_view = topo_view[start:stop, :]
            if topo_view.shape[0] != stop - start:
                raise ValueError("topo_view.shape[0]: %d. start: %d stop: %d"
                                 % (topo_view.shape[0], start, stop))

        if normalise == 1:
            topo_mean = topo_view.mean()
            topo_std = topo_view.std()
            topo_view = (topo_view - topo_mean) / topo_std
        elif normalise == 2:
            topo_mean = topo_view.mean(axis=0)
            topo_std = topo_view.std(axis=0)
            topo_view = (topo_view - topo_mean) / topo_std

        if sequence != 1:
            temp = topo_view
            topo_view = np.zeros([temp.shape[0], sequence, temp.shape[1]])
            for i in range(0, temp.shape[0] - sequence):
                topo_view[i, :, :] = temp[i:i + sequence, :].reshape(
                    1, sequence, temp.shape[1])
            del temp
        else:
            topo_view = topo_view.reshape(topo_view.shape[0], 1,
                                          topo_view.shape[1])

        m, r, c = topo_view.shape
        assert r == sequence
        topo_view = topo_view.reshape(m, r, c, 1)

        if shuffle:
            self.shuffle_rng = make_np_rng(None, [1, 2, 3],
                                           which_method="shuffle")
            for i in xrange(topo_view.shape[0]):
                j = self.shuffle_rng.randint(m)
                # Copy ensures that memory is not aliased.
                tmp = topo_view[i, :, :, :].copy()
                topo_view[i, :, :, :] = topo_view[j, :, :, :]
                topo_view[j, :, :, :] = tmp

        super(CustomLoader, self).__init__(topo_view=dimshuffle(topo_view))

        assert not N.any(N.isnan(self.X))
Example #23
0
    def __init__(self,
                 which_set,
                 shuffle=False,
                 start=None,
                 stop=None,
                 axes=['b', 0, 1, 'c'],
                 preprocessor=None,
                 fit_preprocessor=False,
                 fit_test_preprocessor=False):
        self.args = locals()

        if which_set not in ['train', 'valid', 'test']:
            raise ValueError('Unrecognized which_set value "%s". ' %
                             (which_set, ) + 'Valid values are ' +
                             '["train", "valid", "test"].')

        def dimshuffle(b01c):
            default = ('b', 0, 1, 'c')
            return b01c.transpose(*[default.index(axis) for axis in axes])

        if control.get_load_data():
            path = "${PYLEARN2_DATA_PATH}/binarized_mnist/binarized_mnist_" + \
                   which_set + ".npy"
            im_path = serial.preprocess(path)

            # Locally cache the files before reading them
            datasetCache = cache.datasetCache
            im_path = datasetCache.cache_file(im_path)

            try:
                X = serial.load(im_path)
            except IOError:
                raise NotInstalledError("BinarizedMNIST data files cannot be "
                                        "found in ${PYLEARN2_DATA_PATH}. Run "
                                        "pylearn2/scripts/datasets/"
                                        "download_binarized_mnist.py to get "
                                        "the data")
        else:
            if which_set == 'train':
                size = 50000
            else:
                size = 10000
            X = numpy.random.binomial(n=1, p=0.5, size=(size, 28**2))

        m, d = X.shape
        assert d == 28**2
        if which_set == 'train':
            assert m == 50000
        else:
            assert m == 10000

        if shuffle:
            self.shuffle_rng = make_np_rng(None, [1, 2, 3],
                                           which_method="shuffle")
            for i in xrange(X.shape[0]):
                j = self.shuffle_rng.randint(m)
                # Copy ensures that memory is not aliased.
                tmp = X[i, :].copy()
                X[i, :] = X[j, :]
                X[j, :] = tmp

        super(BinarizedMNIST,
              self).__init__(X=X,
                             view_converter=DefaultViewConverter(shape=(28, 28,
                                                                        1)))

        assert not numpy.any(numpy.isnan(self.X))

        if start is not None:
            assert start >= 0
            if stop > self.X.shape[0]:
                raise ValueError('stop=' + str(stop) + '>' + 'm=' +
                                 str(self.X.shape[0]))
            assert stop > start
            self.X = self.X[start:stop, :]
            if self.X.shape[0] != stop - start:
                raise ValueError("X.shape[0]: %d. start: %d stop: %d" %
                                 (self.X.shape[0], start, stop))

        if which_set == 'test':
            assert fit_test_preprocessor is None or \
                (fit_preprocessor == fit_test_preprocessor)

        if self.X is not None and preprocessor:
            preprocessor.apply(self, fit_preprocessor)
Example #24
0
def main(options, positional_args):
    """
    .. todo::

        WRITEME
    """
    assert len(positional_args) == 1

    path, = positional_args

    out = options.out
    rescale = options.rescale

    if rescale == 'none':
        global_rescale = False
        patch_rescale = False
    elif rescale == 'global':
        global_rescale = True
        patch_rescale = False
    elif rescale == 'individual':
        global_rescale = False
        patch_rescale = True
    else:
        assert False

    if path.endswith('.pkl'):
        from pylearn2.utils import serial
        obj = serial.load(path)
    elif path.endswith('.yaml'):
        print('Building dataset from yaml...')
        obj = yaml_parse.load_path(path)
        print('...done')
    else:
        obj = yaml_parse.load(path)

    rows = options.rows
    cols = options.cols

    if hasattr(obj, 'get_batch_topo'):
        #obj is a Dataset
        dataset = obj

        examples = dataset.get_batch_topo(rows * cols)
    else:
        #obj is a Model
        model = obj
        from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
        theano_rng = RandomStreams(42)
        design_examples_var = model.random_design_matrix(batch_size=rows *
                                                         cols,
                                                         theano_rng=theano_rng)
        from theano import function
        print('compiling sampling function')
        f = function([], design_examples_var)
        print('sampling')
        design_examples = f()
        print('loading dataset')
        dataset = yaml_parse.load(model.dataset_yaml_src)
        examples = dataset.get_topological_view(design_examples)

    norms = N.asarray([
        N.sqrt(N.sum(N.square(examples[i, :])))
        for i in xrange(examples.shape[0])
    ])
    print('norms of examples: ')
    print('\tmin: ', norms.min())
    print('\tmean: ', norms.mean())
    print('\tmax: ', norms.max())

    print('range of elements of examples', (examples.min(), examples.max()))
    print('dtype: ', examples.dtype)

    examples = dataset.adjust_for_viewer(examples)

    if global_rescale:
        examples /= N.abs(examples).max()

    if len(examples.shape) != 4:
        print('sorry, view_examples.py only supports image examples for now.')
        print('this dataset has ' + str(len(examples.shape) - 2) +
              ' topological dimensions')
        quit(-1)

    is_color = False
    assert examples.shape[3] == 2

    print(examples.shape[1:3])

    pv = patch_viewer.PatchViewer((rows, cols * 2),
                                  examples.shape[1:3],
                                  is_color=is_color)

    for i in xrange(rows * cols):
        # Load patches in backwards order for easier cross-eyed viewing
        # (Ian can't do the magic eye thing where you focus your eyes past the screen, must
        # focus eyes in front of screen)
        pv.add_patch(examples[i, :, :, 1],
                     activation=0.0,
                     rescale=patch_rescale)
        pv.add_patch(examples[i, :, :, 0],
                     activation=0.0,
                     rescale=patch_rescale)

    if out is None:
        pv.show()
    else:
        pv.save(out)
Example #25
0
def tile_raster_images(X,
                       img_shape,
                       tile_shape,
                       tile_spacing=(0, 0),
                       scale_rows_to_unit_interval=True,
                       output_pixel_vals=True):
    """
    Transform an array with one flattened image per row, into an array in
    which images are reshaped and layed out like tiles on a floor.

    This function is useful for visualizing datasets whose rows are images,
    and also columns of matrices for transforming those rows
    (such as the first layer of a neural net).

    Parameters
    ----------
    X : numpy.ndarray or tuple
        A 2-d ndarray in which every row is a flattened image, or a
        4-tuple of 2-d ndarrays (or None) giving the R, G, B and A
        channels.

    img_shape : 2-tuple of ints
        The first component is the height of each image,
        the second component is the width.

    tile_shape : 2-tuple of ints
        The number of images to tile in (rows, columns) form.

    tile_spacing : 2-tuple of ints
        The number of blank pixels between tiles, in (rows, columns) form.

    scale_rows_to_unit_interval : bool
        Whether or not the values need to be scaled to [0, 1] before being
        plotted.

    output_pixel_vals : bool
        Whether the output should be pixel values (uint8) or floats.

    Returns
    -------
    y : 2d-ndarray
        The tiled output array, with dtype 'uint8' when `output_pixel_vals`
        is True and the dtype of `X` otherwise; suitable for viewing as an
        image with PIL.Image.fromarray.
    """

    assert len(img_shape) == 2
    assert len(tile_shape) == 2
    assert len(tile_spacing) == 2

    # The expression below can be re-written in a more C style as
    # follows :
    #
    # out_shape    = [0,0]
    # out_shape[0] = (img_shape[0]+tile_spacing[0])*tile_shape[0] -
    #                tile_spacing[0]
    # out_shape[1] = (img_shape[1]+tile_spacing[1])*tile_shape[1] -
    #                tile_spacing[1]
    out_shape = [
        (ishp + tsp) * tshp - tsp
        for ishp, tshp, tsp in zip(img_shape, tile_shape, tile_spacing)
    ]

    if isinstance(X, tuple):
        assert len(X) == 4
        # Create an output np ndarray to store the image
        if output_pixel_vals:
            out_array = np.zeros((out_shape[0], out_shape[1], 4),
                                 dtype='uint8')
        else:
            # X is a tuple here, so take the dtype from the first channel
            # that is actually provided.
            chan_dtype = [c for c in X if c is not None][0].dtype
            out_array = np.zeros((out_shape[0], out_shape[1], 4),
                                 dtype=chan_dtype)

        #colors default to 0, alpha defaults to 1 (opaque)
        if output_pixel_vals:
            channel_defaults = [0, 0, 0, 255]
        else:
            channel_defaults = [0., 0., 0., 1.]

        for i in xrange(4):
            if X[i] is None:
                # if channel is None, fill it with zeros of the correct
                # dtype
                dt = out_array.dtype
                if output_pixel_vals:
                    dt = 'uint8'
                out_array[:, :, i] = np.zeros(out_shape, dtype=dt) + \
                                     channel_defaults[i]
            else:
                # use a recurrent call to compute the channel and store it
                # in the output
                out_array[:, :,
                          i] = tile_raster_images(X[i], img_shape, tile_shape,
                                                  tile_spacing,
                                                  scale_rows_to_unit_interval,
                                                  output_pixel_vals)
        return out_array

    else:
        # if we are dealing with only one channel
        H, W = img_shape
        Hs, Ws = tile_spacing

        # generate a matrix to store the output
        dt = X.dtype
        if output_pixel_vals:
            dt = 'uint8'
        out_array = np.zeros(out_shape, dtype=dt)

        for tile_row in xrange(tile_shape[0]):
            for tile_col in xrange(tile_shape[1]):
                if tile_row * tile_shape[1] + tile_col < X.shape[0]:
                    this_x = X[tile_row * tile_shape[1] + tile_col]
                    if scale_rows_to_unit_interval:
                        # if we should scale values to be between 0 and 1
                        # do this by calling the `scale_to_unit_interval`
                        # function
                        this_img = scale_to_unit_interval(
                            this_x.reshape(img_shape))
                    else:
                        this_img = this_x.reshape(img_shape)
                    # add the slice to the corresponding position in the
                    # output array
                    c = 1
                    if output_pixel_vals:
                        c = 255
                    out_array[tile_row * (H + Hs):tile_row * (H + Hs) + H,
                              tile_col * (W + Ws):tile_col * (W + Ws) +
                              W] = this_img * c
        return out_array
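
# A minimal usage sketch (an added illustration, assuming tile_raster_images
# and its scale_to_unit_interval helper are importable and PIL is installed):
# tile 100 random 8x8 "filters" into a single 10x10 grid image with one
# blank pixel between tiles.
import numpy as np
from PIL import Image

X = np.random.rand(100, 64).astype('float32')
tiled = tile_raster_images(X, img_shape=(8, 8), tile_shape=(10, 10),
                           tile_spacing=(1, 1))
print(tiled.shape)                          # (89, 89)
Image.fromarray(tiled).save('filters.png')  # grayscale uint8 image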
Example #26
0
    def evaluate(imshp, kshp, offset=(1, 1), nkern=1, mode='valid', ws=True):
        """
        Build a sparse matrix which can be used for performing...
        * convolution: in this case, the dot product of this matrix with the
          input images will generate a stack of images patches. Convolution is
          then a tensordot operation of the filters and the patch stack.
        * sparse local connections: in this case, the sparse matrix allows us
          to operate the weight matrix as if it were fully-connected. The
          structured-dot with the input image gives the output for the
          following layer.

        Parameters
        ----------
        imshp : tuple
            Shape of the input images
        kshp : tuple
            Shape of the kernel to apply (smaller than the image)
        offset : tuple of int
            Offset parameter. In the case of no weight sharing, gives the \
            pixel offset between two receptive fields. With weight sharing \
            it gives the offset between the top-left pixels of the \
            generated patches
        nkern : int
            Number of kernels (nkern=1 when weight sharing is used)
        mode : str
            'valid' generates output only when kernel and image overlap; \
            'full' is the full convolution obtained by zero-padding the \
            input
        ws : bool
            True if weight sharing, False otherwise

        Returns
        -------
        rval : tuple(indices, indptr, logical_shape, sp_type, out_img_shp)
            The structure of a sparse matrix, and the logical dimensions of \
            the image which will be the result of filtering.
        """
        N = numpy
        dx, dy = offset

        # inshp contains either 2 entries (height,width) or 3 (nfeatures,h,w)
        # in the first case, default nfeatures to 1
        if N.size(imshp) == 2:
            imshp = (1, ) + imshp

        inshp = N.array(imshp)
        kshp = N.array(kshp)
        ksize = N.prod(kshp)

        kern = ksize - 1 - N.arange(ksize)

        # size of output image if doing proper convolution (mode='full',dx=dy=0)
        # outshp is the actual output shape given the parameters
        fulloutshp = inshp[1:] + kshp - 1
        s = -1 if mode == 'valid' else 1
        outshp = N.int64(N.ceil((inshp[1:] + s*kshp - s*1) \
                 /N.array([dy,dx], dtype='float')))
        if any(outshp <= 0):
            err = ('Invalid kernel %s and/or step size %s for given input '
                   'shape %s' % (kshp, (dx, dy), inshp))
            raise ValueError(err)

        outsize = N.prod(outshp)
        insize = N.prod(inshp)

        # range of output units over which to iterate
        lbound = N.array([kshp[0] - 1, kshp[1] -
                          1]) if mode == 'valid' else N.zeros(2)
        ubound = lbound + (inshp[1:] - kshp +
                           1) if mode == 'valid' else fulloutshp

        # coordinates of image in "fulloutshp" coordinates
        topleft = N.array([kshp[0] - 1, kshp[1] - 1])
        botright = topleft + inshp[
            1:]  # bound when counting the receptive field

        # sparse matrix specifics...
        spmatshp = (outsize*N.prod(kshp)*inshp[0],insize) if ws else\
                   (nkern*outsize,insize)
        spmat = scipy_sparse.lil_matrix(spmatshp)

        # loop over output image pixels
        z, zz = 0, 0

        # incremented every time we write something to the sparse matrix
        # this is used to track the ordering of filter tap coefficient in sparse
        # column ordering
        tapi, ntaps = 0, 0

        # Note: looping over the number of kernels could've been done more efficiently
        # as the last step (when writing to spmat). However, this messes up the ordering
        # of the column values (order in which you write the values determines how the
        # vectorized data will get used later one)

        for fmapi in range(inshp[0]):  # loop over input features
            for n in range(
                    nkern
            ):  # loop over number of kernels (nkern=1 for weight sharing)

                # FOR EACH OUTPUT PIXEL...
                for oy in N.arange(lbound[0], ubound[0],
                                   dy):  # loop over output image height
                    for ox in N.arange(lbound[1], ubound[1],
                                       dx):  # loop over output image width

                        l = 0  # kern[l] is filter value to apply at (oj,oi) for (iy,ix)

                        # ... ITERATE OVER INPUT UNITS IN RECEPTIVE FIELD
                        for ky in oy + N.arange(kshp[0]):
                            for kx in ox + N.arange(kshp[1]):

                                # verify if we are still within image boundaries. Equivalent to
                                # zero-padding of the input image
                                if all((ky, kx) >= topleft) and all(
                                    (ky, kx) < botright):

                                    # convert to "valid" input space coords
                                    # used to determine column index to write to in sparse mat
                                    iy, ix = N.array((ky, kx)) - topleft
                                    # determine raster-index of input pixel...
                                    col = iy*inshp[2]+ix +\
                                          fmapi*N.prod(inshp[1:]) # taking into account multiple input features

                                    # convert oy,ox values to output space coordinates
                                    (y, x) = (oy, ox) if mode == 'full' else (
                                        oy, ox) - topleft
                                    # integer division takes the step size
                                    # into account
                                    (y, x) = N.array([y, x]) // (dy, dx)
                                    # convert to row index of sparse matrix
                                    row = (y*outshp[1]+x)*inshp[0]*ksize + l + fmapi*ksize if ws else\
                                          y*outshp[1] + x

                                    # Store something at that location in sparse matrix.
                                    # The written value is only useful for the sparse case. It
                                    # will determine the way kernel taps are mapped onto
                                    # the sparse columns (idea of kernel map)
                                    spmat[
                                        row + n * outsize,
                                        col] = tapi + 1  # n*... only for sparse

                                    # total number of active taps (used for kmap)
                                    ntaps += 1

                                tapi += 1  # absolute tap index (total number of taps)
                                l += 1  # move on to next filter tap l=(l+1)%ksize

        if spmat.format != 'csc':
            spmat = spmat.tocsc().ensure_sorted_indices()
        else:
            # BUG ALERT: scipy0.6 has bug where data and indices are written in reverse column
            # ordering. Explicit call to ensure_sorted_indices removes this problem
            spmat = spmat.ensure_sorted_indices()

        if ws:
            kmap = None
        else:
            kmap = N.zeros(ntaps, dtype='int')
            k = 0
            #print 'TEMPORARY BUGFIX: REMOVE !!!'
            for j in xrange(spmat.shape[1]):
                for i_idx in xrange(spmat.indptr[j], spmat.indptr[j + 1]):
                    if spmat.data[i_idx] != 0:
                        kmap[k] = spmat.data[
                            i_idx] - 1  # this is == spmat[i,j] - 1
                        k += 1

        # when in valid mode, it is more efficient to store in sparse row
        # TODO: need to implement structured dot for csr matrix
        assert spmat.format == 'csc'
        sptype = 'csc'
        #sptype = 'csr' if mode=='valid' else 'csc'
        use_csr_type = 0
        if use_csr_type and mode == 'valid':
            spmat = spmat.tocsr()

        rval = (spmat.indices[:spmat.size], spmat.indptr, spmatshp, sptype,
                outshp)
        rval += (kmap, ) if kmap is not None else ()

        return rval
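
# A quick numeric check of the output-shape arithmetic used above (an added
# sketch, independent of the sparse-matrix construction itself): a 'valid'
# convolution of a 3x3 kernel over an 8x8 single-feature image with unit
# steps should give the familiar 6x6 output.
import numpy as N

imshp, kshp, (dy, dx) = (1, 8, 8), N.array([3, 3]), (1, 1)
s = -1  # mode == 'valid'
outshp = N.int64(N.ceil((N.array(imshp[1:]) + s * kshp - s * 1) /
                        N.array([dy, dx], dtype='float')))
print(outshp)  # [6 6]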
Example #27
0
def main():
    """
    .. todo::

        WRITEME
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--out")
    parser.add_argument("model_paths", nargs='+')
    options = parser.parse_args()
    model_paths = options.model_paths

    if options.out is not None:
        import matplotlib
        matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    print('generating names...')
    model_names = [model_path.replace('.pkl', '!') for model_path in
            model_paths]
    model_names = unique_substrings(model_names, min_size=10)
    model_names = [model_name.replace('!','') for model_name in
            model_names]
    print('...done')

    for i, arg in enumerate(model_paths):
        try:
            model = serial.load(arg)
        except Exception:
            if arg.endswith('.yaml'):
                print(arg + " is a yaml config file; you need to load "
                      "a trained model.", file=sys.stderr)
                quit(-1)
            raise
        this_model_channels = model.monitor.channels

        if len(sys.argv) > 2:
            postfix = ":" + model_names[i]
        else:
            postfix = ""

        for channel in this_model_channels:
            channels[channel+postfix] = this_model_channels[channel]
        del model
        gc.collect()


    while True:
        # Make a list of short codes for each channel so user can specify them
        # easily
        tag_generator = _TagGenerator()
        codebook = {}
        sorted_codes = []
        for channel_name in sorted(channels,
                key = number_aware_alphabetical_key):
            code = tag_generator.get_tag()
            codebook[code] = channel_name
            codebook['<'+channel_name+'>'] = channel_name
            sorted_codes.append(code)

        x_axis = 'example'
        print('set x_axis to example')

        if len(channels.values()) == 0:
            print("there are no channels to plot")
            break

        # If there is more than one channel in the monitor ask which ones to
        # plot
        prompt = len(channels.values()) > 1

        if prompt:

            # Display the codebook
            for code in sorted_codes:
                print(code + '. ' + codebook[code])

            print()

            print("Put e, b, s or h in the list somewhere to plot " + 
                    "epochs, batches, seconds, or hours, respectively.")
            response = input('Enter a list of channels to plot ' + \
                    '(example: A, C,F-G, h, <test_err>) or q to quit' + \
                    ' or o for options: ')

            if response == 'o':
                print('1: smooth all channels')
                print('any other response: do nothing, go back to plotting')
                response = input('Enter your choice: ')
                if response == '1':
                    for channel in channels.values():
                        k = 5
                        new_val_record = []
                        for i in xrange(len(channel.val_record)):
                            new_val = 0.
                            count = 0.
                            for j in xrange(max(0, i-k), i+1):
                                new_val += channel.val_record[j]
                                count += 1.
                            new_val_record.append(new_val / count)
                        channel.val_record = new_val_record
                continue

            if response == 'q':
                break

            #Remove spaces
            response = response.replace(' ','')

            #Split into list
            codes = response.split(',')

            final_codes = set([])

            for code in codes:
                if code == 'e':
                    x_axis = 'epoch'
                    continue
                elif code == 'b':
                    x_axis = 'batche'
                elif code == 's':
                    x_axis = 'second'
                elif code == 'h':
                    x_axis = 'hour'
                elif code.startswith('<'):
                    assert code.endswith('>')
                    final_codes.add(code)
                elif code.find('-') != -1:
                    #The current list element is a range of codes

                    rng = code.split('-')

                    if len(rng) != 2:
                        print("Input not understood: "+code)
                        quit(-1)

                    found = False
                    for i in xrange(len(sorted_codes)):
                        if sorted_codes[i] == rng[0]:
                            found = True
                            break

                    if not found:
                        print("Invalid code: "+rng[0])
                        quit(-1)

                    found = False
                    for j in xrange(i,len(sorted_codes)):
                        if sorted_codes[j] == rng[1]:
                            found = True
                            break

                    if not found:
                        print("Invalid code: "+rng[1])
                        quit(-1)

                    final_codes = final_codes.union(set(sorted_codes[i:j+1]))
                else:
                    #The current list element is just a single code
                    final_codes = final_codes.union(set([code]))
            # end for code in codes
        else:
            final_codes = set(codebook.keys())

        colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
        styles = list(colors)
        styles += [color+'--' for color in colors]
        styles += [color+':' for color in colors]

        fig = plt.figure()
        ax = plt.subplot(1,1,1)

        # plot the requested channels
        for idx, code in enumerate(sorted(final_codes)):

            channel_name= codebook[code]
            channel = channels[channel_name]

            y = np.asarray(channel.val_record)

            if contains_nan(y):
                print(channel_name + ' contains NaNs')

            if contains_inf(y):
                print(channel_name + ' contains infinite values')

            if x_axis == 'example':
                x = np.asarray(channel.example_record)
            elif x_axis == 'batche':
                x = np.asarray(channel.batch_record)
            elif x_axis == 'epoch':
                try:
                    x = np.asarray(channel.epoch_record)
                except AttributeError:
                    # older saved monitors won't have epoch_record
                    x = np.arange(len(channel.batch_record))
            elif x_axis == 'second':
                x = np.asarray(channel.time_record)
            elif x_axis == 'hour':
                x = np.asarray(channel.time_record) / 3600.
            else:
                assert False


            ax.plot( x,
                      y,
                      styles[idx % len(styles)],
                      marker = '.', # add point markers to lines
                      label = channel_name)

        plt.xlabel('# '+x_axis+'s')
        ax.ticklabel_format( scilimits = (-3,3), axis = 'both')

        handles, labels = ax.get_legend_handles_labels()
        lgd = ax.legend(handles, labels, loc='upper center',
                bbox_to_anchor=(0.5,-0.1))
        # 0.046 is the size of 1 legend box
        fig.subplots_adjust(bottom=0.11 + 0.046 * len(final_codes))

        if options.out is None:
            plt.show()
        else:
            plt.savefig(options.out)

        if not prompt:
            break
Example #28
0
def make_viewer(mat,
                grid_shape=None,
                patch_shape=None,
                activation=None,
                pad=None,
                is_color=False,
                rescale=True):
    """
    Given filters in rows, guesses dimensions of patches
    and nice dimensions for the PatchViewer and returns a PatchViewer
    containing visualizations of the filters.

    Parameters
    ----------
    mat : ndarray
        Values should lie in [-1, 1] if `rescale` is False.
        0. always indicates medium gray, with negative values drawn as
        blacker and positive values drawn as whiter.
        A matrix with each row being a different image patch, OR
        a 4D tensor in ('b', 0, 1, 'c') format.
        If matrix, we assume it was flattened using the same procedure as a
        ('b', 0, 1, 'c') DefaultViewConverter uses.
    grid_shape : tuple, optional
        A tuple of two ints specifying the shape of the grid in the
        PatchViewer, in (rows, cols) format. If not specified, this
        function does its best to choose an aesthetically pleasing
        value.
    patch_shape : tuple, optional
        A tuple of two ints specifying the shape of the patch.
        If `mat` is 4D, this function gets the patch shape from the shape of
        `mat`. If `mat` is 2D and patch_shape is not specified, this function
        assumes the patches are perfectly square.
    activation : iterable
        An iterable collection describing some kind of activation value
        associated with each patch. This is indicated with a border around the
        patch whose color intensity increases with activation value.
        The individual activation values may be single floats to draw one
        border or iterable collections of floats to draw multiple borders with
        differing intensities around the patch.
    pad : int, optional
        The amount of padding to add between patches in the displayed image.
    is_color : bool
        If True, assume the images are in color.
        Not needed if `mat` is in ('b', 0, 1, 'c') format, since we can just
        look at its shape[-1].
    rescale : bool
        If True, rescale each patch so that its highest magnitude pixel
        reaches a value of either 0 or 1 depending on the sign of that pixel.

    Returns
    -------
    patch_viewer : PatchViewer
        A PatchViewer containing the patches stored in `mat`.
    """

    num_channels = 1
    if is_color:
        num_channels = 3

    if grid_shape is None:
        grid_shape = PatchViewer.pick_shape(mat.shape[0])
    if mat.ndim > 2:
        patch_shape = mat.shape[1:3]
        topo_view = mat
        num_channels = mat.shape[3]
        is_color = num_channels > 1
    else:
        if patch_shape is None:
            assert mat.shape[1] % num_channels == 0
            patch_shape = PatchViewer.pick_shape(mat.shape[1] // num_channels,
                                                 exact=True)
            assert mat.shape[1] == (patch_shape[0] * patch_shape[1] *
                                    num_channels)
        topo_shape = (patch_shape[0], patch_shape[1], num_channels)
        view_converter = DefaultViewConverter(topo_shape)
        topo_view = view_converter.design_mat_to_topo_view(mat)
    rval = PatchViewer(grid_shape, patch_shape, pad=pad, is_color=is_color)
    for i in xrange(mat.shape[0]):
        if activation is not None:
            if hasattr(activation[0], '__iter__'):
                act = [a[i] for a in activation]
            else:
                act = activation[i]
        else:
            act = None

        patch = topo_view[i, :]

        rval.add_patch(patch, rescale=rescale, activation=act)
    return rval
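
# A minimal usage sketch (an added illustration, assuming make_viewer and its
# pylearn2 dependencies, PatchViewer and DefaultViewConverter, are
# importable): view 16 random 5x5 grayscale filters stored one per row.
import numpy as np

mat = np.random.uniform(-1., 1., size=(16, 25)).astype('float32')
viewer = make_viewer(mat, grid_shape=(4, 4), patch_shape=(5, 5))
viewer.save('filters.png')   # or viewer.show() for an interactive window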
Example #29
0
def stochastic_max_pool_x(x,
                          image_shape,
                          pool_shape=(2, 2),
                          pool_stride=(1, 1),
                          rng=None):
    """
    Parameters
    ----------
    x : theano 4-tensor
        in format (batch size, channels, rows, cols)
    image_shape : tuple
        avoid doing some of the arithmetic in theano
    pool_shape : tuple
        shape of the pool region (rows, cols)
    pool_stride : tuple
        strides between pooling regions (row stride, col stride)
    rng : theano random stream
    """

    r, c = image_shape
    pr, pc = pool_shape
    rs, cs = pool_stride
    global pool_size
    pool_size = pool_shape
    global stride_size
    stride_size = pool_stride
    batch = x.shape[0]
    channel = x.shape[1]
    rng = make_theano_rng(rng, 2022, which_method='multinomial')

    # Compute starting row of the last pool
    last_pool_r = last_pool(r, pr, rs) * rs
    # Compute number of rows needed in image for all indexes to work out
    required_r = last_pool_r + pr

    last_pool_c = last_pool(c, pc, cs) * cs
    required_c = last_pool_c + pc

    # final result shape
    res_r = int(numpy.floor(last_pool_r / rs)) + 1
    res_c = int(numpy.floor(last_pool_c / cs)) + 1

    # padding
    padded = tensor.alloc(0.0, batch, channel, required_r, required_c)
    #theano.tensor.alloc(value, *shape) - for allocating a new tensor with value filled with "value"

    x = tensor.set_subtensor(padded[:, :, 0:r, 0:c], x)
    #theano.tensor.set_subtensor(lval of = operator, rval of = operator) - for assigning a tensor to a subtensor of a tensor

    # unraveling
    window = tensor.alloc(0.0, batch, channel, res_r, res_c, pr, pc)

    # initializing window with proper values
    for row_within_pool in xrange(pr):
        row_stop = last_pool_r + row_within_pool + 1
        for col_within_pool in xrange(pc):
            col_stop = last_pool_c + col_within_pool + 1
            win_cell = x[:, :, row_within_pool:row_stop:rs,
                         col_within_pool:col_stop:cs]
            window = tensor.set_subtensor(
                window[:, :, :, :, row_within_pool, col_within_pool], win_cell)

    # find the norm
    norm = window.sum(axis=[4, 5])
    #tensor.sum(axis = []) - cal sum over given axes

    norm = tensor.switch(tensor.eq(norm, 0.0), 1.0, norm)
    '''
    theano.tensor.eq(a, b) - Returns a variable representing the result of logical equality (a==b)
    theano.tensor.switch(cond, ift, iff) - Returns a variable representing a switch between ift (iftrue) and iff (iffalse)
    Basically converting a zero norm to 1.0.
    '''
    norm = window / norm.dimshuffle(0, 1, 2, 3, 'x', 'x')
    #converting activation values to probabilities using below formula - pi = ai / sum(ai)

    # get prob
    prob = rng.multinomial(pvals=norm.reshape(
        (batch * channel * res_r * res_c, pr * pc)),
                           dtype='float32')
    # select
    res = (window * prob.reshape(
        (batch, channel, res_r, res_c, pr, pc))).max(axis=5).max(axis=4)

    return res
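
# A minimal usage sketch (an added illustration, assuming the
# stochastic_max_pool_x defined above and its theano/pylearn2 helpers,
# last_pool and make_theano_rng, are importable): pool a small random batch
# with 2x2 pools and stride 2.
import numpy
import theano
import theano.tensor as T

x_sym = T.tensor4('x')                       # (batch, channels, rows, cols)
pooled = stochastic_max_pool_x(x_sym, image_shape=(6, 6),
                               pool_shape=(2, 2), pool_stride=(2, 2))
f = theano.function([x_sym], pooled)

batch = numpy.random.rand(3, 2, 6, 6).astype('float32')
print(f(batch).shape)                        # expected: (3, 2, 3, 3)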
Example #30
0
    def __init__(self, which_set, center=False, shuffle=False,
                 binarize=False, start=None, stop=None,
                 axes=['b', 0, 1, 'c'],
                 preprocessor=None,
                 fit_preprocessor=False,
                 fit_test_preprocessor=False):
        self.args = locals()

        if which_set not in ['train', 'test']:
            if which_set == 'valid':
                raise ValueError(
                    "There is no such thing as the MNIST validation set. "
                    "MNIST consists of 60,000 train examples and 10,000 "
                    "test examples. If you wish to use a validation set "
                    "you should divide the train set yourself. The "
                    "pylearn2 dataset implements and will only ever "
                    "implement the standard train / test split used in "
                    "the literature.")
            raise ValueError(
                'Unrecognized which_set value "%s". ' % (which_set,) +
                'Valid values are ["train", "test"].')

        def dimshuffle(b01c):
            """
            .. todo::

                WRITEME
            """
            default = ('b', 0, 1, 'c')
            return b01c.transpose(*[default.index(axis) for axis in axes])

        if control.get_load_data():
            path = "${PYLEARN2_DATA_PATH}/sign24/"
            if which_set == 'train':
                im_path = path + 'train-images-idx3-ubyte'
                label_path = path + 'train-labels-idx1-ubyte'
            else:
                assert which_set == 'test'
                im_path = path + 't10k-images-idx3-ubyte'
                label_path = path + 't10k-labels-idx1-ubyte'
            # Path substitution done here in order to make the lower-level
            # mnist_ubyte.py as stand-alone as possible (for reuse in, e.g.,
            # the Deep Learning Tutorials, or in another package).
            im_path = serial.preprocess(im_path)
            label_path = serial.preprocess(label_path)

            # Locally cache the files before reading them
            datasetCache = cache.datasetCache
            im_path = datasetCache.cache_file(im_path)
            label_path = datasetCache.cache_file(label_path)

            topo_view = read_mnist_images(im_path, dtype='float32')
            y = np.atleast_2d(read_mnist_labels(label_path)).T
        else:
            if which_set == 'train':
                size = 3576
            elif which_set == 'test':
                size = 1176
            else:
                raise ValueError(
                    'Unrecognized which_set value "%s".' % (which_set,) +
                    '". Valid values are ["train","test"].')
            topo_view = np.random.rand(size, 28, 28)
            y = np.random.randint(0, 10, (size, 1))

        if binarize:
            topo_view = (topo_view > 0.5).astype('float32')

        y_labels = 24

        m, r, c = topo_view.shape
        assert r == 28
        assert c == 28
        topo_view = topo_view.reshape(m, r, c, 1)

        if which_set == 'train':
            assert m == 3576
        elif which_set == 'test':
            assert m == 1176
        else:
            assert False

        if center:
            topo_view -= topo_view.mean(axis=0)

        if shuffle:
            self.shuffle_rng = make_np_rng(
                None, [1, 2, 3], which_method="shuffle")
            for i in xrange(topo_view.shape[0]):
                j = self.shuffle_rng.randint(m)
                # Copy ensures that memory is not aliased.
                tmp = topo_view[i, :, :, :].copy()
                topo_view[i, :, :, :] = topo_view[j, :, :, :]
                topo_view[j, :, :, :] = tmp

                tmp = y[i:i + 1].copy()
                y[i] = y[j]
                y[j] = tmp

        super(MNIST, self).__init__(topo_view=dimshuffle(topo_view), y=y,
                                    axes=axes, y_labels=y_labels)

        assert not N.any(N.isnan(self.X))

        if start is not None:
            assert start >= 0
            if stop > self.X.shape[0]:
                raise ValueError('stop=' + str(stop) + '>' +
                                 'm=' + str(self.X.shape[0]))
            assert stop > start
            self.X = self.X[start:stop, :]
            if self.X.shape[0] != stop - start:
                raise ValueError("X.shape[0]: %d. start: %d stop: %d"
                                 % (self.X.shape[0], start, stop))
            if len(self.y.shape) > 1:
                self.y = self.y[start:stop, :]
            else:
                self.y = self.y[start:stop]
            assert self.y.shape[0] == stop - start

        if which_set == 'test':
            assert fit_test_preprocessor is None or \
                (fit_preprocessor == fit_test_preprocessor)

        if self.X is not None and preprocessor:
            preprocessor.apply(self, fit_preprocessor)
Example #31
0
def test_revisit():

    # Test that each call to monitor revisits exactly the same data

    BATCH_SIZE = 3
    MAX_BATCH_SIZE = 12
    BATCH_SIZE_STRIDE = 3
    NUM_BATCHES = 10
    num_examples = NUM_BATCHES * BATCH_SIZE

    monitoring_dataset = ArangeDataset(num_examples)

    for mon_batch_size in xrange(BATCH_SIZE, MAX_BATCH_SIZE + 1,
                                 BATCH_SIZE_STRIDE):
        nums = [1, 3, int(num_examples / mon_batch_size), None]

        for mode in sorted(_iteration_schemes):
            if mode == 'even_sequences':
                # The even_sequences iterator does not support specifying a
                # fixed number of minibatches, so skip it here.
                continue
            for num_mon_batches in nums:
                if num_mon_batches is None and mode in [
                        'random_uniform', 'random_slice'
                ]:
                    continue

                if has_uniform_batch_size(mode) and \
                   num_mon_batches is not None and \
                   num_mon_batches * mon_batch_size > num_examples:

                    num_mon_batches = int(num_examples / float(mon_batch_size))

                model = DummyModel(1)
                monitor = Monitor.get_monitor(model)

                try:
                    monitor.add_dataset(monitoring_dataset,
                                        mode,
                                        batch_size=mon_batch_size,
                                        num_batches=num_mon_batches)
                except TypeError:
                    monitor.add_dataset(monitoring_dataset,
                                        mode,
                                        batch_size=mon_batch_size,
                                        num_batches=num_mon_batches,
                                        seed=0)

                if has_uniform_batch_size(mode) and num_mon_batches is None:
                    num_mon_batches = int(num_examples / float(mon_batch_size))
                elif num_mon_batches is None:
                    num_mon_batches = int(
                        np.ceil(float(num_examples) / float(mon_batch_size)))

                batches = [None] * int(num_mon_batches)
                visited = [False] * int(num_mon_batches)

                batch_idx = shared(0)

                class RecorderAndValidator(object):
                    def __init__(self):
                        self.validate = False

                    def __call__(self, *data):
                        """ Initially, records the batches the monitor shows it.
                        When set to validate mode, makes sure the batches shown
                        on the second monitor call match those from the first."""
                        X, = data

                        idx = batch_idx.get_value()
                        batch_idx.set_value(idx + 1)

                        # Note: if the monitor starts supporting variable batch sizes,
                        # take this out. Maybe move it to a new test that the iterator's
                        # uneven property is set accurately
                        warnings.warn(
                            "TODO: add unit test that iterators uneven property is set correctly."
                        )
                        # assert X.shape[0] == mon_batch_size

                        if self.validate:
                            previous_batch = batches[idx]
                            assert not visited[idx]
                            visited[idx] = True
                            if not np.allclose(previous_batch, X):
                                print('Visited different data in batch', idx)
                                print(previous_batch)
                                print(X)
                                print('Iteration mode', mode)
                                assert False
                        else:
                            batches[idx] = X
                        # end if

                    # end __call__

                #end class

                prereq = RecorderAndValidator()

                monitor.add_channel(name='dummy',
                                    ipt=model.input_space.make_theano_batch(),
                                    val=0.,
                                    prereqs=[prereq],
                                    data_specs=(model.get_input_space(),
                                                model.get_input_source()))

                try:
                    monitor()
                except RuntimeError:
                    print('monitor raised RuntimeError for iteration mode',
                          mode)
                    raise

                assert None not in batches

                batch_idx.set_value(0)
                prereq.validate = True

                monitor()

                assert all(visited)
Example #32
0
def compute_likelihood_given_logz(nsamples, psamples, batch_size, energy_fn,
                                  inference_fn, log_z, test_x):
    """
    Compute test set likelihood as below, where q is the variational
    approximation to the posterior p(h1,h2|v).

        ln p(v) \approx - \sum_h q(h) E(v, h1, h2) + H(q) - ln Z

    See section 3.2 of DBM paper for details.

    Parameters
    ----------
    nsamples : array-like object of theano shared variables
        Negative samples
    psamples : array-like object of theano shared variables
        Positive samples
    batch_size : scalar
        Size of a batch of samples
    energy_fn : theano.function
        Function which computes the (temperature 1) energy of the samples. This
        function should return a symbolic vector.
    inference_fn : theano.function
        Inference function for DBM. Function takes a T.matrix as input (data)
        and returns a list of length 'length(b_list)', where the i-th element
        is an ndarray containing approximate samples of layer i.
    log_z : scalar
        Estimate partition function of 'model'.
    test_x : numpy.ndarray
        Test set data, in dense design matrix format.

    Returns
    -------
    likelihood : scalar
        Variational estimate of the per-example log-likelihood of the test
        data under the model
    """
    i = 0.
    likelihood = 0

    for i in xrange(0, len(test_x), batch_size):

        # Recast data as floatX and apply preprocessing if required
        x = numpy.array(test_x[i:numpy.minimum(test_x.shape[0], i +
                                               batch_size), :],
                        dtype=floatX)
        batch_size0 = len(x)
        if len(x) < batch_size:
            # concatenate x to have some dummy entries
            x = numpy.concatenate(
                (x, numpy.zeros(
                    (batch_size - len(x), x.shape[1]), dtype=floatX)),
                axis=0)

        # Perform inference
        inference_fn(x)

        # The entropy of q(h) adds a contribution to the variational
        # lower bound
        hq = 0
        for psample in psamples[1:]:
            q = psample.get_value()
            temp = (- q * numpy.log(1e-5 + q)
                    - (1. - q) * numpy.log(1. - q + 1e-5))
            hq += numpy.sum(temp, axis=1)

        # Copy into negative phase buffers to measure energy
        nsamples[0].set_value(x)
        for ii, psample in enumerate(psamples):
            if ii > 0:
                nsamples[ii].set_value(psample.get_value())

        # Compute sum of likelihood for current buffer
        x_likelihood = numpy.sum((-energy_fn(1.0) + hq - log_z)[:batch_size0])

        # Running average of the log-likelihood over the examples seen so
        # far. Weight by batch_size0 rather than batch_size, since the last
        # batch may be smaller.
        likelihood = (i * likelihood + x_likelihood) / (i + batch_size0)

    return likelihood
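

# A minimal, self-contained numpy sketch (not part of the original example)
# of the arithmetic used above: for a factorial Bernoulli posterior q(h), the
# entropy term is H(q) = -sum_j [q_j*log(q_j) + (1-q_j)*log(1-q_j)], and the
# per-example variational lower bound is -E(v, h1, h2) + H(q) - log_z. The
# names below (q, energies, log_z) are hypothetical stand-ins for the shared
# variables and theano functions that compute_likelihood_given_logz assumes.
import numpy

def bernoulli_entropy(q, eps=1e-5):
    """Entropy of a factorial Bernoulli distribution, summed over units."""
    return numpy.sum(-q * numpy.log(q + eps)
                     - (1. - q) * numpy.log(1. - q + eps), axis=1)

q = numpy.array([[0.1, 0.9], [0.5, 0.5]])  # mean-field activations (batch, units)
energies = numpy.array([-2.0, -1.5])       # E(v, h1, h2) for each example
log_z = 3.0                                # estimated log partition function
lower_bound = -energies + bernoulli_entropy(q) - log_z
print(lower_bound.mean())                  # average bound on ln p(v)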
Example #33
    def train_batch(self, dataset, batch_size):
        """
        .. todo::

            WRITEME
        """
        # TODO: this results in compilation happening every time learn is
        # called; should cache the compilation results, including those
        # inside cg
        X = dataset.get_design_matrix()
        m = X.shape[0]
        assert X.shape[1] == self.nvis

        gamma = N.zeros((batch_size, self.nhid))
        cur_gamma = T.vector(name='cur_gamma')
        cur_v = T.vector(name='cur_v')
        recons = T.dot(cur_gamma, self.W)
        recons.name = 'recons'

        recons_diffs = cur_v - recons
        recons_diffs.name = 'recons_diffs'

        recons_diff_sq = T.sqr(recons_diffs)
        recons_diff_sq.name = 'recons_diff_sq'

        recons_error = T.sum(recons_diff_sq)
        recons_error.name = 'recons_error'

        dict_dists = T.sum(T.sqr(self.W - cur_v), axis=1)
        dict_dists.name = 'dict_dists'

        abs_gamma = abs(cur_gamma)
        abs_gamma.name = 'abs_gamma'

        weighted_dists = T.dot(abs_gamma, dict_dists)
        weighted_dists.name = 'weighted_dists'

        penalty = self.coeff * weighted_dists
        penalty.name = 'penalty'

        # Prevent directions of absolute flatness in the Hessian
        #W_sq = T.sqr(self.W)
        #W_sq.name = 'W_sq'
        #debug =  T.sum(W_sq)
        debug = 1e-10 * T.sum(dict_dists)
        debug.name = 'debug'

        #J = debug
        J = recons_error + penalty + debug
        J.name = 'J'

        Jf = function([cur_v, cur_gamma], J)

        start = self.rng.randint(m - batch_size + 1)
        batch_X = X[start:start + batch_size, :]

        #TODO-- optimize gamma
        logger.info('optimizing gamma')
        for i in xrange(batch_size):
            #print str(i+1)+'/'+str(batch_size)
            gamma[i, :] = self.optimize_gamma(batch_X[i, :])

        logger.info('max min')
        logger.info(N.abs(gamma).min(axis=0).max())
        logger.info('min max')
        logger.info(N.abs(gamma).max(axis=0).max())

        #Optimize W
        logger.info('optimizing W')
        logger.warning("not tested since switching to Razvan's all-theano "
                       "implementation of linear cg")
        cg.linear_cg(J, [self.W], max_iters=3)

        err = 0.

        for i in xrange(batch_size):
            err += Jf(batch_X[i, :], gamma[i, :])
        assert not N.isnan(err)
        assert not N.isinf(err)
        logger.info('err: {0}'.format(err))
        return True
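

# A minimal numpy sketch (not from the original code) of the cost J that
# train_batch builds symbolically above: squared reconstruction error plus a
# penalty that weights each dictionary element's squared distance to the input
# by the absolute value of its coefficient, plus a tiny term that keeps the
# Hessian away from directions of absolute flatness. W, v, gamma and coeff are
# hypothetical stand-ins for the model's attributes and inputs.
import numpy as np

def cost_J(v, gamma, W, coeff, eps=1e-10):
    recons_error = np.sum((v - np.dot(gamma, W)) ** 2)    # recons_error
    dict_dists = np.sum((W - v) ** 2, axis=1)             # dict_dists
    penalty = coeff * np.dot(np.abs(gamma), dict_dists)   # penalty
    debug = eps * np.sum(dict_dists)                      # debug term
    return recons_error + penalty + debug

rng = np.random.RandomState(0)
W = rng.randn(4, 6)    # (nhid, nvis) dictionary
v = rng.randn(6)       # one visible example
gamma = rng.randn(4)   # its code
print(cost_J(v, gamma, W, coeff=0.1))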