Code example #1
File: conv2d_c01b.py  Project: CURG/pylearn2
    def lmul_T(self, x):
        """
        .. todo::

            WRITEME
        """

        check_cuda(str(type(self)) + ".lmul_T")

        assert x.dtype == self._filters.dtype

        op_axes = ('c', 0, 1, 'b')
        axes = self.output_axes
        if tuple(axes) != op_axes:
            x = x.dimshuffle(*[axes.index(ax) for ax in op_axes])

        x = gpu_contiguous(x)

        rval = ImageActs(pad=self.pad, partial_sum=self.partial_sum,
                         stride=self.kernel_stride[0])(x, self._filters,
                                 output_shape=self.input_shape)

        # Format the output based on the input space
        axes = self.input_axes
        assert len(axes) == 4

        if tuple(axes) != op_axes:
            rval = rval.dimshuffle(op_axes.index(axes[0]),
                                   op_axes.index(axes[1]),
                                   op_axes.index(axes[2]),
                                   op_axes.index(axes[3]))

        return rval
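
The dimshuffle calls above only reorder axes between the layer's own axis order and the ('c', 0, 1, 'b') order (channels, rows, columns, batch) that cuda-convnet requires. A minimal NumPy sketch of the same permutation, with hypothetical shapes:

import numpy as np

axes = ('b', 'c', 0, 1)        # e.g. a layer's output_axes
op_axes = ('c', 0, 1, 'b')     # order required by the cuda-convnet ops

x = np.zeros((128, 16, 6, 7))  # batch, channels, rows, cols
if tuple(axes) != op_axes:
    # same permutation as x.dimshuffle(*[axes.index(ax) for ax in op_axes])
    x = x.transpose(*[axes.index(ax) for ax in op_axes])

assert x.shape == (16, 6, 7, 128)  # channels, rows, cols, batch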
Code example #2
File: filter_acts.py  Project: mengwanguc/pylearn2
    def grad(self, inputs, dout):
        """
        .. todo::

            WRITEME
        """
        images, filters = inputs

        if 'Cuda' not in str(type(images)):
            raise TypeError("inputs must be cuda")
        if 'Cuda' not in str(type(filters)):
            raise TypeError("filters must be cuda")

        dout, = dout
        dout = gpu_contiguous(dout)

        if 'Cuda' not in str(type(dout)):
            raise TypeError("output gradients must be cuda")

        ishape = images.shape[1:3]
        fshape = filters.shape[1:3]
        d_images = ImageActs(self.pad, self.partial_sum,
                             self.stride)(dout, filters, ishape)
        d_filters = WeightActs(self.pad, self.partial_sum,
                               self.stride)(images, dout, fshape)[0]
        return d_images, d_filters
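
Both gradients are expressed through the other two cuda-convnet ops: ImageActs plays the role of the transposed convolution for d_images, and WeightActs accumulates d_filters. A SciPy sketch of the same relationship for a hypothetical single-channel, single-filter case (cuda-convnet computes correlations, so no kernel flip appears in the forward pass):

import numpy as np
from scipy.signal import convolve2d, correlate2d

rng = np.random.RandomState(0)
X = rng.randn(6, 7)    # image
K = rng.randn(5, 5)    # filter
dH = rng.randn(2, 3)   # upstream gradient; shape of correlate2d(X, K, 'valid')

d_image = convolve2d(dH, K, mode='full')      # the role ImageActs plays
d_filter = correlate2d(X, dH, mode='valid')   # the role WeightActs plays

assert d_image.shape == X.shape and d_filter.shape == K.shape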
Code example #3
File: cc_layers.py  Project: smartyining/anna
    def __init__(self, input_layer, mirror_layer, nonlinearity=None):
        """
        Only the valid border mode is supported.

        n_filters should be a multiple of 16.
        """

        self.mirror_layer = mirror_layer

        self.input_layer = input_layer
        self.input_shape = self.input_layer.get_output_shape()
        n_filters = self.input_shape[0]

        if nonlinearity:
            self.nonlinearity = nonlinearity
        else:
            self.nonlinearity = mirror_layer.nonlinearity

        self.n_channels = mirror_layer.n_channels
        self.n_filters = mirror_layer.n_filters
        self.filter_size = mirror_layer.filter_size
        self.weights_std = mirror_layer.weights_std
        self.init_bias_value = mirror_layer.init_bias_value
        self.stride = mirror_layer.stride
        self.dropout = mirror_layer.dropout
        self.partial_sum = mirror_layer.partial_sum
        self.pad = mirror_layer.pad
        self.untie_biases = mirror_layer.untie_biases

        self.mb_size = self.input_layer.mb_size

        self.filter_shape = mirror_layer.filter_shape

        self.trainable = False
        self.W = layers.shared_single(4)

        if self.untie_biases:
            self.b = layers.shared_single(3)
        else:
            self.b = layers.shared_single(1)

        self.params = [self.W, self.b]
        self.bias_params = [self.b]

        self.data_order = layers.data_order.type2

        assert (len(self.input_layer.get_output_shape()) == 4), \
            'Input must have 4 dimensions.'

        assert (self.input_layer.data_order == self.data_order), \
            'Input data order does not match this layer\'s data order.'

        self.reset_params()

        self.image_acts_op = ImageActs(stride=self.stride,
                                       partial_sum=self.partial_sum,
                                       pad=self.pad)
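
The "valid border mode" restriction in the docstring comes from the shape arithmetic that ImageActs inverts: a valid convolution shrinks each spatial dimension by filter_size - 1, and the mirror layer grows it back. A small sketch of that arithmetic, with assumed sizes:

def valid_conv_output(size, filter_size, stride=1):
    return (size - filter_size) // stride + 1

def image_acts_output(size, filter_size, stride=1):
    # transpose of the valid convolution above
    return (size - 1) * stride + filter_size

rows, filter_size = 32, 5
hidden_rows = valid_conv_output(rows, filter_size)        # 28
assert image_acts_output(hidden_rows, filter_size) == rows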
Code example #4
File: cc_layers.py  Project: smartyining/anna
    def __init__(self, input_layer, mirror_layer, nonlinearity=None):
        """
        Only the valid border mode is supported.

        n_filters should be a multiple of 16.
        """

        self.mirror_layer = mirror_layer

        self.input_layer = input_layer
        self.input_shape = self.input_layer.get_output_shape()
        n_filters = self.input_shape[0]

        if nonlinearity:
            self.nonlinearity = nonlinearity
        else:
            self.nonlinearity = mirror_layer.nonlinearity

        self.n_channels = mirror_layer.n_channels
        self.n_filters = mirror_layer.n_filters
        self.filter_size = mirror_layer.filter_size
        self.weights_std = mirror_layer.weights_std
        self.init_bias_value = mirror_layer.init_bias_value
        self.stride = mirror_layer.stride
        self.dropout = mirror_layer.dropout
        self.partial_sum = mirror_layer.partial_sum
        self.pad = mirror_layer.pad
        self.untie_biases = mirror_layer.untie_biases
        # if untie_biases == True, each position in the output map has its own
        # bias (as opposed to having the same bias everywhere for a filter)
        self.mb_size = self.input_layer.mb_size

        self.filter_shape = mirror_layer.filter_shape

        self.trainable = False
        self.W = mirror_layer.W

        self.b = mirror_layer.b

        # W and b are tied to the mirror layer, so this layer trains nothing.
        self.params = []
        self.bias_params = [self.b]

        self.data_order = layers.data_order.type2

        assert (len(self.input_layer.get_output_shape()) == 4), \
            'Input must have 4 dimensions.'

        assert (self.input_layer.data_order == self.data_order), \
            'Input data order does not match this layer\'s data order.'

        self.image_acts_op = ImageActs(stride=self.stride,
                                       partial_sum=self.partial_sum,
                                       pad=self.pad)
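
The untie_biases comment above is the reason for the shared_single(3) / shared_single(1) branch in the previous example: a tied bias is one value per filter, broadcast over every output position, while an untied bias stores a separate value for each position. A NumPy sketch with hypothetical shapes (c01b order):

import numpy as np

n_filters, rows, cols, batch = 16, 6, 7, 2
out = np.zeros((n_filters, rows, cols, batch))   # c01b output

b_tied = np.zeros(n_filters)                     # cf. shared_single(1)
out_tied = out + b_tied[:, None, None, None]

b_untied = np.zeros((n_filters, rows, cols))     # cf. shared_single(3)
out_untied = out + b_untied[:, :, :, None]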
Code example #5
    def grad(self, inputs, dout):
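        # Gradients of FilterActs: d_images via ImageActs (the transpose
        # op) and d_filters via WeightActs, as in the example above.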

        images, filters = inputs

        if 'Cuda' not in str(type(images)):
            raise TypeError("inputs must be cuda")
        if 'Cuda' not in str(type(filters)):
            raise TypeError("filters must be cuda")

        dout, = dout
        dout = gpu_contiguous(dout)

        if 'Cuda' not in str(type(dout)):
            raise TypeError("output gradients must be cuda")

        d_images = ImageActs(self.pad, self.partial_sum)(dout, filters)
        d_filters = WeightActs(self.pad, self.partial_sum)(images, dout)[0]

        return d_images, d_filters
Code example #6
File: test_img_acts.py  Project: alito/pylearn2
def test_match_full_conv():

    # Tests that running ImageActs with no padding is the same as running
    # Theano's conv2d in full mode after flipping the kernel and transposing
    # the output and input channels.
    # In other words, if the convolution computes H = XK, we now compute
    # R = H K^T.

    rng = np.random.RandomState([2013, 1, 29])

    batch_size = 2
    rows = 6
    cols = 7
    channels = 3
    filter_rows = 5
    filter_cols = filter_rows
    num_filters = 16

    hid_acts = shared(rng.uniform(-1., 1., (num_filters,
                                            rows - filter_rows + 1,
                                            cols - filter_cols + 1,
                                            batch_size)
    ).astype('float32'), name='hidacts')

    filters = shared(rng.uniform(-1., 1., (channels, filter_rows,
        filter_cols, num_filters)).astype('float32'), name='filters')

    gpu_images = gpu_from_host(hid_acts)
    gpu_filters = gpu_from_host(filters)

    output = ImageActs()(gpu_images, gpu_filters, as_tensor_variable((6, 7)))
    output = host_from_gpu(output)

    images_bc01 = hid_acts.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters.dimshuffle(3, 0, 1, 2)
    # need to transpose the kernel stack to do imgActs rather than filterActs
    filters_bc01 = filters_bc01.dimshuffle(1, 0, 2, 3)
    # In order to do the transpose operation, we must flip the kernels.
    # But in Theano's conv2d the kernels get flipped anyway,
    # so in this case we do not flip the kernel.

    output_conv2d = conv2d(images_bc01, filters_bc01, border_mode='full')

    output_conv2d = output_conv2d.dimshuffle(1, 2, 3, 0)

    f = function([], [output, output_conv2d])

    output, output_conv2d = f()

    warnings.warn("""test_match_full_conv success criterion is not very strict. Can we verify that this is OK?
                     One possibility is that theano is numerically unstable and Alex's code is better.
                     Probably theano CPU 64 bit is OK but it's worth checking the others.""")
    if np.abs(output - output_conv2d).max() > 2.4e-6:
        assert type(output) == type(output_conv2d)
        assert output.dtype == output_conv2d.dtype
        if output.shape != output_conv2d.shape:
            print('cuda-convnet shape: ', output.shape)
            print('theano shape: ', output_conv2d.shape)
            assert False
        err = np.abs(output - output_conv2d)
        print('absolute error range: ', (err.min(), err.max()))
        print('mean absolute error: ', err.mean())
        print('cuda-convnet value range: ', (output.min(), output.max()))
        print('theano value range: ', (output_conv2d.min(), output_conv2d.max()))
        assert False
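
The kernel-flipping remarks in the comments above reduce to one identity: the transpose of a correlation is a full correlation with a flipped kernel, which is exactly what Theano's conv2d computes when given the unflipped kernel (conv2d flips internally). A SciPy sketch for a hypothetical single-channel case:

import numpy as np
from scipy.signal import convolve2d, correlate2d

rng = np.random.RandomState(0)
h = rng.randn(2, 3)   # hidden activations
k = rng.randn(5, 5)   # kernel

transpose_as_correlation = correlate2d(h, k[::-1, ::-1], mode='full')
theano_style_full_conv = convolve2d(h, k, mode='full')
assert np.allclose(transpose_as_correlation, theano_style_full_conv)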
Code example #7
def test_match_full_conv_grad():

    # Tests that the gradient of ImageActs with no padding is the same as
    # the gradient of Theano's conv2d in full mode after flipping the
    # kernel and transposing the output and input channels.

    rng = np.random.RandomState([2013, 1, 29])

    batch_size = 2
    rows = 6
    cols = 7
    channels = 3
    filter_rows = 5
    filter_cols = filter_rows
    num_filters = 16

    hid_acts = shared(rng.uniform(
        -1., 1., (num_filters, rows - filter_rows + 1, cols - filter_cols + 1,
                  batch_size)).astype('float32'),
                      name='hidacts')

    filters = shared(rng.uniform(
        -1., 1.,
        (channels, filter_rows, filter_cols, num_filters)).astype('float32'),
                     name='filters')

    gpu_images = gpu_from_host(hid_acts)
    gpu_filters = gpu_from_host(filters)

    output = ImageActs()(gpu_images, gpu_filters, as_tensor_variable((6, 7)))
    output = host_from_gpu(output)

    images_bc01 = hid_acts.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters.dimshuffle(3, 0, 1, 2)
    # need to transpose the kernel stack to do imgActs rather than filterActs
    filters_bc01 = filters_bc01.dimshuffle(1, 0, 2, 3)
    # In order to do the transpose operation, we must flip the kernels.
    # But in Theano's conv2d the kernels get flipped anyway,
    # so in this case we do not flip the kernel.

    output_conv2d = conv2d(images_bc01, filters_bc01, border_mode='full')

    output_conv2d = output_conv2d.dimshuffle(1, 2, 3, 0)

    theano_rng = MRG_RandomStreams(5 * 10 * 2013)

    random = theano_rng.normal(size=output_conv2d.shape,
                               dtype=output_conv2d.dtype)

    projected = (output * random).sum()
    projected_conv_2d = (output_conv2d * random).sum()

    grads = T.grad(projected, [hid_acts, filters]) + T.grad(
        projected_conv_2d, [hid_acts, filters])

    f = function([], grads)

    gi, gf, gi_th, gf_th = f()

    assert gi.shape == gi_th.shape
    diff = np.abs(gi - gi_th).max()
    if diff > 2.9e-6:
        raise AssertionError(diff)

    diff = np.abs(gf - gf_th).max()
    if diff > 1e-6:
        raise AssertionError(diff)
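
Rather than comparing full Jacobians, the test projects both outputs onto one shared random tensor and differentiates the resulting scalars; if the two ops compute the same function, the gradients of the projections must match. A toy sketch of the trick (hypothetical functions, finite differences standing in for T.grad):

import numpy as np

rng = np.random.RandomState(0)
x = rng.randn(4)
r = rng.randn(4)          # the shared random projection

f = lambda v: 2.0 * v     # two implementations of the same op
g = lambda v: v + v

def grad_of_projection(fn, v, eps=1e-6):
    # d/dv of sum(fn(v) * r), by finite differences
    base = (fn(v) * r).sum()
    return np.array([((fn(v + eps * np.eye(v.size)[i]) * r).sum() - base) / eps
                     for i in range(v.size)])

assert np.allclose(grad_of_projection(f, x), grad_of_projection(g, x),
                   atol=1e-4)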
Code example #8
def benchmark(n_imgs, n_channels, img_shape, n_filters, filter_shape, pad):
    print('\nn_imgs: %i, n_channels: %i, img_shape: (%i, %i), ' %
          ((n_imgs, n_channels) + img_shape) +
          'n_filters: %i, filter_shape: (%i, %i), pad: %i' %
          ((n_filters, ) + filter_shape + (pad, )))

    # Setup arrays
    padding = (pad, pad)
    strides = (1, 1)
    img_h, img_w = img_shape
    filter_h, filter_w = filter_shape
    convout_h = img_h + 2 * pad - filter_h + 1
    convout_w = img_w + 2 * pad - filter_w + 1

    imgs_bc01_shape = (n_imgs, n_channels, img_h, img_w)
    filters_bc01_shape = (n_filters, n_channels, filter_h, filter_w)

    imgs_bc01 = np.random.randn(n_imgs, n_channels, img_h, img_w)
    imgs_c01b = np.transpose(imgs_bc01, (1, 2, 3, 0))
    filters_fc01 = np.random.randn(n_filters, n_channels, filter_h, filter_w)
    filters_c01f = np.transpose(filters_fc01, (1, 2, 3, 0))
    convout_bc01 = np.random.randn(n_imgs, n_filters, convout_h, convout_w)
    convout_c01b = np.transpose(convout_bc01, (1, 2, 3, 0))

    imgs_bc01_t = theano.shared(imgs_bc01.astype(theano.config.floatX))
    imgs_c01b_t = theano.shared(imgs_c01b.astype(theano.config.floatX))
    filters_fc01_t = theano.shared(filters_fc01.astype(theano.config.floatX))
    filters_c01f_t = theano.shared(filters_c01f.astype(theano.config.floatX))
    convout_bc01_t = theano.shared(convout_bc01.astype(theano.config.floatX))
    convout_c01b_t = theano.shared(convout_c01b.astype(theano.config.floatX))
    imgs_bc01_ca = ca.array(imgs_bc01)
    filters_fc01_ca = ca.array(filters_fc01)
    convout_bc01_ca = ca.array(convout_bc01)

    # Forward propagation
    print('fprop')
    convout_cc_op = FilterActs(stride=1, partial_sum=4, pad=pad)
    convout_cc_expr = convout_cc_op(imgs_c01b_t, filters_c01f_t)
    convout_cc_fun = theano.function([], convout_cc_expr)
    convout_cc = convout_cc_fun()
    convout_cc = np.transpose(convout_cc, (3, 0, 1, 2))

    def convout_ca_fun():
        convout = ca.nnet.conv_bc01(imgs_bc01_ca, filters_fc01_ca, padding,
                                    strides)
        return convout

    convout_ca = np.array(convout_ca_fun())
    print('         correct: ' + str(allclose(convout_ca, convout_cc)))
    duration_cc = avg_running_time(convout_cc_fun)
    duration_ca = avg_running_time(convout_ca_fun)
    print('   avg. duration: cuda_convnet: %.4f  ca: %.4f' %
          (duration_cc, duration_ca))
    print('         speedup: %.2f' % (duration_cc / duration_ca))
    del convout_cc_op
    del convout_cc_expr
    del convout_cc_fun

    # Back propagation, imgs
    print('bprop_imgs')
    dimgs_cc_op = ImageActs(stride=1, partial_sum=1, pad=pad)
    dimgs_cc_expr = dimgs_cc_op(convout_c01b_t, filters_c01f_t)
    dimgs_cc_fun = theano.function([], dimgs_cc_expr)
    dimgs_cc = dimgs_cc_fun()
    dimgs_cc = np.transpose(dimgs_cc, (3, 0, 1, 2))

    def dimgs_ca_fun():
        return ca.nnet.conv_bc01_bprop_imgs(filters_fc01_ca, convout_bc01_ca,
                                            img_shape, padding, strides)

    dimgs_ca = np.array(dimgs_ca_fun())
    print('         correct: ' + str(allclose(dimgs_ca, dimgs_cc)))
    duration_cc = avg_running_time(dimgs_cc_fun)
    duration_ca = avg_running_time(dimgs_ca_fun)
    print('   avg. duration: cuda_convnet: %.4f  ca: %.4f' %
          (duration_cc, duration_ca))
    print('         speedup: %.2f' % (duration_cc / duration_ca))
    del dimgs_cc_op
    del dimgs_cc_expr
    del dimgs_cc_fun

    # Back propagation, filters
    dfilters_cc_op = WeightActs(stride=1, partial_sum=1, pad=pad)
    dfilters_cc_expr = dfilters_cc_op(imgs_c01b_t, convout_c01b_t,
                                      T.as_tensor_variable(filter_shape))
    dfilters_cc_fun = theano.function([], dfilters_cc_expr)
    dfilters_cc = dfilters_cc_fun()[0]
    dfilters_cc = np.transpose(dfilters_cc, (3, 0, 1, 2))

    def dfilters_ca_fun():
        return ca.nnet.conv_bc01_bprop_filters(imgs_bc01_ca, convout_bc01_ca,
                                               filter_shape, padding, strides)

    dfilters_ca = np.array(dfilters_ca_fun())

    print('bprop_filters')
    print('         correct: ' + str(allclose(dfilters_ca, dfilters_cc)))
    duration_cc = avg_running_time(dfilters_cc_fun)
    duration_ca = avg_running_time(dfilters_ca_fun)
    print('   avg. duration: cuda_convnet: %.4f  ca: %.4f' %
          (duration_cc, duration_ca))
    print('         speedup: %.2f' % (duration_cc / duration_ca))
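
avg_running_time is used but not shown in this excerpt. A plausible minimal stand-in (hypothetical, not the project's actual helper) averages wall-clock time over repeated calls after a short warm-up:

import time

def avg_running_time(fn, n_iter=10, n_warmup=2):
    # hypothetical helper: the benchmark's real implementation is not shown
    for _ in range(n_warmup):   # let compilation / GPU warm-up settle
        fn()
    start = time.time()
    for _ in range(n_iter):
        fn()
    return (time.time() - start) / n_iter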