Exemple #1
0
    def lmul(self, x):
        """
        dot(x, A)
        aka, do convolution with input image x

        Parameters
        ----------
        x : variable with 5 dimensions
            Its axes are ordered as described by ``self.input_axes``. If the
            name of its type does not contain "Cuda", it is moved to the GPU
            with ``gpu_from_host`` and the FilterActs result is moved back
            with ``host_from_gpu``.

        Returns
        -------
        rval : variable
            The result, with axes ordered as described by
            ``self.output_axes``.
        """

        check_cuda(str(type(self)) + ".lmul")
        # TODO: find out why x sometimes arrives as a host (CPU) variable.
        cpu = "Cuda" not in str(type(x))
        if cpu:
            x = gpu_from_host(x)

        assert x.ndim == 5
        x_axes = self.input_axes
        assert len(x_axes) == 5

        # Axis order used for the rest of this method.
        op_axes = ("c", 0, 1, "t", "b")
        if tuple(x_axes) != op_axes:
            x = x.dimshuffle(*[x_axes.index(axis) for axis in op_axes])

        # Merge the last two entries of signal_shape into a single axis so
        # that the 4D FilterActs op can be applied.
        _x_4d_shape = (
            self.signal_shape[0],
            self.signal_shape[1],
            self.signal_shape[2],
            self.signal_shape[3] * self.signal_shape[4],
        )

        x = x.reshape(_x_4d_shape)

        x = gpu_contiguous(x)

        rval = FilterActs(self.pad, self.partial_sum, self.kernel_stride[0])(x, self._filters)

        if cpu:
            rval = host_from_gpu(rval)

        # Split the merged axes back out into a 6D result.
        rval = rval.reshape(
            (
                self.filter_shape[3],
                self.filter_shape[4],
                rval.shape[1],
                rval.shape[2],
                self.signal_shape[3],
                self.signal_shape[4],
            )
        )

        # NOTE(review): presumably this combines the per-frame filter
        # responses along the time axis -- confirm against the definition of
        # diagonal_subtensor.
        rval = diagonal_subtensor(rval, 4, 0).sum(axis=0)

        # Format the output based on the output space
        rval_axes = self.output_axes
        assert len(rval_axes) == 5

        if tuple(rval_axes) != op_axes:
            rval = rval.dimshuffle(*[op_axes.index(axis) for axis in rval_axes])

        return rval
Exemple #2
0
def make_funcs(batch_size, rows, cols, channels, filter_rows, num_filters):
    """
    Build the two compiled functions that are benchmarked against each other.

    Random float32 images of shape (channels, rows, cols, batch_size) and
    filters of shape (channels, filter_rows, filter_rows, num_filters) are
    generated from a fixed seed. One function applies FilterActs to them;
    the other applies conv2d in 'valid' mode to the same values transposed
    with (3, 0, 1, 2), with the filters flipped along their last two axes.

    Returns
    -------
    cuda_convnet, baseline : compiled functions
        Both take no arguments and store their result in a shared variable
        through ``updates``.
    """
    rng = np.random.RandomState([2012, 10, 9])

    # Only square filters are generated.
    filter_cols = filter_rows

    image_shape = (channels, rows, cols, batch_size)
    kernel_shape = (channels, filter_rows, filter_cols, num_filters)

    # Images are drawn before filters; keep this order so the random stream
    # yields the same values.
    base_image_value = rng.uniform(-1.0, 1.0, image_shape).astype("float32")
    base_filters_value = rng.uniform(-1.0, 1.0, kernel_shape).astype("float32")

    def _compile(expression, name):
        # Evaluate once to get a correctly shaped shared buffer, then compile
        # a function whose only effect is to refresh that buffer.
        result_buffer = shared(expression.eval())
        compiled = function([], updates={result_buffer: expression})
        compiled.name = name
        return compiled

    # bench.py should always be run in gpu mode so we should not need a gpu_from_host here
    images = shared(base_image_value)
    filters = shared(base_filters_value, name="filters")
    cuda_convnet = _compile(FilterActs()(images, filters), "cuda_convnet")

    to_bc01 = (3, 0, 1, 2)
    images_bc01v = base_image_value.transpose(*to_bc01)
    filters_bc01v = base_filters_value.transpose(*to_bc01)[:, :, ::-1, ::-1]

    images_bc01 = shared(images_bc01v)
    filters_bc01 = shared(filters_bc01v)

    output_conv2d = conv2d(
        images_bc01,
        filters_bc01,
        border_mode="valid",
        image_shape=images_bc01v.shape,
        filter_shape=filters_bc01v.shape,
    )
    baseline = _compile(output_conv2d, "baseline")

    return cuda_convnet, baseline
Exemple #3
0
def test_match_valid_conv():
    """
    Tests that running FilterActs with no padding is the same as running
    theano's conv2D in valid mode.

    Raises
    ------
    AssertionError
        If the two outputs differ in shape, or if their maximum absolute
        difference exceeds 2.4e-6. The message carries the diagnostics.
    """

    rng = np.random.RandomState([2012, 10, 9])

    batch_size = 5
    rows = 10
    cols = 9
    channels = 3
    filter_rows = 4
    filter_cols = filter_rows
    num_filters = 16

    images = shared(rng.uniform(
        -1., 1., (channels, rows, cols, batch_size)).astype('float32'),
                    name='images')
    filters = shared(rng.uniform(
        -1., 1.,
        (channels, filter_rows, filter_cols, num_filters)).astype('float32'),
                     name='filters')

    gpu_images = gpu_from_host(images)
    gpu_filters = gpu_from_host(filters)

    output = FilterActs()(gpu_images, gpu_filters)
    output = host_from_gpu(output)

    # conv2d is fed the same data with the last axis moved to the front and
    # the filters flipped along their two spatial axes.
    images_bc01 = images.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters_bc01[:, :, ::-1, ::-1]

    output_conv2d = conv2d(images_bc01, filters_bc01, border_mode='valid')

    # Bring the conv2d result back to the FilterActs axis order.
    output_conv2d = output_conv2d.dimshuffle(1, 2, 3, 0)

    f = function([], [output, output_conv2d])

    output, output_conv2d = f()

    warnings.warn(
        """test_match_valid_conv success criterion is not very strict. Can we verify that this is OK?
                     One possibility is that theano is numerically unstable and Alex's code is better.
                     Probably theano CPU 64 bit is OK but it's worth checking the others."""
    )

    # Compare shapes before subtracting: with mismatched shapes the
    # subtraction would either raise or silently broadcast, hiding the
    # real problem.
    assert output.shape == output_conv2d.shape, (
        'cuda-convnet shape: %s, theano shape: %s'
        % (output.shape, output_conv2d.shape))

    err = np.abs(output - output_conv2d)
    if err.max() > 2.4e-6:
        assert type(output) == type(output_conv2d), (
            'type mismatch: %s vs %s' % (type(output), type(output_conv2d)))
        assert output.dtype == output_conv2d.dtype, (
            'dtype mismatch: %s vs %s' % (output.dtype, output_conv2d.dtype))
        raise AssertionError(
            'cuda-convnet and theano outputs differ\n'
            'absolute error range: %s\n'
            'mean absolute error: %s\n'
            'cuda-convnet value range: %s\n'
            'theano value range: %s'
            % ((err.min(), err.max()), err.mean(),
               (output.min(), output.max()),
               (output_conv2d.min(), output_conv2d.max())))
def test_grad():
    """
    Tests that the gradient of FilterActs with respect to the images
    matches the gradient of theano's conv2D in valid mode.

    Raises
    ------
    AssertionError
        If the two gradients differ in shape, or if their maximum absolute
        difference exceeds 7.7e-6. The message carries the diagnostics.
    """

    rng = np.random.RandomState([2012, 10, 9])

    batch_size = 5
    rows = 10
    cols = 9
    channels = 3
    filter_rows = 4
    filter_cols = filter_rows
    num_filters = 16

    images = shared(rng.uniform(-1.0, 1.0, (channels, rows, cols, batch_size)).astype("float32"), name="images")
    filters = shared(
        rng.uniform(-1.0, 1.0, (channels, filter_rows, filter_cols, num_filters)).astype("float32"), name="filters"
    )

    gpu_images = gpu_from_host(images)
    gpu_filters = gpu_from_host(filters)

    output = FilterActs()(gpu_images, gpu_filters)
    output = host_from_gpu(output)
    # XXX: use verify_grad
    output_grad = grad(output.sum(), images)

    # conv2d is fed the same data with the last axis moved to the front and
    # the filters flipped along their two spatial axes.
    images_bc01 = images.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters.dimshuffle(3, 0, 1, 2)
    filters_bc01 = filters_bc01[:, :, ::-1, ::-1]

    output_conv2d = conv2d(images_bc01, filters_bc01, border_mode="valid")

    output_conv2d = output_conv2d.dimshuffle(1, 2, 3, 0)
    # XXX: use verify_grad
    output_conv2d_grad = grad(output_conv2d.sum(), images)
    f = function([], [output_grad, output_conv2d_grad])

    output_grad, output_conv2d_grad = f()

    warnings.warn(
        """test_grad success criterion is not very strict. Can we verify that this is OK?
                     One possibility is that theano is numerically unstable and Alex's code is better.
                     Probably theano CPU 64 bit is OK but it's worth checking the others."""
    )

    # Compare shapes before subtracting: with mismatched shapes the
    # subtraction would either raise or silently broadcast, hiding the
    # real problem.
    assert output_grad.shape == output_conv2d_grad.shape, (
        "cuda-convnet shape: %s, theano shape: %s"
        % (output_grad.shape, output_conv2d_grad.shape))

    err = np.abs(output_grad - output_conv2d_grad)
    if err.max() > 7.7e-6:
        assert type(output_grad) == type(output_conv2d_grad), (
            "type mismatch: %s vs %s"
            % (type(output_grad), type(output_conv2d_grad)))
        assert output_grad.dtype == output_conv2d_grad.dtype, (
            "dtype mismatch: %s vs %s"
            % (output_grad.dtype, output_conv2d_grad.dtype))
        raise AssertionError(
            "cuda-convnet and theano gradients differ\n"
            "absolute error range: %s\n"
            "mean absolute error: %s\n"
            "cuda-convnet value range: %s\n"
            "theano value range: %s"
            % ((err.min(), err.max()), err.mean(),
               (output_grad.min(), output_grad.max()),
               (output_conv2d_grad.min(), output_conv2d_grad.max())))
Exemple #5
0
def test_match_valid_conv():
    """
    Tests that running FilterActs with no padding is the same as running
    theano's conv2D in valid mode.

    Raises
    ------
    KnownFailureTest
        If compiling the comparison function fails.
    AssertionError
        If the two outputs differ in shape, or if their maximum absolute
        difference exceeds 2.4e-6. The message carries the diagnostics.
    """

    rng = np.random.RandomState([2012,10,9])

    batch_size = 5
    rows = 10
    cols = 9
    channels = 3
    filter_rows = 4
    filter_cols = filter_rows
    num_filters = 16

    images = shared(rng.uniform(-1., 1., (channels, rows, cols,
        batch_size)).astype('float32'), name='images')
    filters = shared(rng.uniform(-1., 1., (channels, filter_rows,
        filter_cols, num_filters)).astype('float32'), name='filters')

    gpu_images = gpu_from_host(images)
    gpu_filters = gpu_from_host(filters)

    output = FilterActs()(gpu_images, gpu_filters)
    output = host_from_gpu(output)

    # conv2d is fed the same data with the last axis moved to the front and
    # the filters flipped along their two spatial axes.
    images_bc01 = images.dimshuffle(3,0,1,2)
    filters_bc01 = filters.dimshuffle(3,0,1,2)
    filters_bc01 = filters_bc01[:,:,::-1,::-1]

    output_conv2d = conv2d(images_bc01, filters_bc01,
            border_mode='valid')

    output_conv2d = output_conv2d.dimshuffle(1,2,3,0)

    try:
        f = function([], [output, output_conv2d])
    except Exception:
        # Only ordinary errors are turned into a known failure; a bare
        # ``except`` would also swallow KeyboardInterrupt and SystemExit.
        raise KnownFailureTest("cuda-convnet code depends on an unmerged theano feature.")

    output, output_conv2d = f()

    warnings.warn("test_match_valid_conv success criterion is not very strict. Can we verify that this is OK?")

    # Compare shapes before subtracting: with mismatched shapes the
    # subtraction would either raise or silently broadcast, hiding the
    # real problem.
    assert output.shape == output_conv2d.shape, (
        'cuda-convnet shape: %s, theano shape: %s'
        % (output.shape, output_conv2d.shape))

    err = np.abs(output - output_conv2d)
    if err.max() > 2.4e-6:
        assert type(output) == type(output_conv2d), (
            'type mismatch: %s vs %s' % (type(output), type(output_conv2d)))
        assert output.dtype == output_conv2d.dtype, (
            'dtype mismatch: %s vs %s' % (output.dtype, output_conv2d.dtype))
        raise AssertionError(
            'cuda-convnet and theano outputs differ\n'
            'absolute error range: %s\n'
            'mean absolute error: %s\n'
            'cuda-convnet value range: %s\n'
            'theano value range: %s'
            % ((err.min(), err.max()), err.mean(),
               (output.min(), output.max()),
               (output_conv2d.min(), output_conv2d.max())))
def test_match_valid_conv_strided():
    """
    Tests that running FilterActs with a stride is the same as running
    theano's conv2D in valid mode with ``subsample=(stride, stride)``.

    Raises
    ------
    AssertionError
        If the two outputs differ in shape, or if their maximum absolute
        difference exceeds 2.4e-6. The message carries the diagnostics.
    """

    rng = np.random.RandomState([2012,10,9])

    batch_size = 5
    rows = 9
    cols = 9
    channels = 3
    filter_rows = 3
    filter_cols = filter_rows
    stride = 3
    num_filters = 16

    images = shared(rng.uniform(-1., 1., (channels, rows, cols,
        batch_size)).astype('float32'), name='images')
    filters = shared(rng.uniform(-1., 1., (channels, filter_rows,
        filter_cols, num_filters)).astype('float32'), name='filters')

    gpu_images = gpu_from_host(images)
    gpu_filters = gpu_from_host(filters)

    output = FilterActs(stride=stride)(gpu_images, gpu_filters)
    output = host_from_gpu(output)

    # conv2d is fed the same data with the last axis moved to the front and
    # the filters flipped along their two spatial axes.
    images_bc01 = images.dimshuffle(3,0,1,2)
    filters_bc01 = filters.dimshuffle(3,0,1,2)
    filters_bc01 = filters_bc01[:,:,::-1,::-1]

    output_conv2d = conv2d(images_bc01, filters_bc01,
            border_mode='valid', subsample=(stride, stride))

    # The stride is already applied through ``subsample``, so no further
    # slicing of the conv2d result is needed.
    output_conv2d = output_conv2d.dimshuffle(1,2,3,0)
    f = function([], [output, output_conv2d])

    output, output_conv2d = f()

    warnings.warn("""test_match_valid_conv_strided success criterion is not very strict. Can we verify that this is OK?
                     One possibility is that theano is numerically unstable and Alex's code is better.
                     Probably theano CPU 64 bit is OK but it's worth checking the others.""")

    # Compare shapes before subtracting: with mismatched shapes the
    # subtraction would either raise or silently broadcast, hiding the
    # real problem.
    assert output.shape == output_conv2d.shape, (
        'cuda-convnet shape: %s, theano shape: %s'
        % (output.shape, output_conv2d.shape))

    err = np.abs(output - output_conv2d)
    if err.max() > 2.4e-6:
        assert type(output) == type(output_conv2d), (
            'type mismatch: %s vs %s' % (type(output), type(output_conv2d)))
        assert output.dtype == output_conv2d.dtype, (
            'dtype mismatch: %s vs %s' % (output.dtype, output_conv2d.dtype))
        raise AssertionError(
            'cuda-convnet and theano outputs differ\n'
            'absolute error range: %s\n'
            'mean absolute error: %s\n'
            'cuda-convnet value range: %s\n'
            'theano value range: %s'
            % ((err.min(), err.max()), err.mean(),
               (output.min(), output.max()),
               (output_conv2d.min(), output_conv2d.max())))
Exemple #7
0
    def lmul(self, x):
        """
        dot(x, A)
        aka, do convolution with input image x

        Parameters
        ----------
        x : variable with 5 dimensions
            Its axes are ordered as described by ``self.input_axes``. If the
            name of its type does not contain 'Cuda', it is moved to the GPU
            with ``gpu_from_host`` and the FilterActs result is moved back
            with ``host_from_gpu``.

        Returns
        -------
        rval : variable
            The result, with axes ordered as described by
            ``self.output_axes``.
        """

        check_cuda(str(type(self)) + ".lmul")
        # TODO: find out why x sometimes arrives as a host (CPU) variable.
        cpu = 'Cuda' not in str(type(x))
        if cpu:
            x = gpu_from_host(x)

        assert x.ndim == 5
        x_axes = self.input_axes
        assert len(x_axes) == 5

        # Axis order used for the rest of this method.
        op_axes = ('c', 0, 1, 't', 'b')
        if tuple(x_axes) != op_axes:
            x = x.dimshuffle(*[x_axes.index(axis) for axis in op_axes])

        # Merge the last two entries of signal_shape into a single axis so
        # that the 4D FilterActs op can be applied.
        _x_4d_shape = (self.signal_shape[0], self.signal_shape[1],
                       self.signal_shape[2],
                       self.signal_shape[3] * self.signal_shape[4])

        x = x.reshape(_x_4d_shape)

        x = gpu_contiguous(x)

        rval = FilterActs(self.pad, self.partial_sum,
                          self.kernel_stride[0])(x, self._filters)

        if cpu:
            rval = host_from_gpu(rval)

        # Split the merged axes back out into a 6D result.
        rval = rval.reshape(
            (self.filter_shape[3], self.filter_shape[4], rval.shape[1],
             rval.shape[2], self.signal_shape[3], self.signal_shape[4]))

        # NOTE(review): presumably this combines the per-frame filter
        # responses along the time axis -- confirm against the definition of
        # diagonal_subtensor.
        rval = diagonal_subtensor(rval, 4, 0).sum(axis=0)

        # Format the output based on the output space
        rval_axes = self.output_axes
        assert len(rval_axes) == 5

        if tuple(rval_axes) != op_axes:
            rval = rval.dimshuffle(
                *[op_axes.index(axis) for axis in rval_axes])

        return rval
Exemple #8
0
    def lmul(self, x):
        """
        Compute dot(x, A), i.e. convolve the input image x with this
        operator's filters using FilterActs.

        Parameters
        ----------
        x : variable with 4 dimensions
            Its axes are ordered as described by ``self.input_axes``. If the
            name of its type does not contain 'Cuda', it is moved to the GPU
            first and the result is moved back to the host.

        Returns
        -------
        variable
            The FilterActs output, with axes ordered as described by
            ``self.output_axes``.
        """

        check_cuda(str(type(self)) + ".lmul")

        on_host = 'Cuda' not in str(type(x))
        if on_host:
            x = gpu_from_host(x)

        # FilterActs expects channel, topo dim 0, topo dim 1, batch_index.
        target_axes = ('c', 0, 1, 'b')

        assert x.ndim == 4
        source_axes = self.input_axes
        assert len(source_axes) == 4

        if tuple(source_axes) != target_axes:
            to_op_order = [source_axes.index(axis) for axis in target_axes]
            x = x.dimshuffle(*to_op_order)

        x = gpu_contiguous(x)

        # Objects unpickled from old files may lack kernel_stride.
        if not hasattr(self, 'kernel_stride'):
            self.kernel_stride = (1, 1)

        conv_op = FilterActs(self.pad, self.partial_sum,
                             self.kernel_stride[0])
        result = conv_op(x, self._filters)

        result_axes = self.output_axes
        assert len(result_axes) == 4

        if on_host:
            result = host_from_gpu(result)

        # Nothing left to do when the output space already uses the op order.
        if tuple(result_axes) == target_axes:
            return result

        from_op_order = [target_axes.index(axis) for axis in result_axes]
        return result.dimshuffle(*from_op_order)
Exemple #9
0
    def lmul(self, x):
        """
        Compute dot(x, A): convolve the input image x with the filters held
        in ``self._filters`` by means of FilterActs.

        Parameters
        ----------
        x : variable with 4 dimensions
            Axis order is given by ``self.input_axes``. When the name of its
            type does not contain 'Cuda', x is transferred to the GPU and
            the result is transferred back to the host.

        Returns
        -------
        variable
            Convolution result, axes ordered as given by
            ``self.output_axes``.
        """

        check_cuda(str(type(self)) + ".lmul")

        needs_transfer = 'Cuda' not in str(type(x))
        if needs_transfer:
            x = gpu_from_host(x)

        # Layout required by FilterActs:
        # channel, topo dim 0, topo dim 1, batch_index
        c01b = ('c', 0, 1, 'b')

        assert x.ndim == 4
        in_axes = self.input_axes
        assert len(in_axes) == 4

        if tuple(in_axes) != c01b:
            permutation = [in_axes.index(axis) for axis in c01b]
            x = x.dimshuffle(*permutation)

        x = gpu_contiguous(x)

        # Patch instances loaded from old pickle files.
        if not hasattr(self, 'kernel_stride'):
            self.kernel_stride = (1, 1)

        filter_acts = FilterActs(
            self.pad, self.partial_sum, self.kernel_stride[0])
        convolved = filter_acts(x, self._filters)

        # Re-order the output to match the output space.
        out_axes = self.output_axes
        assert len(out_axes) == 4

        if needs_transfer:
            convolved = host_from_gpu(convolved)

        if tuple(out_axes) != c01b:
            permutation = [c01b.index(axis) for axis in out_axes]
            convolved = convolved.dimshuffle(*permutation)

        return convolved
def make_funcs(batch_size, rows, cols, channels, filter_rows, num_filters):
    """
    Build the two compiled functions that are benchmarked against each other.

    Returns
    -------
    cuda_convnet, baseline : compiled functions
        Both take no arguments and store their result in a shared variable
        through ``updates``. ``cuda_convnet`` applies FilterActs;
        ``baseline`` applies conv2d in 'valid' mode.

    Notes
    -----
    NOTE(review): the names suggest a two-layer network, but as written only
    a single FilterActs application on ``images`` reaches ``output`` (see the
    inline notes below). Confirm the intent before relying on the timings.
    """
    rng = np.random.RandomState([2012, 10, 9])

    # Only square filters are generated.
    filter_cols = filter_rows

    base_image_value = rng.uniform(
        -1., 1., (channels, rows, cols, batch_size)).astype('float32')
    base_filters_value = rng.uniform(
        -1., 1.,
        (channels, filter_rows, filter_cols, num_filters)).astype('float32')
    images = shared(base_image_value)
    filters = shared(base_filters_value, name='filters')

    # bench.py should always be run in gpu mode so we should not need a gpu_from_host here
    layer_1_detector = FilterActs()(images, filters)

    # NOTE(review): axes 1 and 2 are sliced with bounds taken from shape[0]
    # and shape[1]; this looks like an off-by-one in the axis index --
    # confirm. The result is also never used below.
    layer_1_pooled_fake = layer_1_detector[:, 0:layer_1_detector.shape[0]:2,
                                           0:layer_1_detector.shape[1]:2, :]

    # NOTE(review): base_filters2_value is generated but never used;
    # filters2 is built from base_filters_value instead.
    base_filters2_value = rng.uniform(
        -1., 1.,
        (num_filters, filter_rows, filter_cols, num_filters)).astype('float32')
    filters2 = shared(base_filters_value, name='filters')

    # NOTE(review): the second layer is applied to ``images`` rather than to
    # layer_1_pooled_fake, so layer 1 does not feed into the output.
    layer_2_detector = FilterActs()(images, filters2)

    output = layer_2_detector

    # Evaluate once to obtain a shared buffer of the right shape, then
    # compile a function whose only effect is to refresh that buffer.
    output_shared = shared(output.eval())

    cuda_convnet = function([], updates={output_shared: output})
    cuda_convnet.name = 'cuda_convnet'

    # The baseline uses the same values with the last axis moved to the
    # front and the filters flipped along their last two axes.
    images_bc01 = base_image_value.transpose(3, 0, 1, 2)
    filters_bc01 = base_filters_value.transpose(3, 0, 1, 2)
    filters_bc01 = filters_bc01[:, :, ::-1, ::-1]

    images_bc01 = shared(images_bc01)
    filters_bc01 = shared(filters_bc01)

    output_conv2d = conv2d(images_bc01, filters_bc01, border_mode='valid')

    output_conv2d_shared = shared(output_conv2d.eval())

    baseline = function([], updates={output_conv2d_shared: output_conv2d})
    baseline.name = 'baseline'

    return cuda_convnet, baseline
Exemple #11
0
    def grad(self, inputs, g_outputs):
        """
        Build the gradient expressions for this op.

        Parameters
        ----------
        inputs : sequence of three variables
            ``(hid_acts, filters, output_shape)``.
        g_outputs : sequence holding a single variable
            Gradient with respect to this op's output.

        Returns
        -------
        list
            Gradient with respect to ``hid_acts``, gradient with respect to
            ``filters``, and a disconnected gradient for ``output_shape``.
        """
        # FilterActs and WeightActs are module-level names that are imported
        # on first use.
        global FilterActs
        global WeightActs

        hid_acts, filters, _ = inputs
        (g_images,) = g_outputs
        g_images = as_cuda_ndarray_variable(g_images)
        assert not isinstance(g_images, list)

        if FilterActs is None:
            from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs
            from pylearn2.sandbox.cuda_convnet.weight_acts import WeightActs

        # Both ops are configured with this op's own settings.
        op_settings = dict(stride=self.stride,
                           partial_sum=self.partial_sum,
                           pad=self.pad)

        weight_acts = WeightActs(**op_settings)
        g_filters = weight_acts(g_images, hid_acts, filters.shape[1:3])[0]
        assert not isinstance(g_filters, list)

        filter_acts = FilterActs(**op_settings)
        g_hid_acts = filter_acts(g_images, filters)

        return [g_hid_acts, g_filters, DisconnectedType()()]
Exemple #12
0
    def apply(self, v, **kwargs):
        """
        Convolve ``v.output`` with ``self.filters`` through cuda-convnet's
        FilterActs, add ``self.filters_bias``, and pass a copy of ``v``
        carrying the result to ``self.post_apply``.

        Parameters
        ----------
        v : object with an ``output`` attribute
            Copied with ``vcopy`` before being updated.
        **kwargs
            Forwarded unchanged to ``self.post_apply``.

        Returns
        -------
        Whatever ``self.post_apply`` returns.
        """
        data = v.output

        # See http://benanne.github.io/2014/04/03/faster-convolutions-in-theano.html
        # for further info about what follows.
        # See cuda-convnet for info about partial_sum
        from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs
        conv_op = FilterActs(
            stride=self.kernel_stride,
            pad=self.padding,
            partial_sum=self.partial_sum,
        )

        # Move the first axis last for both operands and make them
        # contiguous on the GPU before handing them to FilterActs.
        to_op_order = (1, 2, 3, 0)
        data_shuffled = data.dimshuffle(*to_op_order)
        weights_shuffled = self.filters.dimshuffle(*to_op_order)
        data_contiguous = gpu_contiguous(data_shuffled)
        weights_contiguous = gpu_contiguous(weights_shuffled)

        # out_shuffled is in channels, height, width, mb order
        out_shuffled = conv_op(data_contiguous, weights_contiguous)
        bias = self.filters_bias.dimshuffle(0, 'x', 'x', 'x')
        out_shuffled += bias

        # Undo the shuffle: last axis back to the front.
        output = out_shuffled.dimshuffle(3, 0, 1, 2)

        updated = vcopy(v)
        updated.update(output=output)
        return self.post_apply(updated, **kwargs)
Exemple #13
0
    def test_dimshuffle_false_get_output_for(self, DummyInputLayer):
        """
        Check that Conv2DCCLayer with ``dimshuffle=False`` gives the same
        output as calling FilterActs directly on the same input and kernel.

        The test is skipped when ``lasagne.layers.cuda_convnet`` cannot be
        imported.
        """
        try:
            from lasagne.layers.cuda_convnet import Conv2DCCLayer
        except ImportError:
            pytest.skip("cuda_convnet not available")

        # this implementation is tested against FilterActs instead of
        # theano.tensor.nnet.conv.conv2d because using the latter leads to
        # numerical precision errors.
        from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs
        reference_op = FilterActs(stride=1, pad=0, partial_sum=1)

        input_shape = (4, 5, 5, 8)  # c01b instead of bc01
        kernel_shape = (4, 3, 3, 16)

        # The input is drawn before the kernel.
        input_var = theano.shared(floatX(np.random.random(input_shape)))
        kernel = theano.shared(floatX(np.random.random(kernel_shape)))

        layer = Conv2DCCLayer(
            DummyInputLayer(input_shape),
            num_filters=16,
            filter_size=(3, 3),
            dimshuffle=False,
            W=kernel,
            b=None,
            nonlinearity=None,
        )

        expected = np.array(reference_op(input_var, kernel).eval())
        actual = np.array(layer.get_output_for(input_var).eval())

        assert actual.shape == expected.shape
        assert actual.shape == layer.output_shape
        assert np.allclose(actual, expected)
Exemple #14
0
    def __init__(self, rngs, input_layer, Lshape, traits, activation):
        """
        Build a convolution layer on top of ``input_layer`` with FilterActs.

        Parameters
        ----------
        rngs : sequence
            Only ``rngs[0]`` is used; it is passed to ``NNl.gen_weights``.
        input_layer : layer object
            Its ``output(False)`` is the input of the convolution.
        Lshape : sequence
            ``Lshape[1]`` is the filter shape. Its entry at index 1 must
            equal ``Lshape[0][1]``.
        traits : mapping
            The keys 'l2decay', 'padding' and 'initW' are read.
        activation
            Not used by this constructor.
        """
        super(ConvLayer, self).__init__(input_layer, traits, "Conv")

        self.rng = rngs[0]
        self.l2decay = traits['l2decay']

        filter_shape = Lshape[1]
        input_shape = Lshape[0]
        # The number of input channels must match number of filter channels
        assert input_shape[1] == filter_shape[1]

        self.pad = traits['padding']
        self.W = NNl.gen_weights(self.rng, filter_shape, 0, traits['initW'])

        # convolve input feature maps with filters
        # Using Alex K.'s fast CUDA conv, courtesy of S. Dieleman
        self.x = self.input_layer.output(False)
        conv_op = FilterActs(pad=self.pad, partial_sum=1)

        bc01_to_c01b = (1, 2, 3, 0)
        c01b_to_bc01 = (3, 0, 1, 2)

        gpu_input = gpu_contiguous(self.x.dimshuffle(*bc01_to_c01b))
        gpu_filters = gpu_contiguous(self.W.dimshuffle(*bc01_to_c01b))
        convolved = conv_op(gpu_input, gpu_filters)
        self.conv_out = convolved.dimshuffle(*c01b_to_bc01)

        # store parameters of this layer
        self.params = [self.W]
Exemple #15
0
    def __init__(self, input_layer, n_filters, filter_size, weights_std,
                 init_bias_value, stride=1, nonlinearity=layers.rectify,
                 dropout=0., partial_sum=None, untie_biases=False):
        """
        This is a convolution which is circular in the 0-direction, and valid in the 1-direction.

        n_filters should be a multiple of 16

        If ``untie_biases`` is True, each position in the output map has its
        own bias (as opposed to having the same bias everywhere for a given
        filter).
        """
        # Plain configuration.
        self.input_layer = input_layer
        self.n_filters = n_filters
        self.filter_size = filter_size
        self.weights_std = np.float32(weights_std)
        self.init_bias_value = np.float32(init_bias_value)
        self.stride = stride
        self.nonlinearity = nonlinearity
        self.dropout = dropout
        self.partial_sum = partial_sum
        self.untie_biases = untie_biases

        # Values derived from the input layer.
        self.mb_size = self.input_layer.mb_size
        self.input_shape = self.input_layer.get_output_shape()
        self.filter_shape = (
            self.input_shape[0],
            filter_size,
            filter_size,
            n_filters,
        )

        # Parameters are created as shared placeholders.
        # NOTE(review): presumably reset_params() fills in their values --
        # confirm against its definition.
        self.W = layers.shared_single(4)
        bias_ndim = 3 if self.untie_biases else 1
        self.b = layers.shared_single(bias_ndim)

        self.params = [self.W, self.b]
        self.bias_params = [self.b]
        self.reset_params()

        self.filter_acts_op = FilterActs(
            stride=self.stride, partial_sum=self.partial_sum)
 def __init__(self, incoming, num_filters, filter_size, groups=1,
              strides=(1, 1), border_mode=None, untie_biases=False,
              W=init.Uniform(), b=init.Constant(0.),
              nonlinearity=nonlinearities.rectify, pad=None,
              dimshuffle=True, flip_filters=False, partial_sum=1,
              **kwargs):
     """
     Set up the parent layer, then replace its FilterActs op with one that
     is also given the number of groups.

     Every argument except ``groups`` is forwarded unchanged to the parent
     constructor. ``groups`` is stored on the instance and passed to
     FilterActs as ``numGroups``.
     """
     parent_options = dict(kwargs)
     parent_options.update(
         strides=strides,
         border_mode=border_mode,
         untie_biases=untie_biases,
         W=W,
         b=b,
         nonlinearity=nonlinearity,
         pad=pad,
         dimshuffle=dimshuffle,
         flip_filters=flip_filters,
         partial_sum=partial_sum,
     )
     super(CaffeConv2DCCLayer, self).__init__(
         incoming, num_filters, filter_size, **parent_options)

     self.groups = groups

     # self.stride, self.partial_sum and self.pad are read from the
     # instance after the parent constructor has run.
     self.filter_acts_op = FilterActs(
         numGroups=self.groups,
         stride=self.stride,
         partial_sum=self.partial_sum,
         pad=self.pad,
     )
    def __init__(
            self,
            n_filters,
            filter_size,
            weights_std=0.01,
            init_bias_value=0.1,
            stride=1,
            activation='relu',
            partial_sum=None,
            pad=0,
            untie_biases=False,
            # initW / initB are initializer class names; they are configured
            # with weights_std and init_bias_value respectively
            initW='truncated_normal',
            initB='constant',
            initial_weights=None,
            W_regularizer=None,
            W_constraint=None,
            b_regularizer=None,
            b_constraint=None,
            **kwargs):
        """
        Only the valid border mode is supported.

        n_filters should be a multiple of 16

        Remaining keyword arguments are forwarded to the parent constructor,
        which is called last.
        """
        # Initializers are looked up by class name.
        weight_init_spec = {
            'class_name': initW,
            'config': {'stddev': weights_std},
        }
        bias_init_spec = {
            'class_name': initB,
            'config': {'value': init_bias_value},
        }
        self.initW = initializers.get(weight_init_spec)
        self.initB = initializers.get(bias_init_spec)

        self.initial_weights = initial_weights
        self.n_filters = n_filters
        self.filter_size = filter_size
        self.weights_std = np.float32(weights_std)
        self.init_bias_value = np.float32(init_bias_value)
        self.stride = stride
        self.nonlinearity = activations.get(activation)
        self.partial_sum = partial_sum
        self.pad = pad
        self.untie_biases = untie_biases

        # Regularizers and constraints are stored as given.
        self.W_regularizer = W_regularizer
        self.W_constraint = W_constraint
        self.b_regularizer = b_regularizer
        self.b_constraint = b_constraint

        self.filter_acts_op = FilterActs(
            stride=self.stride,
            partial_sum=self.partial_sum,
            pad=self.pad,
        )
        super(kerasCudaConvnetConv2DLayer, self).__init__(**kwargs)
Exemple #18
0
    def __init__(self, incoming, num_filters, filter_size, stride=(1, 1),
                 pad=0, untie_biases=False, W=None,
                 b=init.Constant(0.), nonlinearity=nonlinearities.rectify,
                 dimshuffle=True, flip_filters=False, partial_sum=1,
                 **kwargs):
        """
        Set up the layer and its FilterActs op.

        Parameters
        ----------
        incoming, num_filters, filter_size, stride, pad, untie_biases, b,
        nonlinearity, flip_filters, **kwargs
            Forwarded to the parent constructor.
        W
            Weight initializer. When None, ``init.GlorotUniform()`` is used
            if ``dimshuffle`` is true and ``init.GlorotUniform(c01b=True)``
            otherwise.
        dimshuffle : bool
            Stored on the instance. When false and biases are untied, the
            bias parameter is re-created with shape
            ``(num_filters, output_shape[1], output_shape[2])``.
        partial_sum
            Stored on the instance and passed to FilterActs.

        Raises
        ------
        RuntimeError
            If the filters, strides or tuple padding are not square, if
            ``num_filters`` is not a multiple of 16, or if the number of
            input channels is neither below 4 nor a multiple of 4.
        """
        if W is None:
            if dimshuffle:
                W = init.GlorotUniform()
            else:
                W = init.GlorotUniform(c01b=True)
        self.dimshuffle = dimshuffle

        super(Conv2DCCLayer, self).__init__(incoming, num_filters, filter_size,
                                            stride, pad, untie_biases, W, b,
                                            nonlinearity, flip_filters, n=2,
                                            **kwargs)
        self.partial_sum = partial_sum

        # The error messages below format the values that were actually
        # compared (the attributes on self). Formatting the raw arguments
        # with "(%d, %d)" raises TypeError when the caller passed a list.
        if self.filter_size[0] != self.filter_size[1]:
            raise RuntimeError("Conv2DCCLayer only supports square filters, "
                               "but filter_size=(%d, %d)"
                               % (self.filter_size[0], self.filter_size[1]))

        if self.stride[0] != self.stride[1]:
            raise RuntimeError("Conv2DCCLayer only supports square strides, "
                               "but stride=(%d, %d)"
                               % (self.stride[0], self.stride[1]))

        if self.num_filters % 16 != 0:
            raise RuntimeError("Conv2DCCLayer requires num_filters to be a "
                               "multiple of 16, but num_filters is "
                               "%d" % self.num_filters)

        if not (self.num_input_channels < 4 or
                self.num_input_channels % 4 == 0):
            raise RuntimeError("Conv2DCCLayer requires the number of input "
                               "channels to be 1, 2, 3 or a multiple of 4, "
                               "but it is %d" % self.num_input_channels)

        # FilterActs takes a single padding value. In every case not
        # handled below, ``pad`` keeps the value that was passed in.
        if isinstance(self.pad, tuple):
            if self.pad[0] != self.pad[1]:
                raise RuntimeError("Conv2DCCLayer only supports square "
                                   "padding, but pad=(%d, %d)"
                                   % (self.pad[0], self.pad[1]))
            pad = self.pad[0]
        elif self.pad == 'same':
            pad = self.filter_size[0] // 2
        elif self.pad == 'full':
            pad = self.filter_size[0] - 1

        if not self.dimshuffle and self.untie_biases and self.b is not None:
            # Replace the bias created by the parent constructor with one of
            # shape (num_filters, output_shape[1], output_shape[2]).
            del self.params[self.b]
            biases_shape = (num_filters, self.output_shape[1],
                            self.output_shape[2])
            self.b = self.add_param(b, biases_shape, name="b",
                                    regularizable=False)

        self.filter_acts_op = FilterActs(stride=self.stride[0],
                                         partial_sum=self.partial_sum,
                                         pad=pad)
    def compileActivation(self, net, layerNum):
        """
        Build this layer's activation expression and append it to
        ``net.varArrayA``.

        Parameters
        ----------
        net : network object
            ``net.x`` (for layer 0) or ``net.varArrayA[layerNum - 1]`` is
            the input. ``net.varWeights[layerNum]`` supplies 'w' and 'b',
            and ``net.dropOutVectors[layerNum]`` is read when
            ``self.dropout`` is set.
        layerNum : int
            Index of this layer in the network.
        """
        if layerNum == 0:
            layer_input = net.x
        else:
            layer_input = net.varArrayA[layerNum - 1]

        # Side length of the input images, computed symbolically.
        # Assumes the images are square.
        n_channels = self.kernel_shape[1]
        side = T.cast(T.sqrt(T.shape(layer_input)[0] / n_channels), 'int16')

        # Converts input from 2 to 4 dimensions
        batch = T.shape(layer_input)[1]
        Xr = T.reshape(layer_input.T, (batch, n_channels, side, side))

        params = net.varWeights[layerNum]
        weights = params['w']
        use_cuda_convnet = self.optimized

        if use_cuda_convnet:
            valid_extent = T.shape(Xr)[-1] - T.shape(weights)[-1] + 1
            out_size = T.cast(
                T.ceil(valid_extent / np.float32(self.stride)), 'int32')

            conv_op = FilterActs(stride=self.stride)

            bc01_to_c01b = (1, 2, 3, 0)
            images_c01b = Xr.dimshuffle(*bc01_to_c01b)
            weights_c01b = weights.dimshuffle(*bc01_to_c01b)
            # flip rows and columns
            weights_flipped = weights_c01b[:, ::-1, ::-1, :]

            if self.dropout:
                weight_scale = net.dropOutVectors[layerNum].dimshuffle(
                    'x', 0, 1, 'x')
            else:
                weight_scale = 1.0

            gpu_images = gpu_contiguous(images_c01b)
            gpu_weights = gpu_contiguous(weights_flipped * weight_scale)

            a = conv_op(gpu_images, gpu_weights)
            a = a[:, :out_size, :out_size, :]
            # Add bias
            a = a + params['b'].dimshuffle(0, 'x', 'x', 'x')
        else:
            if self.dropout:
                weight_scale = net.dropOutVectors[layerNum].dimshuffle(
                    'x', 'x', 0, 1)
            else:
                weight_scale = 1.0

            a = T.nnet.conv2d(Xr, weights * weight_scale,
                              border_mode='valid',
                              subsample=(self.stride, self.stride))
            # Add bias
            a = a + params['b'].dimshuffle('x', 0, 'x', 'x')

        if use_cuda_convnet:
            if self.pooling:
                # ds - side of square pool window
                # stride - Defines the stride size between successive pooling squares.
                # Setting this parameter smaller than sizeX produces overlapping pools.
                # Setting it equal to sizeX gives the usual, non-overlapping pools. Values greater than sizeX are not allowed.
                pool_op = MaxPool(ds=self.pooling_shape,
                                  stride=self.pooling_shape)
                a = pool_op(gpu_contiguous(a))
            # c01b to bc01
            a = a.dimshuffle(3, 0, 1, 2)
        elif self.pooling:
            window = (self.pooling_shape, self.pooling_shape)
            a = downsample.max_pool_2d(a, window, ignore_border=False)

        # Flatten everything but the first axis, then transpose.
        a = T.flatten(a, outdim=2).T

        # Apply the layer's activation function.
        a = self.activation(a, self.pool_size)

        net.varArrayA.append(a)
Exemple #20
0
    def dropout_fprop(self, input):
        """Forward pass of the conv/pool layer with dropout on its input.

        Every intermediate result is stored on ``self`` (``fixed_W``,
        ``fixed_b``, ``mask``, ``fixed_x``, ``z``, ``fixed_z``, ``u``,
        ``fixed_u``, ``y``, ``fixed_y``). Values passed through
        ``apply_format`` use ``self.format`` / ``self.comp_precision`` and the
        matching ``*_range`` attribute.

        Parameters
        ----------
        input : symbolic tensor
            Layer input; it is reshaped to ``self.image_shape`` after masking.

        Returns
        -------
        ``self.fixed_y``, the formatted, flattened (2D) activation.
        """
        # Reduced-precision copies of the parameters used for the computation.
        self.fixed_W = apply_format(self.format, self.W, self.comp_precision,
                                    self.w_range)
        self.fixed_b = apply_format(self.format, self.b, self.comp_precision,
                                    self.b_range)

        # Dropout mask. The cast matters: int * float32 = float64, which
        # pulls things off the gpu.
        streams = T.shared_randomstreams.RandomStreams(self.rng.randint(999999))
        keep = streams.binomial(n=1, p=self.p, size=T.shape(input))
        self.mask = T.cast(keep, theano.config.floatX)

        self.fixed_x = (input * self.mask).reshape(self.image_shape)

        # Convolution. Raising partial_sum uses less memory but is slower.
        filter_acts = FilterActs(
            stride=self.filter_stride,
            partial_sum=self.partial_sum,
            pad=self.zero_pad
        )
        images_c01b = gpu_contiguous(
            self.fixed_x.dimshuffle(1, 2, 3, 0))  # bc01 to c01b
        filters_c01b = gpu_contiguous(
            self.fixed_W.dimshuffle(1, 2, 3, 0))  # bc01 to c01b

        # c01b to bc01
        self.z = filter_acts(images_c01b, filters_c01b).dimshuffle(3, 0, 1, 2)
        self.fixed_z = apply_format(self.format, self.z, self.comp_precision,
                                    self.z_range)

        # Max-pool each feature map individually; the pooling op works on
        # contiguous c01b data, so shuffle there and back.
        pool_in = gpu_contiguous(
            self.fixed_z.dimshuffle(1, 2, 3, 0))  # bc01 to c01b
        max_pool = MaxPool(ds=self.pool_shape, stride=self.pool_stride)
        pooled = max_pool(pool_in).dimshuffle(3, 0, 1, 2)  # c01b to bc01

        # Bias, one value per feature map. Note that z_range is used here.
        self.u = pooled + self.fixed_b.dimshuffle('x', 0, 'x', 'x')
        self.fixed_u = apply_format(self.format, self.u, self.comp_precision,
                                    self.z_range)

        # Activation, flattened to 2D
        self.y = self.activation(self.fixed_u).flatten(2)
        self.fixed_y = apply_format(self.format, self.y, self.comp_precision,
                                    self.y_range)

        return self.fixed_y
Exemple #21
0
    def apply_conv(self, input):
        """
        Apply the cuda-convnet convolution (FilterActs) to the input provided,
        optionally max-pool the result, and add the bias.

        @note Convolution operation in this version is not as powerful as using dnn_conv

        @param input: symbolic tensor of shape image_shape (theano.tensor.dtensor4)
                      A 4D tensor with the axes representing batch size, number of
                      channels, image height, and image width.
        ----------------------------------------------------------------------------------
        @return output : A 4D tensor of filtered images (feature maps) with dimensions
                         representing batch size, number of filters, feature map height,
                         and feature map width. The same tensor is stored in
                         ``self.output``.

                        Before pooling, the height and width of the feature map
                        depend on the border mode. For 'valid' it is
                        ``image_size - filter_size + 1`` while for 'full' it is
                        ``image_size + filter_size - 1``
        ----------------------------------------------------------------------------------
        Limitations of using FilterActs compared to conv2d:

        > Number of channels <= 3; If you want to compute the gradient, it should be divisible by 4.
        > Filters must be square.
        > Number of filters must be a multiple of 16
        > All minibatch sizes are supported, but the best performance is achieved when the minibatch size
        is a multiple of 128.
        > Works only on the GPU
        """
        input_shuffled = input.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
        filters_shuffled = self.W.dimshuffle(1, 2, 3, 0)  # bc01 to c01b

        # Full convolution == zero padding with a (filter_size - 1) border.
        # Filters must be square, so the last entry of filter_shape is the
        # filter side for both a (rows, cols) and a bc01 4D filter shape.
        # NOTE(review): assumes filter_shape is not stored in c01b order --
        # confirm against the constructor.
        if self.border_mode == "full":
            padding = self.filter_shape[-1] - 1
        else:
            padding = 0

        conv_op = FilterActs(stride=1, partial_sum=1, pad=padding)
        contiguous_input = gpu_contiguous(input_shuffled)
        contiguous_filters = gpu_contiguous(filters_shuffled)
        # Apply the op; the op object itself must never be used as a tensor.
        conv_out_shuffled = conv_op(contiguous_input, contiguous_filters)

        if self.pool:
            # cuda-convnet pooling uses a square window; non-overlapping here.
            pool_op = MaxPool(ds=self.pool_size[0], stride=self.pool_size[0])
            out_shuffled = pool_op(conv_out_shuffled)
        else:
            out_shuffled = conv_out_shuffled

        # Both branches are in c01b at this point; return to bc01.
        output = out_shuffled.dimshuffle(3, 0, 1, 2)  # c01b to bc01

        if self.tied_biases:
            # one bias per feature map
            output = output + self.b.dimshuffle("x", 0, "x", "x")
        else:
            # one bias per output unit, broadcast over the batch axis
            output = output + self.b.dimshuffle('x', 0, 1, 2)

        self.output = output
        return self.output
Exemple #22
0
def make_funcs(batch_size, rows, cols, channels, filter_rows, num_filters):
    """Build two benchmark functions that compute the same convolution.

    Random float32 images and square filters are generated in c01b layout
    from a fixed seed. One function runs cuda-convnet's ``FilterActs`` on
    them; the other runs ``conv2d`` in 'valid' mode on the same data
    transposed to bc01, with the filters flipped along both spatial axes.
    Each function takes no arguments and writes its result into a shared
    variable through ``updates``.

    Returns
    -------
    (cuda_convnet, baseline) : tuple of compiled functions, named
        'cuda_convnet' and 'baseline' respectively.
    """
    rng = np.random.RandomState([2012, 10, 9])

    filter_cols = filter_rows  # square filters only

    image_shape_c01b = (channels, rows, cols, batch_size)
    filter_shape_c01b = (channels, filter_rows, filter_cols, num_filters)

    # Draw order (images first, then filters) fixes the generated values.
    images_c01b = rng.uniform(-1., 1., image_shape_c01b).astype('float32')
    filters_c01b = rng.uniform(-1., 1., filter_shape_c01b).astype('float32')

    # --- cuda-convnet version -------------------------------------------
    # bench.py should always be run in gpu mode so we should not need a
    # gpu_from_host here
    cc_out = FilterActs()(shared(images_c01b),
                          shared(filters_c01b, name='filters'))
    cc_store = shared(cc_out.eval())

    cuda_convnet = function([], updates={cc_store: cc_out})
    cuda_convnet.name = 'cuda_convnet'

    # --- conv2d baseline --------------------------------------------------
    images_bc01_value = images_c01b.transpose(3, 0, 1, 2)
    filters_bc01_value = filters_c01b.transpose(3, 0, 1, 2)[:, :, ::-1, ::-1]

    ref_out = conv2d(shared(images_bc01_value),
                     shared(filters_bc01_value),
                     border_mode='valid',
                     image_shape=images_bc01_value.shape,
                     filter_shape=filters_bc01_value.shape)
    ref_store = shared(ref_out.eval())

    baseline = function([], updates={ref_store: ref_out})
    baseline.name = 'baseline'

    return cuda_convnet, baseline
Exemple #23
0
    def lmul(self, x):
        """
        dot(x, A)
        aka, do convolution with input image x

        ``x`` is a 4D variable whose axis order is given by
        ``self.input_axes``; the result uses ``self.output_axes``. If ``x``
        is not a Cuda variable it is moved to the GPU first and the result is
        moved back to the host.
        """
        # Anything whose type name lacks 'Cuda' is treated as living on the host.
        on_host = 'Cuda' not in str(type(x))
        if on_host:
            x = gpu_from_host(x)

        assert x.ndim == 4
        in_axes = self.input_axes
        assert len(in_axes) == 4

        # FilterActs wants: channel, topo dim 0, topo dim 1, batch_index
        filter_acts_axes = ('c', 0, 1, 'b')
        if tuple(in_axes) != filter_acts_axes:
            to_op_order = [in_axes.index(axis) for axis in filter_acts_axes]
            x = x.dimshuffle(*to_op_order)

        filter_acts = FilterActs(self.pad, self.partial_sum)
        rval = filter_acts(gpu_contiguous(x), self._filters)

        # Reorder the result to match the output space
        out_axes = self.output_axes
        assert len(out_axes) == 4
        if tuple(out_axes) != filter_acts_axes:
            to_out_order = [filter_acts_axes.index(axis) for axis in out_axes]
            rval = rval.dimshuffle(*to_out_order)

        return host_from_gpu(rval) if on_host else rval
def test_filter_acts_strided():
    """Check FilterActs against the Python reference for every stride.

    For each (image shape, filter shape) pair, random float32 data is pushed
    through ``FilterActs(stride=s)`` for ``s = 1 .. filter_rows`` and compared
    with ``FilterActs_python``. The test fails if the shapes differ or if the
    largest absolute difference exceeds 8.6e-6; diagnostics are printed
    before failing.
    """
    rng = np.random.RandomState([2012, 10, 9])

    # Each entry: [img_shape, filter_shape]
    # [(channels, rows, cols, batch_size),
    #  (channels, filter_rows, filter_cols, num_filters)]
    shape_list = [
        [(1, 7, 8, 5), (1, 2, 2, 16)],
        [(3, 7, 8, 5), (3, 3, 3, 16)],
        [(16, 11, 11, 4), (16, 4, 4, 16)],
        [(3, 20, 20, 3), (3, 5, 5, 16)],
        [(3, 21, 21, 3), (3, 6, 6, 16)],
    ]

    for test_idx, (img_shape, filter_shape) in enumerate(shape_list):
        images = rng.uniform(-1., 1., img_shape).astype('float32')
        filters = rng.uniform(-1., 1., filter_shape).astype('float32')
        gpu_images = float32_shared_constructor(images, name='images')
        gpu_filters = float32_shared_constructor(filters, name='filters')
        print("test case %d..." % (test_idx + 1))

        # Strides from 1 up to the filter side
        for stride in xrange(1, filters.shape[1] + 1):
            output = FilterActs(stride=stride)(gpu_images, gpu_filters)
            output = host_from_gpu(output)
            f = function([], output)
            output_val = f()

            output_python = FilterActs_python(images, filters, stride)

            # Compare shapes before subtracting, so a shape mismatch is
            # reported as such instead of as a broadcasting error.
            if output_val.shape != output_python.shape:
                print('cuda-convnet shape: ', output_val.shape)
                print('python conv shape: ', output_python.shape)
                raise AssertionError(
                    "FilterActs and the Python reference disagree on the "
                    "output shape for stride %d" % stride)

            err = np.abs(output_val - output_python)
            if err.max() > 8.6e-6:
                assert type(output_val) == type(output_python)
                assert output_val.dtype == output_python.dtype
                print('stride %d' % stride)
                print('absolute error range: ', (err.min(), err.max()))
                print('mean absolute error: ', err.mean())
                print('cuda-convnet value range: ',
                      (output_val.min(), output_val.max()))
                print('python conv value range: ',
                      (output_python.min(), output_python.max()))
                # Printing alone would let a numerical mismatch pass
                # silently; fail the test explicitly.
                raise AssertionError(
                    "FilterActs output differs from the Python reference "
                    "for stride %d" % stride)
Exemple #25
0
    def lmul(self, x):
        """
        dot(x, A)
        aka, do convolution with input image x

        Accepts a 4D variable laid out according to ``self.input_axes`` and
        returns the FilterActs result laid out according to
        ``self.output_axes``. Host inputs are transferred to the GPU for the
        computation and the result is transferred back.
        """
        needs_transfer = 'Cuda' not in str(type(x))
        if needs_transfer:
            x = gpu_from_host(x)

        # x must be formatted as channel, topo dim 0, topo dim 1, batch_index
        # for use with FilterActs
        op_axes = ('c', 0, 1, 'b')

        assert x.ndim == 4
        src_axes = self.input_axes
        assert len(src_axes) == 4
        if tuple(src_axes) != op_axes:
            permutation = [src_axes.index(name) for name in op_axes]
            x = x.dimshuffle(*permutation)

        contiguous_x = gpu_contiguous(x)
        rval = FilterActs(self.pad, self.partial_sum)(contiguous_x,
                                                      self._filters)

        # Format the output based on the output space
        dst_axes = self.output_axes
        assert len(dst_axes) == 4
        if tuple(dst_axes) != op_axes:
            permutation = [op_axes.index(name) for name in dst_axes]
            rval = rval.dimshuffle(*permutation)

        if needs_transfer:
            return host_from_gpu(rval)
        return rval
Exemple #26
0
 def fp(self, x, _):
   if self.on_gpu:
     print "conv on gpu..."
     conv_op = FilterActs(stride=self.subsample[0])
     input_shuffled = x.dimshuffle(1, 2, 3, 0) # bc01 to c01b
     filters_shuffled = self.W.dimshuffle(1, 2, 3, 0) # bc01 to c01b
     contiguous_input = gpu_contiguous(input_shuffled)
     contiguous_filters = gpu_contiguous(filters_shuffled)
     out_shuffled = conv_op(contiguous_input, contiguous_filters)
     self.output = out_shuffled.dimshuffle(3, 0, 1, 2) # c01b to bc01
   else:
     self.output = conv.conv2d(x, self.W,
       filter_shape=self.filter_shape, image_shape=self.in_shape,
       subsample=self.subsample, border_mode=self.border_mode)
Exemple #27
0
    def __init__(self,
                 input_layer,
                 n_filters,
                 filter_size,
                 weights_std,
                 stride=1,
                 nonlinearity=layers.rectify,
                 dropout=0.,
                 partial_sum=None,
                 pad=0,
                 trainable=True):
        """
        Set up a cuda-convnet convolutional layer on top of ``input_layer``.

        Only the valid border mode is supported.

        n_filters should be a multiple of 16

        The input layer must produce a 4-dimensional output and use the same
        data order as this layer (``layers.data_order.type2``); the number of
        channels is read from the first entry of its output shape. Filters
        are square, with shape
        ``(n_channels, filter_size, filter_size, n_filters)``.

        ``stride``, ``partial_sum`` and ``pad`` are forwarded to
        ``FilterActs``; ``weights_std`` is stored as a float32.
        """
        self.input_layer = input_layer
        self.input_shape = self.input_layer.get_output_shape()

        # Filter bank geometry
        self.n_filters = n_filters
        n_channels = self.input_shape[0]
        self.n_channels = n_channels
        self.filter_size = filter_size

        # Hyper-parameters
        self.weights_std = numpy.float32(weights_std)
        self.stride = stride
        self.nonlinearity = nonlinearity
        self.dropout = dropout
        self.partial_sum = partial_sum
        self.pad = pad
        self.mb_size = self.input_layer.mb_size

        self.data_order = layers.data_order.type2

        # Validate the input layer
        n_input_dims = len(self.input_layer.get_output_shape())
        assert (n_input_dims == 4), \
            'Input must have 4 dimensions.'

        assert (self.input_layer.data_order == self.data_order), \
            'Input data order does not match this layer\'s data order.'

        self.filter_shape = (n_channels, filter_size, filter_size, n_filters)

        # Parameters: the weights are created empty and filled by
        # reset_params()
        self.trainable = trainable
        self.W = layers.shared_single(4)

        self.params = [self.W]
        self.reset_params()

        self.filter_acts_op = FilterActs(stride=self.stride,
                                         partial_sum=self.partial_sum,
                                         pad=self.pad)
Exemple #28
0
    def __init__(self,layer_def,input,input_shape,rs,clone_from=None):
        """
            Create a (GPU only) convolutional layer with shared variable internal parameters.
            Each filter has a corresponding bias

            The layer is configured from the XML children ``convpad``,
            ``convstride``, ``numfilters``, ``filtersize`` and ``bias`` of
            ``layer_def`` and from its ``name`` attribute.

            :type layer_def: Element, xml containing the configuration of the Conv layer

            :type input: tensor.tensor4

            :type input_shape: tuple or list of size 4
            :param input_shape: [channels,height,width,batchsize] c01b

            :type rs: a random number generator used to initialize weights

            :param clone_from: optional layer whose ``W`` and ``b`` shared
                               variables are reused instead of creating new ones
        """
        def read_int(tag):
            # Integer value of the child element <tag> of the layer definition
            return int(layer_def.find(tag).text)

        name       = layer_def.attrib["name"]
        pad        = read_int("convpad")
        stride     = read_int("convstride")
        n_filters  = read_int("numfilters")
        f_size     = read_int("filtersize")
        bias_value = float(layer_def.find("bias").text)
        # NOTE(review): the weights are drawn from this time-seeded generator;
        # the `rs` argument is not used anywhere in this constructor.
        rng        = np.random.RandomState(seed=int(time.time()))

        self.input = gpu_contiguous(input)
        n_channels, height, width, n_batch = input_shape
        w_shape    = [n_channels, f_size, f_size, n_filters]#c01b

        if clone_from is not None:
            # Share the parameters with an existing layer
            self.W = clone_from.W
            self.b = clone_from.b
        else:
            # Gaussian init with std sqrt(2 / fan_in) (initialization from PRELU)
            w_std     = np.sqrt( 2. / (f_size*f_size*n_channels) )
            w_values  = np.asarray(rng.normal(loc=0., scale=w_std, size=w_shape), dtype=theano.config.floatX)
            b_values  = np.asarray(bias_value*np.ones((n_filters,)), dtype=theano.config.floatX)
            self.W    = theano.shared( w_values, borrow=True, name= name+"-W")
            self.b    = theano.shared( b_values, borrow=True , name=name+"-b")

        #CONV
        filter_acts = FilterActs(partial_sum=1,pad=pad,stride=stride)
        conv_result = filter_acts(self.input, gpu_contiguous(self.W))
        # one bias per filter; filters are on the first axis in c01b
        self.output = conv_result + self.b.dimshuffle(0, 'x', 'x','x')

        #output size is equal to (image+2*pad - filter_size + 1) / stride,
        #plus one when the stride is larger than 1
        extra        = (1 if stride>1 else 0)
        out_height   = (height + 2 * pad - f_size + 1 ) / stride + extra
        out_width    = (width + 2 * pad - f_size + 1 ) / stride + extra

        self.input_shape   = input_shape#c01b
        self.output_shape  = [n_filters, out_height, out_width, n_batch]#c01b
        self.params        = [self.W,self.b]
Exemple #29
0
def test_reject_rect():
    """Check that running FilterActs with a non-square kernel is an error.

    A 4x5 filter bank is convolved with random images both through
    ``FilterActs`` and through ``conv2d``. Evaluating the compiled function
    is expected to raise ``ValueError``; if it returns normally the test
    fails. A failure while compiling is reported as a ``KnownFailureTest``.
    """
    rng = np.random.RandomState([2012, 10, 9])

    batch_size = 5
    rows = 10
    cols = 9
    channels = 3
    filter_rows = 4
    filter_cols = filter_rows + 1  # deliberately rectangular
    num_filters = 6

    images = shared(rng.uniform(
        -1., 1., (channels, rows, cols, batch_size)).astype('float32'),
                    name='images')
    filters = shared(rng.uniform(
        -1., 1.,
        (channels, filter_rows, filter_cols, num_filters)).astype('float32'),
                     name='filters')

    gpu_images = gpu_from_host(images)
    gpu_filters = gpu_from_host(filters)

    output = FilterActs()(gpu_images, gpu_filters)

    images_bc01 = images.dimshuffle(3, 0, 1, 2)
    # The reference convolution must use the filters, not a second copy of
    # the images.
    filters_bc01 = filters.dimshuffle(3, 0, 1, 2)

    output_conv2d = conv2d(images_bc01, filters_bc01, border_mode='valid')

    try:
        f = function([], [output, output_conv2d])
    except Exception:
        # Exception rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not turned into a known failure.
        raise KnownFailureTest(
            "cuda-convnet code depends on an unmerged theano feature.")

    try:
        output, output_conv2d = f()
    except ValueError:
        # Expected: the rectangular kernel was rejected.
        return

    raise AssertionError(
        "FilterActs accepted a non-square kernel without raising ValueError")
Exemple #30
0
    def __init__(self, filter_size=7, num_channels=3):
        """Compile a padded FilterActs convolution and build a uniform filter bank.

        ``self.conv_func`` is a compiled Theano function taking two float32
        4D tensors (named 'input' and 'filter') and returning the result of
        ``FilterActs(pad=self.pad)`` applied to them. ``self.w`` is a float32
        array of shape
        ``(num_channels, filter_size, filter_size, num_filters)`` in which
        every entry equals ``1 / (num_channels * filter_size ** 2)``.
        """
        # magic numbers that make things work for stl10
        self.filter_size = filter_size
        self.num_channels = num_channels
        self.num_filters = 16
        # Half the filter size as padding.
        # NOTE(review): relies on Python 2 integer division -- confirm.
        self.pad = self.filter_size / 2  # -1

        images = T.ftensor4(name='input')
        kernels = T.ftensor4(name='filter')
        conv_result = FilterActs(pad=self.pad)(gpu_contiguous(images),
                                               gpu_contiguous(kernels))
        self.conv_func = theano.function([images, kernels], conv_result)

        # Averaging filters: all ones, divided by the number of weights that
        # feed one output value of one filter.
        n = self.num_channels * self.filter_size * self.filter_size
        w_shape = (self.num_channels, self.filter_size, self.filter_size,
                   self.num_filters)
        self.w = numpy.float32(numpy.ones(w_shape)) / n
 def __init__(self,
              incoming,
              num_filters,
              filter_size,
              groups=1,
              stride=(1, 1),
              border_mode=None,
              untie_biases=False,
              W=init.Uniform(),
              b=init.Constant(0.),
              nonlinearity=nonlinearities.rectify,
              pad=None,
              dimshuffle=True,
              flip_filters=False,
              partial_sum=1,
              **kwargs):
     """Cuda-convnet convolution layer with support for filter groups.

     All arguments except ``groups`` and ``border_mode`` are forwarded to
     the parent constructor. Afterwards ``self.pad`` is reduced to a single
     integer and ``self.filter_acts_op`` is rebuilt with
     ``numGroups=groups``.

     NOTE(review): ``border_mode`` is accepted but not passed on to the
     parent constructor -- confirm this is intended.
     """
     parent_kwargs = dict(stride=stride,
                          untie_biases=untie_biases,
                          W=W,
                          b=b,
                          nonlinearity=nonlinearity,
                          pad=pad,
                          dimshuffle=dimshuffle,
                          flip_filters=flip_filters,
                          partial_sum=partial_sum)
     parent_kwargs.update(kwargs)
     super(CaffeConv2DCCLayer, self).__init__(incoming,
                                              num_filters,
                                              filter_size,
                                              **parent_kwargs)
     self.groups = groups

     # the FilterActs in pylearn2 cannot accept tuple-type pad: keep an int
     # as it is, take the first entry of a tuple, and use 0 for anything else
     if isinstance(self.pad, tuple):
         self.pad = self.pad[0]
     elif not isinstance(self.pad, int):
         self.pad = 0

     self.filter_acts_op = FilterActs(numGroups=self.groups,
                                      stride=self.stride,
                                      partial_sum=self.partial_sum,
                                      pad=self.pad)
Exemple #32
0
    def fprop(self, input):
        """Forward pass of the conv/pool layer without dropout.

        The reduced-precision parameters are stored in ``self.w_comp`` and
        ``self.b_comp``. The input is reshaped to ``self.image_shape``,
        convolved with the filters scaled by ``self.scale``, max-pooled,
        biased (bias also scaled by ``self.scale``), formatted, passed
        through ``self.activation``, flattened to 2D and formatted again.

        Returns
        -------
        The formatted 2D activation.
        """
        # Reduced-precision copies of the parameters used for the computation.
        self.w_comp = apply_format(self.format, self.W, self.comp_precision,
                                   self.w_range)
        self.b_comp = apply_format(self.format, self.b, self.comp_precision,
                                   self.b_range)

        images_bc01 = input.reshape(self.image_shape)

        # Convolution on contiguous c01b data
        images_c01b = gpu_contiguous(
            images_bc01.dimshuffle(1, 2, 3, 0))  # bc01 to c01b
        filters_c01b = gpu_contiguous(
            self.w_comp.dimshuffle(1, 2, 3, 0) * self.scale)  # bc01 to c01b
        filter_acts = FilterActs(stride=self.filter_stride,
                                 partial_sum=self.partial_sum,
                                 pad=self.zero_pad)
        conv_c01b = filter_acts(images_c01b, filters_c01b)

        # Max-pool each feature map individually, then go back to bc01
        max_pool = MaxPool(ds=self.pool_shape, stride=self.pool_stride)
        pooled = max_pool(conv_c01b).dimshuffle(3, 0, 1, 2)  # c01b to bc01

        # Bias, one value per feature map, scaled like the filters
        scaled_bias = self.b_comp.dimshuffle('x', 0, 'x', 'x') * self.scale
        pre_activation = apply_format(self.format, pooled + scaled_bias,
                                      self.comp_precision, self.z_range)

        # Activation, flattened to 2D
        activated = self.activation(pre_activation)
        return apply_format(self.format, activated.flatten(2),
                            self.comp_precision, self.y_range)
Exemple #33
0
def test_match_grad_valid_conv():
    """Check that WeightActs is the gradient of FilterActs w.r.t. the weights.

    For each ``partial_sum`` in ``[0, 1, 4]`` the same seeded random images
    and filters are convolved with ``FilterActs`` and with ``conv2d`` (valid
    mode, filters flipped along both spatial axes). Both outputs are turned
    into a scalar cost with one set of random coefficients. The weight
    gradient from ``WeightActs`` is then compared with ``T.grad`` of the
    conv2d cost. Tolerances are 8e-6 for the outputs and 8.6e-6 for the
    gradients; diagnostics are printed before a failure.
    """
    # Problem size, identical for every partial_sum setting
    batch_size = 3
    rows = 7
    cols = 9
    channels = 8
    filter_rows = 4
    filter_cols = filter_rows
    num_filters = 16

    image_shape = (channels, rows, cols, batch_size)
    filter_shape = (channels, filter_rows, filter_cols, num_filters)

    for partial_sum in [0, 1, 4]:
        # Re-seeded on every pass so each setting sees the same data
        rng = np.random.RandomState([2012, 10, 9])

        images = shared(rng.uniform(-1., 1., image_shape).astype('float32'),
                        name='images')
        filters = shared(rng.uniform(-1., 1., filter_shape).astype('float32'),
                         name='filters')

        gpu_images = gpu_from_host(images)
        gpu_filters = gpu_from_host(filters)

        # cuda-convnet forward pass
        output = host_from_gpu(
            FilterActs(partial_sum=partial_sum)(gpu_images, gpu_filters))

        # Reference forward pass: bc01 layout, flipped filters
        images_bc01 = images.dimshuffle(3, 0, 1, 2)
        filters_bc01 = filters.dimshuffle(3, 0, 1, 2)[:, :, ::-1, ::-1]
        output_conv2d = conv2d(images_bc01, filters_bc01,
                               border_mode='valid')
        output_conv2d = output_conv2d.dimshuffle(1, 2, 3, 0)

        # Random projection turning both outputs into a scalar cost
        theano_rng = MRG_RandomStreams(2013 + 1 + 31)
        coeffs = theano_rng.normal(avg=0., std=1.,
                                   size=output_conv2d.shape, dtype='float32')

        cost_conv2d = (coeffs * output_conv2d).sum()
        weights_grad_conv2d = T.grad(cost_conv2d, filters)

        cost = (coeffs * output).sum()
        hid_acts_grad = T.grad(cost, output)

        # WeightActs takes the images, the gradient on the hidden
        # activations and the filter shape (rows, cols)
        weights_grad = host_from_gpu(
            WeightActs(partial_sum=partial_sum)(
                gpu_images,
                gpu_from_host(hid_acts_grad),
                as_tensor_variable((4, 4))
            )[0])

        f = function([], [output, output_conv2d, weights_grad,
                          weights_grad_conv2d])

        out_val, ref_val, grad_val, ref_grad_val = f()

        # --- forward outputs ---
        if np.abs(out_val - ref_val).max() > 8e-6:
            assert type(out_val) == type(ref_val)
            assert out_val.dtype == ref_val.dtype
            if out_val.shape != ref_val.shape:
                print('cuda-convnet shape: ', out_val.shape)
                print('theano shape: ', ref_val.shape)
                assert False
            err = np.abs(out_val - ref_val)
            print('absolute error range: ', (err.min(), err.max()))
            print('mean absolute error: ', err.mean())
            print('cuda-convnet value range: ', (out_val.min(), out_val.max()))
            print('theano value range: ', (ref_val.min(),
                                           ref_val.max()))
            assert False

        warnings.warn(
            "test_match_grad_valid_conv success criterion is not very strict."
            " Can we verify that this is OK? One possibility is that theano"
            " is numerically unstable and Alex's code is better. Probably"
            " theano CPU 64 bit is OK but it's worth checking the others.")

        # --- weight gradients ---
        if np.abs(grad_val - ref_grad_val).max() > 8.6e-6:
            if type(grad_val) != type(ref_grad_val):
                raise AssertionError("weights_grad is of type " +
                                     str(grad_val))
            assert grad_val.dtype == ref_grad_val.dtype
            if grad_val.shape != ref_grad_val.shape:
                print('cuda-convnet shape: ', grad_val.shape)
                print('theano shape: ', ref_grad_val.shape)
                assert False
            err = np.abs(grad_val - ref_grad_val)
            print('absolute error range: ', (err.min(), err.max()))
            print('mean absolute error: ', err.mean())
            print('cuda-convnet value range: ', (grad_val.min(),
                                                 grad_val.max()))
            print('theano value range: ', (ref_grad_val.min(),
                                           ref_grad_val.max()))
            assert False
def test_match_valid_conv_padded():
    """Check padded FilterActs against conv2d on an explicitly padded image.

    ``FilterActs`` is constructed with ``PAD = 3`` and applied to random
    images. The reference result is theano's conv2D in valid mode on the
    same images embedded in a zero tensor with a border of ``PAD`` pixels on
    every side, using filters flipped along both spatial axes. The shapes
    must match and the largest absolute difference must not exceed 2.4e-6.
    """
    rng = np.random.RandomState([2012,10,9])

    batch_size = 5
    rows = 10
    cols = 9
    channels = 3
    filter_rows = 4
    filter_cols = filter_rows
    num_filters = 16

    image_shape = (channels, rows, cols, batch_size)
    filter_shape = (channels, filter_rows, filter_cols, num_filters)

    images = shared(rng.uniform(-1., 1., image_shape).astype('float32'),
                    name='images')
    filters = shared(rng.uniform(-1., 1., filter_shape).astype('float32'),
                     name='filters')

    PAD = 3

    # cuda-convnet result with implicit zero padding
    cc_graph = FilterActs(PAD)(gpu_from_host(images), gpu_from_host(filters))
    cc_graph = host_from_gpu(cc_graph)

    # Reference: write the images into the centre of a zero tensor (bc01)
    padded_bc01 = T.alloc(0., batch_size, channels, rows + PAD * 2, cols + PAD * 2)
    padded_bc01 = T.set_subtensor(padded_bc01[:,:,PAD:-PAD,PAD:-PAD],
                                  images.dimshuffle(3,0,1,2))

    flipped_filters_bc01 = filters.dimshuffle(3,0,1,2)[:,:,::-1,::-1]

    ref_graph = conv2d(padded_bc01, flipped_filters_bc01,
                       border_mode='valid')
    ref_graph = ref_graph.dimshuffle(1,2,3,0)  # back to c01b

    f = function([], [cc_graph, ref_graph])

    output, output_conv2d = f()

    warnings.warn("""test_match_valid_conv success criterion is not very strict. Can we verify that this is OK?
                     One possibility is that theano is numerically unstable and Alex's code is better.
                     Probably theano CPU 64 bit is OK but it's worth checking the others.""")

    assert output.shape == output_conv2d.shape

    if np.abs(output - output_conv2d).max() > 2.4e-6:
        assert type(output) == type(output_conv2d)
        assert output.dtype == output_conv2d.dtype
        if output.shape != output_conv2d.shape:
            print('cuda-convnet shape: ',output.shape)
            print('theano shape: ',output_conv2d.shape)
            assert False
        err = np.abs(output - output_conv2d)
        print('absolute error range: ', (err.min(), err.max()))
        print('mean absolute error: ', err.mean())
        print('cuda-convnet value range: ', (output.min(), output.max()))
        print('theano value range: ', (output_conv2d.min(), output_conv2d.max()))
        assert False
import time
import cPickle as pickle
import numpy as np
import theano
import theano.tensor as T

from theano.sandbox.cuda.basic_ops import gpu_from_host

# Theano's own convolution implementation
from theano.tensor.nnet import conv

# cuda-convnet convolution implementation
from pylearn2.sandbox.cuda_convnet.filter_acts import FilterActs
filter_acts_op = FilterActs(stride=1, partial_sum=1, pad=0)

# FFT-based convolution implementation
import fftconv

# Pickle file name for the speed-test data.
# NOTE(review): presumably the benchmark results are written here; the code
# that uses it is outside this chunk -- confirm.
target_path = "speedtest_data.pkl"

num_runs = 10  # number of times each convolution is run,
# running time is averaged across these runs.

# Absolute and relative tolerances.
# NOTE(review): presumably used to check that the different convolution
# implementations agree numerically -- confirm against the comparison code.
atol = 1e-3
rtol = 1e-5
# NOTE(review): presumably the standard deviation of the random test data;
# its use is not visible in this chunk -- confirm.
std = 0.1

shapes_list = [
    # (input_shape, filter_shape)
    # ((minibatch_size, num_input_channels, image_width, image_height),
    #  (num_filters, num_input_channels, filter_width, filter_height))
Exemple #36
0
    def __init__(self, rng, input, filter_shape, image_shape,
                 pad=0, poolsize=(2, 2), activation=T.tanh, poolstride=(2, 2),
                 init_type="tanh",
                 W=None, b=None):
        """
        Allocate a LeNetConvPoolLayer with shared variable internal parameters.

        The convolution and the max-pooling are computed with the cuda-convnet
        ops (FilterActs / MaxPool), which work on c01b-ordered tensors; the
        input and the filters are shuffled from bc01 to c01b and the pooled
        result is shuffled back to bc01.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :type input: theano.tensor.dtensor4
        :param input: symbolic image tensor, of shape image_shape

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width)

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :type pad: int
        :param pad: padding handed to the FilterActs op; it is counted twice
                    (2*pad) when the output size is computed

        :type poolsize: tuple or list of length 2
        :param poolsize: the downsampling (pooling) factor (#rows, #cols);
                         only poolsize[0] is passed to the MaxPool op

        :param activation: callable applied to (pooled output + bias)

        :type poolstride: tuple or list of length 2
        :param poolstride: pooling stride; only poolstride[0] is passed to
                           the MaxPool op

        :type init_type: str
        :param init_type: "ReLU" draws the weights from a normal distribution
                          with std sqrt(2 / fan_in) ("He init"); any other
                          value draws them uniformly in
                          +/- sqrt(6 / (fan_in + fan_out)) ("Xavier init")

        :param W: optional initial value for the filters; when given it
                  replaces the random initialization through set_value
        :param b: optional initial value for the biases; when given it
                  replaces the zero initialization through set_value
        """

        assert image_shape[1] == filter_shape[1]
        self.input = input

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = numpy.prod(filter_shape[1:])

        if init_type == "ReLU":
            print("ConvPoolLayer with He init")
            std = numpy.sqrt(2.0 / fan_in)
            initial_W = rng.normal(0, std, size=filter_shape)
        else:
            print("ConvPoolLayer with Xavier init")
            # each unit in the lower layer receives a gradient from:
            # "num output feature maps * filter height * filter width" /
            #   pooling size
            fan_out = (filter_shape[0] * numpy.prod(filter_shape[2:]) /
                       numpy.prod(poolsize))
            W_bound = numpy.sqrt(6. / (fan_in + fan_out))
            initial_W = rng.uniform(low=-W_bound, high=W_bound,
                                    size=filter_shape)
        self.W = theano.shared(
            numpy.asarray(initial_W, dtype=theano.config.floatX),
            borrow=True
        )
        # Identity test, not "!= None": comparing a numpy array with None is
        # element-wise and the resulting array has no single truth value.
        if W is not None:
            self.W.set_value(W)

        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)
        if b is not None:
            self.b.set_value(b)

        # convolve input feature maps with filters, using the cuda-convnet
        # FilterActs op (in place of theano's conv.conv2d)
        input_shuffled = input.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
        filters_shuffled = self.W.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
        conv_op = FilterActs(stride=1, partial_sum=1, pad=pad)
        contiguous_input = gpu_contiguous(input_shuffled)
        contiguous_filters = gpu_contiguous(filters_shuffled)
        conv_out_shuffled = conv_op(contiguous_input, contiguous_filters)

        # downsample each feature map individually, using the cuda-convnet
        # MaxPool op (in place of theano's downsample.max_pool_2d)
        pool_op = MaxPool(ds=poolsize[0], stride=poolstride[0])
        pooled_out_shuffled = pool_op(conv_out_shuffled)
        pooled_out = pooled_out_shuffled.dimshuffle(3, 0, 1, 2)  # c01b to bc01

        # add the bias term. Since the bias is a vector (1D array), we first
        # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
        # thus be broadcasted across mini-batches and feature map
        # width & height
        self.output = activation(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

        # Expected output shape. The convolution stride is fixed to 1, the
        # same value given to FilterActs above.
        stride = 1
        assert (image_shape[2] - filter_shape[2] + 2 * pad) % stride == 0
        # floor division keeps the size an integer under true division too
        output_im_size = (image_shape[2] - filter_shape[2] + 2 * pad) // stride + 1
        # NOTE(review): the size is divided by poolsize[0] while the MaxPool
        # op strides by poolstride[0]; the two agree with the defaults --
        # confirm this is intended when poolstride differs from poolsize.
        assert output_im_size % poolsize[0] == 0
        output_im_size = output_im_size // poolsize[0]
        self.output_shape = [image_shape[0],
                             filter_shape[0],
                             output_im_size,
                             output_im_size]

        # store parameters of this layer
        self.params = [self.W, self.b]
Exemple #37
0
    def __init__(self, rng, input, filter_shape, image_shape,
                 activation=prelu, W1=None, W2=None, b1=None, b2=None):
        """
        Build two stacked cuda-convnet convolutions with a skip connection.

        The first convolution uses ``filter_shape``; the second one uses the
        same shape with its number of input feature maps set to
        ``filter_shape[0]``. Both are padded with ``(filter size - 1) // 2``,
        and ``input`` is added to the second convolution's output (plus bias)
        before the final activation.

        :type rng: numpy.random.RandomState
        :param rng: a random number generator used to initialize weights

        :param input: symbolic image tensor, shuffled here from bc01 to c01b

        :type filter_shape: tuple or list of length 4
        :param filter_shape: (number of filters, num input feature maps,
                              filter height, filter width); the filter size
                              must be odd. The argument is not modified.

        :type image_shape: tuple or list of length 4
        :param image_shape: (batch size, num input feature maps,
                             image height, image width)

        :param activation: callable applied after each convolution + bias

        :param W1: optional initial value for the first layer's filters
        :param W2: optional initial value for the second layer's filters
        :param b1: optional initial value for the first layer's biases
        :param b2: optional initial value for the second layer's biases
        """

        assert image_shape[1] == filter_shape[1]
        self.input = input

        # ---- first convolution -------------------------------------------
        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = numpy.prod(filter_shape[1:])
        # each unit in the lower layer receives a gradient from:
        # "num output feature maps * filter height * filter width"
        fan_out = filter_shape[0] * numpy.prod(filter_shape[2:])
        # initialize weights with random weights
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.W1 = theano.shared(
            numpy.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX
            ),
            borrow=True
        )
        # Identity test, not "!= None": comparing a numpy array with None is
        # element-wise and the resulting array has no single truth value.
        if W1 is not None:
            self.W1.set_value(W1)

        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b1 = theano.shared(value=b_values, borrow=True)
        if b1 is not None:
            self.b1.set_value(b1)

        assert filter_shape[2] % 2 == 1  # odd size
        pad = (filter_shape[2] - 1) // 2

        input_shuffled = input.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
        w1_shuffled = self.W1.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
        conv_op1 = FilterActs(stride=1, partial_sum=1, pad=pad)
        contiguous_input = gpu_contiguous(input_shuffled)
        contiguous_w1 = gpu_contiguous(w1_shuffled)
        conv_out_1_shuffled = conv_op1(contiguous_input, contiguous_w1)
        conv_out_1 = conv_out_1_shuffled.dimshuffle(3, 0, 1, 2)  # c01b to bc01
        activ_1_out = activation(conv_out_1 + self.b1.dimshuffle('x', 0, 'x', 'x'))

        # ---- second convolution ------------------------------------------
        # The second layer reads the first layer's output, so its number of
        # input feature maps is filter_shape[0]. A local shape is built for
        # it so the caller's filter_shape is left untouched (and a tuple is
        # accepted as well as a list).
        second_filter_shape = ([filter_shape[0], filter_shape[0]] +
                               list(filter_shape[2:]))

        fan_in = numpy.prod(second_filter_shape[1:])
        fan_out = second_filter_shape[0] * numpy.prod(second_filter_shape[2:])
        # initialize weights with random weights
        W_bound = numpy.sqrt(6. / (fan_in + fan_out))
        self.W2 = theano.shared(
            numpy.asarray(
                rng.uniform(low=-W_bound, high=W_bound,
                            size=second_filter_shape),
                dtype=theano.config.floatX
            ),
            borrow=True
        )
        if W2 is not None:
            self.W2.set_value(W2)

        # the bias is a 1D tensor -- one bias per output feature map
        b_values = numpy.zeros((second_filter_shape[0],),
                               dtype=theano.config.floatX)
        self.b2 = theano.shared(value=b_values, borrow=True)
        if b2 is not None:
            self.b2.set_value(b2)

        w2_shuffled = self.W2.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
        activ_1_out_shuffled = activ_1_out.dimshuffle(1, 2, 3, 0)  # bc01 to c01b
        contiguous_activ_1_out = gpu_contiguous(activ_1_out_shuffled)
        contiguous_w2 = gpu_contiguous(w2_shuffled)
        conv_op2 = FilterActs(stride=1, partial_sum=1, pad=pad)
        conv_out_2_shuffled = conv_op2(contiguous_activ_1_out, contiguous_w2)
        conv_out_2 = conv_out_2_shuffled.dimshuffle(3, 0, 1, 2)  # c01b to bc01
        # skip connection: the block's input is added before the activation
        self.output = activation(
            conv_out_2 + self.b2.dimshuffle('x', 0, 'x', 'x') + input)

        # Expected output shape. The convolution stride is fixed to 1, the
        # same value given to both FilterActs ops above.
        stride = 1
        assert (image_shape[2] - filter_shape[2] + 2 * pad) % stride == 0
        # floor division keeps the size an integer under true division too
        output_im_size = (image_shape[2] - filter_shape[2] + 2 * pad) // stride + 1
        self.output_shape = [image_shape[0],
                             filter_shape[0],
                             output_im_size,
                             output_im_size]

        # store parameters of this layer
        self.params = [self.W1, self.b1, self.W2, self.b2]
    def __init__(self,
                 incoming,
                 num_filters,
                 filter_size,
                 stride=(1, 1),
                 border_mode=None,
                 untie_biases=False,
                 W=None,
                 b=init.Constant(0.),
                 nonlinearity=nonlinearities.rectify,
                 pad=None,
                 dimshuffle=True,
                 flip_filters=False,
                 partial_sum=1,
                 **kwargs):
        """
        Set up a 2D convolution layer backed by the cuda-convnet FilterActs op.

        ``filter_size`` and ``stride`` are expanded to pairs with ``as_tuple``
        and must be square; ``num_filters`` must be a multiple of 16. The
        padding comes either from ``border_mode`` ('valid', 'full' or 'same')
        or from an explicit ``pad``; giving both is an error and giving
        neither means no padding.

        :param incoming: forwarded, with ``kwargs``, to the parent initializer
        :param num_filters: number of filters
        :param filter_size: filter size; only the first entry is stored
        :param stride: stride; only the first entry is stored
        :param border_mode: None, 'valid', 'full' or 'same'
        :param untie_biases: when true the bias shape also covers two axes of
            the layer's output shape instead of being ``(num_filters,)``
        :param W: filter initializer; None selects ``init.GlorotUniform``
            (built with ``c01b=True`` when ``dimshuffle`` is false)
        :param b: bias initializer, or None for a layer without bias
        :param nonlinearity: None selects ``nonlinearities.identity``
        :param pad: explicit padding, exclusive with ``border_mode``
        :param dimshuffle: stored on the layer; also picks the W initializer
            variant and which output-shape axes size an untied bias
        :param flip_filters: stored on the layer
        :param partial_sum: stored and passed to FilterActs

        :raises RuntimeError: for non-square filters or strides, a
            ``num_filters`` that is not a multiple of 16, both ``border_mode``
            and ``pad`` being given, or an unknown ``border_mode``
        """
        super(Conv2DCCLayer, self).__init__(incoming, **kwargs)
        self.nonlinearity = (nonlinearities.identity if nonlinearity is None
                             else nonlinearity)

        filter_size = as_tuple(filter_size, 2)
        stride = as_tuple(stride, 2)

        # cuda-convnet restrictions checked up front
        if filter_size[0] != filter_size[1]:
            raise RuntimeError("Conv2DCCLayer only supports square filters, "
                               "but filter_size=(%d, %d)" % filter_size)

        if stride[0] != stride[1]:
            raise RuntimeError("Conv2DCCLayer only supports square strides, "
                               "but stride=(%d, %d)" % stride)

        if num_filters % 16 != 0:
            raise RuntimeError("Conv2DCCLayer requires num_filters to be a "
                               "multiple of 16, but num_filters is "
                               "%d" % num_filters)

        self.num_filters = num_filters
        self.filter_size = filter_size[0]
        self.stride = stride[0]
        self.untie_biases = untie_biases
        self.dimshuffle = dimshuffle
        self.flip_filters = flip_filters
        self.partial_sum = partial_sum

        # Resolve the padding from border_mode / pad.
        if border_mode is not None and pad is not None:
            raise RuntimeError("You cannot specify both 'border_mode' and "
                               "'pad'. To avoid ambiguity, please specify "
                               "only one of them.")
        if border_mode is None:
            # explicit pad if there is one, otherwise default to valid mode
            self.pad = 0 if pad is None else pad
        elif border_mode == 'valid':
            self.pad = 0
        elif border_mode == 'full':
            self.pad = self.filter_size - 1
        elif border_mode == 'same':
            # only works for odd filter size, but the even filter size case
            # is probably not worth supporting.
            self.pad = (self.filter_size - 1) // 2
        else:
            raise RuntimeError("Unsupported border_mode for "
                               "Conv2DCCLayer: %s" % border_mode)

        if W is None:
            W = (init.GlorotUniform() if dimshuffle
                 else init.GlorotUniform(c01b=True))
        self.W = self.create_param(W, self.get_W_shape())

        if b is None:
            self.b = None
        elif not self.untie_biases:
            self.b = self.create_param(b, (num_filters, ))
        else:
            output_shape = self.get_output_shape()
            # which two output-shape entries size the bias depends on the
            # dimshuffle setting
            dim_a, dim_b = (2, 3) if self.dimshuffle else (1, 2)
            self.b = self.create_param(
                b, (num_filters, output_shape[dim_a], output_shape[dim_b]))

        self.filter_acts_op = FilterActs(stride=self.stride,
                                         partial_sum=self.partial_sum,
                                         pad=self.pad)