def maxpool_3D(input, ds, ignore_border=False):
    """
    Max-pool the three trailing dimensions (time, rows, cols) of a tensor.

    The work is split into two passes so the 2-D ``DownsampleFactorMax`` op
    can be reused: rows and cols are pooled first with factors
    ``(ds[1], ds[2])``, then the time dimension is pooled with ``ds[0]``.

    :type input: N-D theano tensor, N >= 3
    :param input: tensor whose three last dimensions are pooled.
    :type ds: sequence of length 3
    :param ds: pooling factors ordered as (time, rows, cols).
    :param ignore_border: passed unchanged to ``DownsampleFactorMax``.
    :raises NotImplementedError: if ``input`` has fewer than 3 dimensions.
    :return: pooled tensor with the same number of dimensions as ``input``.
    """
    if input.ndim < 3:
        raise NotImplementedError('max_pool_3d requires a dimension >= 3')

    def pool_trailing_pair(tensor, factors):
        # Fold every leading dimension into one batch dimension and add a
        # singleton dimension, so the 2-D op sees (batch, 1, d1, d2).
        leading = tensor.shape[:-2]
        collapsed = T.shape_padright(T.prod(leading), 1)
        shape_4d = T.cast(
            T.join(0, collapsed, T.as_tensor([1]), tensor.shape[-2:]),
            'int32')
        pooled = DownsampleFactorMax(factors, ignore_border)(
            T.reshape(tensor, shape_4d, ndim=4))
        # Put the original leading dimensions back in front of the pooled
        # pair.
        restored_shape = T.join(0, leading, pooled.shape[-2:])
        return T.reshape(pooled, restored_shape, ndim=tensor.ndim)

    ndim = input.ndim
    untouched = list(range(ndim - 3))

    # Pass 1: rows and cols are already the two last dimensions.
    frames_pooled = pool_trailing_pair(input, (ds[1], ds[2]))

    # Pass 2: (..., time, rows, cols) -> (..., rows, cols, time) so that
    # time becomes the last dimension, then pool it alone.
    time_last = frames_pooled.dimshuffle(
        untouched + [ndim - 2, ndim - 1, ndim - 3])
    time_pooled = pool_trailing_pair(time_last, (1, ds[0]))

    # (..., rows, cols, time) -> (..., time, rows, cols)
    return time_pooled.dimshuffle(untouched + [ndim - 1, ndim - 3, ndim - 2])
Exemple #2
0
def max_pool_3d(input, ds, ignore_border=False):
    """
    Max-pool a video tensor over time, rows and columns.

    Rows and columns are the two last dimensions of ``input``. The time
    dimension is the one at position ``input.ndim - 4`` (axis 1 of a 5-D
    input, axis 0 of a 4-D input); the dimension at ``input.ndim - 3`` is
    not pooled. Pooling keeps the maximum of non-overlapping patches and is
    done in two passes with the 2-D ``DownsampleFactorMax`` op: first over
    (rows, cols), then over time.

    :type input: N-D theano tensor, N >= 4
    :param input: input videos.
    :type ds: tuple of length 3
    :param ds: factor by which to downscale, ordered (time, rows, cols).
        (2,2,2) will halve the video in each pooled dimension.
    :param ignore_border: boolean value. When True, a pooled dimension of
        length 5 with factor 2 yields length 2; otherwise 3.
    :raises NotImplementedError: if ``input`` has fewer than 4 dimensions.
    :return: pooled tensor with the same number of dimensions as ``input``.
    """
    if input.ndim < 4:
        # The shuffles below address axis ``ndim - 4``; with a 3-D input
        # that index is -1 and the shuffle pattern is invalid.
        raise NotImplementedError('max_pool_3d requires a dimension >= 4')

    vid_dim = input.ndim

    # ---- Pass 1: pool rows and cols ----
    frame_shape = input.shape[-2:]

    # collapse all "leading" dimensions into a single batch dimension
    batch_size = T.prod(input.shape[:-2])
    batch_size = T.shape_padright(batch_size, 1)
    # 4D tensor with shape: (batch_size, 1, rows, cols)
    new_shape = T.cast(
        T.join(0, batch_size, T.as_tensor([1]), frame_shape), 'int32')
    input_4D = T.reshape(input, new_shape, ndim=4)

    # downsample mini-batch of videos in rows and cols
    op = DownsampleFactorMax((ds[1], ds[2]), ignore_border)
    output = op(input_4D)
    # restore the leading dimensions
    outshape = T.join(0, input.shape[:-2], output.shape[-2:])
    out = T.reshape(output, outshape, ndim=input.ndim)

    # ---- Pass 2: pool time ----
    # move the time axis (ndim - 4) to the back, keeping the order of the
    # three axes that followed it
    shufl = (list(range(vid_dim - 4)) + list(range(vid_dim - 3, vid_dim)) +
             [vid_dim - 4])
    input_time = out.dimshuffle(shufl)
    vid_shape = input_time.shape[-2:]

    # collapse all "leading" dimensions into a single batch dimension
    batch_size = T.prod(input_time.shape[:-2])
    batch_size = T.shape_padright(batch_size, 1)
    # 4D tensor with shape: (batch_size, 1, cols, time)
    new_shape = T.cast(
        T.join(0, batch_size, T.as_tensor([1]), vid_shape), 'int32')
    input_4D_time = T.reshape(input_time, new_shape, ndim=4)

    # downsample mini-batch of videos in time only (factor 1 on cols)
    op = DownsampleFactorMax((1, ds[0]), ignore_border)
    outtime = op(input_4D_time)
    # restore the leading dimensions: (..., rows, cols, time)
    outshape = T.join(0, input_time.shape[:-2], outtime.shape[-2:])
    # move the pooled time axis from the back to position ndim - 4 again
    shufl = (list(range(vid_dim - 4)) + [vid_dim - 1] +
             list(range(vid_dim - 4, vid_dim - 1)))
    return T.reshape(outtime, outshape, ndim=input.ndim).dimshuffle(shufl)
Exemple #3
0
    def test_infer_shape(self):
        """Check shape inference of DownsampleFactorMax and MaxPoolGrad.

        Each pool shape is combined with both ``ignore_border`` values and
        three paddings; combinations with no expected shape are skipped.
        A broadcastable input is checked last.
        """
        image = tensor.dtensor4()
        maxout = tensor.dtensor4()
        gz = tensor.dtensor4()
        rng = numpy.random.RandomState(utt.fetch_seed())
        maxpoolshps = ((1, 1), (2, 2), (3, 3), (2, 3), (3, 2))
        paddings = [(0, 0), (1, 1), (1, 2)]

        image_val = rng.rand(4, 6, 7, 9)

        # One table per padding; each row belongs to one pool shape and holds
        # [shape for ignore_border=True, shape for ignore_border=False].
        # None marks a combination that is not checked.
        shapes_pad_0_0 = [
            [[4, 6, 7, 9], [4, 6, 7, 9]],
            [[4, 6, 3, 4], [4, 6, 4, 5]],
            [[4, 6, 2, 3], [4, 6, 3, 3]],
            [[4, 6, 3, 3], [4, 6, 4, 3]],
            [[4, 6, 2, 4], [4, 6, 3, 5]],
        ]
        shapes_pad_1_1 = [
            [None, None],
            [[4, 6, 4, 5], None],
            [[4, 6, 3, 3], None],
            [[4, 6, 4, 3], None],
            [[4, 6, 3, 5], None],
        ]
        shapes_pad_1_2 = [
            [None, None],
            [None, None],
            [[4, 6, 3, 4], None],
            [[4, 6, 4, 4], None],
            [None, None],
        ]
        out_shapes = [shapes_pad_0_0, shapes_pad_1_1, shapes_pad_1_2]

        for i, maxpoolshp in enumerate(maxpoolshps):
            for j, ignore_border in enumerate([True, False]):
                for k, padding in enumerate(paddings):
                    expected = out_shapes[k][i][j]
                    if expected is None:
                        continue

                    # shapes inferred for the forward op
                    pool_op = DownsampleFactorMax(
                        maxpoolshp,
                        ignore_border=ignore_border,
                        padding=padding)
                    self._compile_and_check(
                        [image], [pool_op(image)],
                        [image_val], DownsampleFactorMax)

                    # shapes inferred for the gradient op
                    maxout_val = rng.rand(*expected)
                    gz_val = rng.rand(*expected)
                    grad_op = MaxPoolGrad(
                        maxpoolshp,
                        ignore_border=ignore_border,
                        padding=padding)
                    self._compile_and_check(
                        [image, maxout, gz],
                        [grad_op(image, maxout, gz)],
                        [image_val, maxout_val, gz_val],
                        MaxPoolGrad,
                        warn=False)

        # same check with an input whose two last dimensions are
        # broadcastable
        bcast_image = tensor.tensor(
            dtype='float64', broadcastable=(False, False, True, True))
        bcast_val = rng.rand(4, 6, 1, 1)
        bcast_op = DownsampleFactorMax(
            (2, 2), ignore_border=True, padding=(0, 0))
        self._compile_and_check(
            [bcast_image], [bcast_op(bcast_image)],
            [bcast_val], DownsampleFactorMax)
Exemple #4
0
def max_pool_3d(input, ds, ignore_border=False):
    """
    Perform 3D max-pooling.

    Rows and columns are the two last dimensions of ``input``; the third
    pooled dimension is the one at position ``input.ndim - 4`` (axis 1 of a
    5-D input). The dimension at ``input.ndim - 3`` is not pooled. The
    pooling is done in two passes with the 2-D ``DownsampleFactorMax`` op.

    :type input: theano.tensor with at least 4 dimensions
    :param input: input feature volumes

    :type ds: tuple of length 3
    :param ds: factor by which to downscale, typically set as (2,2,2).
        ``ds[0]`` applies to the axis at ``ndim - 4``, ``ds[1]`` and
        ``ds[2]`` to the two last axes.

    :param ignore_border: boolean value. When True, a pooled dimension of
        length 7 with factor 2 yields length 3; otherwise 4.

    :raises NotImplementedError: if ``input`` has fewer than 4 dimensions.
    :return: pooled tensor with the same number of dimensions as ``input``.
    """
    if input.ndim < 4:
        # The shuffles below address axis ``ndim - 4``; fail early with a
        # clear message instead of building an invalid shuffle pattern.
        raise NotImplementedError('max_pool_3d requires a dimension >= 4')

    def _pool_last_two(tensor, factors):
        # Fold all leading dimensions into one batch dimension, giving the
        # 4D shape (batch_size, 1, d1, d2) the 2-D op is applied to.
        leading = tensor.shape[:-2]
        batch_size = T.shape_padright(T.prod(leading), 1)
        shape_4d = T.cast(
            T.join(0, batch_size, T.as_tensor([1]), tensor.shape[-2:]),
            'int32')
        pooled = DownsampleFactorMax(factors, ignore_border)(
            T.reshape(tensor, shape_4d, ndim=4))
        # restore the leading dimensions
        restored_shape = T.join(0, leading, pooled.shape[-2:])
        return T.reshape(pooled, restored_shape, ndim=tensor.ndim)

    vid_dim = input.ndim
    leading_axes = list(range(vid_dim - 4))

    # Maxpool frame (rows and cols)
    out = _pool_last_two(input, (ds[1], ds[2]))

    # Maxpool time: move axis ``ndim - 4`` to the back so it can be pooled
    # as the last dimension (factor 1 leaves its neighbour untouched).
    to_back = leading_axes + list(range(vid_dim - 3, vid_dim)) + [vid_dim - 4]
    outtime = _pool_last_two(out.dimshuffle(to_back), (1, ds[0]))

    # move the pooled axis from the back to position ``ndim - 4`` again
    to_front = (leading_axes + [vid_dim - 1] +
                list(range(vid_dim - 4, vid_dim - 1)))
    return outtime.dimshuffle(to_front)
Exemple #5
0
 def test_DownsampleFactorMaxStride(self):
     """Compare strided DownsampleFactorMax against the numpy reference.

     Both the output shape of the reference and the values computed by the
     op are checked for every pool shape / ignore_border / stride
     combination.
     """
     rng = numpy.random.RandomState(utt.fetch_seed())
     maxpoolshps = ((1, 1), (3, 3), (5, 3))
     stridesizes = ((1, 1), (3, 3), (5, 7))
     # generate random images
     imval = rng.rand(4, 10, 16, 16)
     # One expected shape per combination, listed in the order in which the
     # loops below visit them.
     outputshps = ((4, 10, 16, 16), (4, 10, 6, 6), (4, 10, 4, 3),
                   (4, 10, 16, 16), (4, 10, 6, 6), (4, 10, 4, 3),
                   (4, 10, 14, 14), (4, 10, 5, 5), (4, 10, 3, 2),
                   (4, 10, 14, 14), (4, 10, 6, 6), (4, 10, 4, 3),
                   (4, 10, 12, 14), (4, 10, 4, 5), (4, 10, 3, 2),
                   (4, 10, 12, 14), (4, 10, 5, 6), (4, 10, 4, 3))
     images = tensor.dtensor4()
     expected_shapes = iter(outputshps)
     for maxpoolshp in maxpoolshps:
         for ignore_border in [True, False]:
             for stride in stridesizes:
                 outputshp = next(expected_shapes)
                 # reference result
                 numpy_output_val = self.numpy_max_pool_2d_stride(
                     imval, maxpoolshp, ignore_border, stride)
                 assert numpy_output_val.shape == outputshp, (
                     "outshape is %s, calculated shape is %s"
                     % (outputshp, numpy_output_val.shape))
                 # DownsampleFactorMax op
                 pool_op = DownsampleFactorMax(maxpoolshp,
                                               ignore_border=ignore_border,
                                               st=stride)
                 f = function([images], pool_op(images))
                 utt.assert_allclose(f(imval), numpy_output_val)
    def test_DownsampleFactorMaxPaddingStride_grad_grad(self):
        """Verify the gradient of MaxPoolGrad with stride and padding."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        imgsizes = ((10, 10), (10, 5), (5, 5))
        maxpoolsizes = ((5, 3), (3, 5), (3, 3))
        stridesizes = ((3, 2), (2, 3), (3, 3))
        paddingsizes = ((2, 2), (2, 1), (2, 2))

        cases = zip(imgsizes, maxpoolsizes, stridesizes, paddingsizes)
        for imgsize, maxpoolsize, stridesize, paddingsize in cases:
            imval = rng.rand(1, 1, imgsize[0], imgsize[1]) * 10.0

            # the incoming gradient has the shape of the pooled output
            grad_shape = DownsampleFactorMax.out_shape(
                imval.shape, maxpoolsize,
                st=stridesize, ignore_border=True, padding=paddingsize)
            grad_val = rng.rand(*grad_shape) * 10.0

            def mp(input, grad):
                pool_op = DownsampleFactorMax(
                    maxpoolsize, ignore_border=True,
                    st=stridesize, padding=paddingsize)
                out = pool_op(input)
                grad_op = MaxPoolGrad(
                    maxpoolsize, ignore_border=True,
                    st=stridesize, padding=paddingsize)
                return grad_op(input, out, grad)

            utt.verify_grad(mp, [imval, grad_val], rng=rng)
    def test_AveragePoolPaddingStride_grad_grad(self):
        """Verify the gradient of AveragePoolGrad with stride and padding."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        imgsizes = ((10, 10), (10, 5), (5, 5))
        avgpoolsizes = ((5, 3), (3, 5), (3, 3))
        stridesizes = ((3, 2), (2, 3), (3, 3))
        paddingsizes = ((2, 2), (2, 1), (2, 2))
        # 'average_exc_pad' with non-zero padding is not implemented
        modes = ['sum', 'average_inc_pad']

        cases = zip(imgsizes, avgpoolsizes, stridesizes, paddingsizes)
        for imgsize, avgpoolsize, stridesize, paddingsize in cases:
            imval = rng.rand(1, 1, imgsize[0], imgsize[1]) * 10.0

            for mode in modes:
                # the incoming gradient has the shape of the pooled output
                grad_shape = DownsampleFactorMax.out_shape(
                    imval.shape, avgpoolsize,
                    st=stridesize, ignore_border=True, padding=paddingsize)
                grad_val = rng.rand(*grad_shape) * 10.0

                def mp(input, grad):
                    grad_op = AveragePoolGrad(
                        avgpoolsize, ignore_border=True,
                        st=stridesize, padding=paddingsize, mode=mode)
                    return grad_op(input, grad)

                utt.verify_grad(mp, [imval, grad_val], rng=rng)
    def test_AveragePoolGrad_grad_st_extra(self):
        """Verify the gradient of AveragePoolGrad on extra strided examples.

        Combinations whose pooled output is empty are skipped.
        """
        rng = numpy.random.RandomState(utt.fetch_seed())
        avgpoolshps = ((5, 3), (5, 3), (5, 3), (5, 5), (3, 2), (7, 7), (9, 9))
        stridesizes = ((3, 2), (7, 5), (10, 6), (1, 1), (2, 3), (10, 10),
                       (1, 1))
        imvsizs = ((16, 16), (16, 16), (16, 16), (8, 5), (8, 5), (8, 5),
                   (8, 5))
        modes = ['sum', 'average_inc_pad', 'average_exc_pad']

        for imvsize, stride, avgpoolshp in zip(imvsizs, stridesizes,
                                               avgpoolshps):
            imval = rng.rand(1, 2, imvsize[0], imvsize[1])
            for ignore_border in [True, False]:
                for mode in modes:
                    grad_shape = DownsampleFactorMax.out_shape(
                        imval.shape, avgpoolshp,
                        ignore_border=ignore_border, st=stride)
                    grad_val = rng.rand(*grad_shape)

                    def mp(input, grad):
                        grad_op = AveragePoolGrad(
                            avgpoolshp, ignore_border=ignore_border,
                            st=stride, mode=mode)
                        return grad_op(input, grad)

                    # nothing to verify when the output is empty
                    if numpy.prod(grad_shape) != 0:
                        utt.verify_grad(mp, [imval, grad_val], rng=rng)
Exemple #9
0
 def mp(input):
     """Max-pool ``input`` with ignore_border=True.

     Pool size, stride and padding come from the free variables
     ``maxpoolsize``, ``stridesize`` and ``paddingsize``.
     """
     pool_op = DownsampleFactorMax(maxpoolsize, ignore_border=True,
                                   st=stridesize, padding=paddingsize)
     return pool_op(input)
Exemple #10
0
    def test_infer_shape(self):
        """Check shape inference of DownsampleFactorMax and its gradient op."""
        image = tensor.dtensor4()
        maxout = tensor.dtensor4()
        gz = tensor.dtensor4()
        rng = numpy.random.RandomState(utt.fetch_seed())
        maxpoolshps = ((1, 1), (2, 2), (3, 3), (2, 3), (3, 2))

        image_val = rng.rand(4, 6, 7, 9)
        # One row per pool shape:
        # [shape for ignore_border=True, shape for ignore_border=False]
        out_shapes = [
            [[4, 6, 7, 9], [4, 6, 7, 9]],
            [[4, 6, 3, 4], [4, 6, 4, 5]],
            [[4, 6, 2, 3], [4, 6, 3, 3]],
            [[4, 6, 3, 3], [4, 6, 4, 3]],
            [[4, 6, 2, 4], [4, 6, 3, 5]],
        ]

        for maxpoolshp, shapes_for_pool in zip(maxpoolshps, out_shapes):
            for ignore_border, expected in zip([True, False],
                                               shapes_for_pool):
                # checking shapes generated by DownsampleFactorMax
                pool_op = DownsampleFactorMax(maxpoolshp,
                                              ignore_border=ignore_border)
                self._compile_and_check([image], [pool_op(image)],
                                        [image_val], DownsampleFactorMax)

                # checking shapes generated by DownsampleFactorMaxGrad
                maxout_val = rng.rand(*expected)
                gz_val = rng.rand(*expected)
                grad_op = DownsampleFactorMaxGrad(
                    maxpoolshp, ignore_border=ignore_border)
                self._compile_and_check([image, maxout, gz],
                                        [grad_op(image, maxout, gz)],
                                        [image_val, maxout_val, gz_val],
                                        DownsampleFactorMaxGrad,
                                        warn=False)
    def test_DownsampleFactorMaxGrad_grad_st_extra(self):
        """Verify the gradient of DownsampleFactorMaxGrad on extra strided
        examples.

        Combinations whose pooled output is empty are skipped.
        """
        rng = numpy.random.RandomState(utt.fetch_seed())
        maxpoolshps = ((5, 3), (5, 3), (5, 3), (5, 5), (3, 2), (7, 7), (9, 9))
        stridesizes = ((3, 2), (7, 5), (10, 6), (1, 1), (2, 3), (10, 10),
                       (1, 1))
        imvsizs = ((16, 16), (16, 16), (16, 16), (8, 5), (8, 5), (8, 5),
                   (8, 5))

        for imvsize, stride, maxpoolshp in zip(imvsizs, stridesizes,
                                               maxpoolshps):
            imval = rng.rand(1, 2, imvsize[0], imvsize[1])
            for ignore_border in [True, False]:
                grad_shape = DownsampleFactorMax.out_shape(
                    imval.shape, maxpoolshp,
                    ignore_border=ignore_border, st=stride)
                grad_val = rng.rand(*grad_shape)

                def mp(input, grad):
                    pool_op = DownsampleFactorMax(
                        maxpoolshp, ignore_border=ignore_border, st=stride)
                    out = pool_op(input)
                    grad_op = DownsampleFactorMaxGrad(
                        maxpoolshp, ignore_border=ignore_border, st=stride)
                    return grad_op(input, out, grad)

                # nothing to verify when the output is empty
                if numpy.prod(grad_shape) != 0:
                    utt.verify_grad(mp, [imval, grad_val], rng=rng)
    def test_DownsampleFactorMaxPaddingStride(self):
        """Compare padded, strided pooling in every mode against the numpy
        reference."""
        ignore_border = True  # padding does not support ignore_border=False
        rng = numpy.random.RandomState(utt.fetch_seed())
        maxpoolsizes = [(3, 3), (4, 4), (3, 4), (4, 3), (2, 2)]
        stridesizes = [(2, 2), (2, 2), (1, 1), (1, 2), (2, 2)]
        paddingsizes = [(2, 2), (1, 2), (2, 1), (0, 0), (1, 1)]
        imgsizes = [(5, 5), (5, 5), (5, 6), (6, 5), (5, 5)]
        m = 4  # minibatch
        c = 2  # channel size
        images = tensor.dtensor4()

        modes = ['max', 'sum', 'average_inc_pad', 'average_exc_pad']
        configs = list(zip(imgsizes, stridesizes, maxpoolsizes,
                           paddingsizes))
        for config, mode in product(configs, modes):
            imgsize, stridesize, maxpoolsize, paddingsize = config
            imval = rng.rand(m, c, imgsize[0], imgsize[1]) - 0.5

            # reference result
            numpy_output_val = self.numpy_max_pool_2d_stride_padding(
                imval, maxpoolsize, ignore_border, stridesize, paddingsize,
                mode)

            # result of the op under test
            pool_op = DownsampleFactorMax(maxpoolsize,
                                          ignore_border=ignore_border,
                                          st=stridesize,
                                          padding=paddingsize,
                                          mode=mode)
            f = function([images], pool_op(images))
            utt.assert_allclose(f(imval), numpy_output_val)
Exemple #13
0
    def test_AveragePoolPaddingStride_grad_grad(self):
        """Verify the gradient of AveragePoolGrad when both a stride and a
        padding are used."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        imgsizes = ((10, 10), (10, 5), (5, 5))
        avgpoolsizes = ((5, 3), (3, 5), (3, 3))
        stridesizes = ((3, 2), (2, 3), (3, 3))
        paddingsizes = ((2, 2), (2, 1), (2, 2))

        for imgsize, avgpoolsize, stridesize, paddingsize in zip(
                imgsizes, avgpoolsizes, stridesizes, paddingsizes):
            rows, cols = imgsize[0], imgsize[1]
            imval = rng.rand(1, 1, rows, cols) * 10.0

            # 'average_exc_pad' with non-zero padding is not implemented
            for mode in ['sum', 'average_inc_pad']:
                grad_shape = DownsampleFactorMax.out_shape(
                    imval.shape,
                    avgpoolsize,
                    st=stridesize,
                    ignore_border=True,
                    padding=paddingsize)
                grad_val = rng.rand(*grad_shape) * 10.0

                def mp(input, grad):
                    return AveragePoolGrad(
                        avgpoolsize,
                        ignore_border=True,
                        st=stridesize,
                        padding=paddingsize,
                        mode=mode)(input, grad)

                utt.verify_grad(mp, [imval, grad_val], rng=rng)
    def test_DownsampleFactorMaxGrad_grad_st(self):
        """Verify the gradient of MaxPoolGrad when a stride is used."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        maxpoolshps = ((1, 1), (3, 3), (5, 3))
        stridesizes = ((1, 1), (3, 3), (5, 7))
        imval = rng.rand(1, 2, 16, 16)

        for maxpoolshp in maxpoolshps:
            for ignore_border in [True, False]:
                for stride in stridesizes:
                    # keyword arguments shared by the forward and grad ops
                    op_kwargs = dict(ignore_border=ignore_border, st=stride)

                    grad_shape = DownsampleFactorMax.out_shape(
                        imval.shape, maxpoolshp, **op_kwargs)
                    grad_val = rng.rand(*grad_shape)

                    def mp(input, grad):
                        pooled = DownsampleFactorMax(
                            maxpoolshp, **op_kwargs)(input)
                        grad_op = MaxPoolGrad(maxpoolshp, **op_kwargs)
                        return grad_op(input, pooled, grad)

                    utt.verify_grad(mp, [imval, grad_val], rng=rng)
    def test_DownsampleFactorMaxPaddingStride_grad_grad(self):
        """Verify the gradient of MaxPoolGrad when both a stride and a
        padding are used."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        imgsizes = ((10, 10), (10, 5), (5, 5))
        maxpoolsizes = ((5, 3), (3, 5), (3, 3))
        stridesizes = ((3, 2), (2, 3), (3, 3))
        paddingsizes = ((2, 2), (2, 1), (2, 2))

        for imgsize, maxpoolsize, stridesize, paddingsize in zip(
                imgsizes, maxpoolsizes, stridesizes, paddingsizes):
            rows, cols = imgsize[0], imgsize[1]
            imval = rng.rand(1, 1, rows, cols) * 10.0

            # options shared by the forward op and its gradient op
            op_kwargs = dict(ignore_border=True,
                             st=stridesize,
                             padding=paddingsize)

            grad_shape = DownsampleFactorMax.out_shape(
                imval.shape, maxpoolsize, **op_kwargs)
            grad_val = rng.rand(*grad_shape) * 10.0

            def mp(input, grad):
                out = DownsampleFactorMax(maxpoolsize, **op_kwargs)(input)
                grad_op = MaxPoolGrad(maxpoolsize, **op_kwargs)
                return grad_op(input, out, grad)

            utt.verify_grad(mp, [imval, grad_val], rng=rng)
    def test_AveragePoolGrad_grad_st(self):
        """Verify the gradient of AveragePoolGrad when a stride is used."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        avgpoolshps = ((1, 1), (3, 3), (5, 3))
        stridesizes = ((1, 1), (3, 3), (5, 7))
        modes = ['sum', 'average_inc_pad', 'average_exc_pad']
        imval = rng.rand(1, 2, 16, 16)

        for avgpoolshp in avgpoolshps:
            for ignore_border in [True, False]:
                for mode in modes:
                    for stride in stridesizes:
                        # the incoming gradient has the pooled output shape
                        grad_shape = DownsampleFactorMax.out_shape(
                            imval.shape, avgpoolshp,
                            ignore_border=ignore_border, st=stride)
                        grad_val = rng.rand(*grad_shape)

                        def mp(input, grad):
                            return AveragePoolGrad(
                                avgpoolshp,
                                ignore_border=ignore_border,
                                st=stride,
                                mode=mode)(input, grad)

                        utt.verify_grad(mp, [imval, grad_val], rng=rng)
Exemple #17
0
 def get_dim(self, name):
     """Return the dimensions of the variable called ``name``.

     ``'input_'`` gives ``self.input_dim``; ``'output'`` gives the pooled
     shape computed by ``DownsampleFactorMax.out_shape`` as a tuple. Any
     other name falls through and yields ``None``.
     """
     if name == 'input_':
         return self.input_dim
     if name == 'output':
         pooled_shape = DownsampleFactorMax.out_shape(
             self.input_dim,
             self.pooling_size,
             st=self.step,
             ignore_border=self.ignore_border,
             padding=self.padding)
         return tuple(pooled_shape)
     return None
Exemple #18
0
    def test_DownsampleFactorMax(self):
        """Compare max_pool_2d and the DownsampleFactorMax op against the
        numpy reference for every pool shape, border setting and mode."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        # generate random images
        maxpoolshps = ((1, 1), (2, 2), (3, 3), (2, 3))
        imval = rng.rand(4, 2, 16, 16)
        images = tensor.dtensor4()
        modes = ['max', 'average_inc_pad', 'average_exc_pad']

        for maxpoolshp, ignore_border, mode in product(
                maxpoolshps, [True, False], modes):
            # Pure Numpy computation
            numpy_output_val = self.numpy_max_pool_2d(
                imval, maxpoolshp, ignore_border, mode=mode)

            # max_pool_2d helper: compared for exact equality
            output = max_pool_2d(images, maxpoolshp, ignore_border, mode=mode)
            f = function([images], [output])
            output_val = f(imval)
            assert numpy.all(output_val == numpy_output_val)

            # DownsampleFactorMax op: compared with assert_allclose
            pool_op = DownsampleFactorMax(
                maxpoolshp, ignore_border=ignore_border, mode=mode)
            f = function([images], pool_op(images))
            output_val = f(imval)
            utt.assert_allclose(output_val, numpy_output_val)
Exemple #19
0
    def test_DownsampleFactorMax(self):
        """Compare max_pool_2d and the DownsampleFactorMax op against the
        numpy reference for every pool shape and border setting."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        # generate random images
        maxpoolshps = ((1, 1), (2, 2), (3, 3), (2, 3))
        imval = rng.rand(4, 10, 64, 64)
        images = tensor.dtensor4()

        for maxpoolshp in maxpoolshps:
            for ignore_border in [True, False]:
                # Pure Numpy computation
                numpy_output_val = self.numpy_max_pool_2d(
                    imval, maxpoolshp, ignore_border)

                # max_pool_2d helper: compared for exact equality
                output = max_pool_2d(images, maxpoolshp, ignore_border)
                f = function([images], [output])
                output_val = f(imval)
                assert numpy.all(output_val == numpy_output_val)

                # DownsampleFactorMax op: compared with a 1e-5 tolerance
                pool_op = DownsampleFactorMax(
                    maxpoolshp, ignore_border=ignore_border)
                f = function([images], pool_op(images))
                output_val = f(imval)
                abs_error = numpy.abs(output_val - numpy_output_val)
                assert (abs_error < 1e-5).all()
Exemple #20
0
 def get_dim(self, name):
     """Return the dimensions of the variable called ``name``.

     ``'input_'`` gives ``self.input_dim``; ``'output'`` gives the pooled
     shape computed by ``DownsampleFactorMax.out_shape`` as a tuple. Any
     other name falls through and yields ``None``.
     """
     if name == 'input_':
         return self.input_dim
     if name == 'output':
         pooled_shape = DownsampleFactorMax.out_shape(
             self.input_dim, self.pooling_size, st=self.step)
         return tuple(pooled_shape)
     return None
Exemple #21
0
 def get_dim(self, name):
     """Look up the dimensions of the variable called ``name``.

     For ``'input_'`` this is ``self.input_dim``. For ``'output'`` it is
     the tuple built from ``DownsampleFactorMax.out_shape`` using the
     pooling size, step, border setting and padding stored on ``self``.
     Other names yield ``None``.
     """
     if name == 'input_':
         return self.input_dim
     if name == 'output':
         shape_options = dict(st=self.step,
                              ignore_border=self.ignore_border,
                              padding=self.padding)
         return tuple(DownsampleFactorMax.out_shape(
             self.input_dim, self.pooling_size, **shape_options))
     return None
Exemple #22
0
 def mp(input, grad):
     """Apply the max-pool gradient op to ``input``, its pooled output and
     ``grad``.

     Pool shape, border setting and stride come from the free variables
     ``maxpoolshp``, ``ignore_border`` and ``stride``.
     """
     pool_op = DownsampleFactorMax(
         maxpoolshp, ignore_border=ignore_border, st=stride)
     out = pool_op(input)
     grad_op = DownsampleFactorMaxGrad(
         maxpoolshp, ignore_border=ignore_border, st=stride)
     return grad_op(input, out, grad)
Exemple #23
0
    def test_DownsampleFactorMaxGrad_grad_st(self):
        """Check the gradient of the max-pool gradient op for strided
        pooling."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        maxpoolshps = ((1, 1), (3, 3), (5, 3))
        stridesizes = ((1, 1), (3, 3), (5, 7))
        imval = rng.rand(1, 2, 16, 16)

        for maxpoolshp in maxpoolshps:
            for ignore_border in [True, False]:
                for stride in stridesizes:
                    # the incoming gradient has the pooled output shape
                    grad_shape = DownsampleFactorMax.out_shape(
                        imval.shape,
                        maxpoolshp,
                        ignore_border=ignore_border,
                        st=stride)
                    grad_val = rng.rand(*grad_shape)

                    def mp(input, grad):
                        pool_op = DownsampleFactorMax(
                            maxpoolshp,
                            ignore_border=ignore_border,
                            st=stride)
                        out = pool_op(input)
                        grad_op = MaxPoolGrad(
                            maxpoolshp,
                            ignore_border=ignore_border,
                            st=stride)
                        return grad_op(input, out, grad)

                    utt.verify_grad(mp, [imval, grad_val], rng=rng)
Exemple #24
0
def max_pool_3d(input, ds, ignore_border=False):
    """Max-pool a 5D tensor in two 2D passes.

    The two trailing axes are pooled by ``(ds[1], ds[2])`` first. The third
    axis from the end is then moved to the back, pooled by ``ds[0]`` and
    moved back to its original position.

    :param input: symbolic tensor with exactly 5 dimensions
        (the error message names them ``[n, c, x, y, z]``).
    :param ds: three pooling factors; ``ds[0]`` applies to the third axis
        from the end, ``ds[1]`` and ``ds[2]`` to the last two axes.
    :param ignore_border: forwarded to ``DownsampleFactorMax``.
    :raises NotImplementedError: if ``input`` is not 5-dimensional.
    """
    # Only inputs of the form [n, c, x, y, z] are accepted.
    if input.ndim != 5:
        raise NotImplementedError(
            'max_pool_3d requires a input [n, c, x, y, z]')

    ndim = input.ndim

    def pool_last_two_axes(tensor_nd, factors):
        # Batch size: product of the sizes of every axis except the two
        # trailing (frame) axes, padded to a length-1 vector so it can be
        # joined with the other shape pieces.
        lead = T.shape_padright(T.prod(tensor_nd.shape[:-2]), 1)
        # Collapse to (batch, 1, rows, cols) so the 2D pooling op applies.
        shape_4d = T.cast(
            T.join(0, lead, T.as_tensor([1, ]), tensor_nd.shape[-2:]),
            'int32')
        pooled = DownsampleFactorMax(factors, ignore_border)(
            T.reshape(tensor_nd, shape_4d, ndim=4))
        # Restore the leading axes, keeping the pooled trailing sizes.
        restored_shape = T.join(0, tensor_nd.shape[:-2], pooled.shape[-2:])
        return T.reshape(pooled, restored_shape, ndim=ndim)

    # Pass 1: pool the last two axes.
    spatial = pool_last_two_axes(input, (ds[1], ds[2]))

    # Pass 2: move the third-from-last axis to the back and pool it alone.
    leading_axes = list(range(ndim - 3))
    to_back = leading_axes + [ndim - 2, ndim - 1, ndim - 3]
    temporal = pool_last_two_axes(spatial.dimshuffle(to_back), (1, ds[0]))

    # Undo the permutation so the axes are in their original order again.
    restore = leading_axes + [ndim - 1, ndim - 3, ndim - 2]
    return temporal.dimshuffle(restore)
Exemple #25
0
def test_downsample():
    """Compare GPU and CPU max-pooling (forward pass and gradient).

    For every input shape / pool shape / border setting that the GPU op
    supports, the function compiled in GPU mode must contain the GPU op,
    the one compiled in CPU mode must contain the CPU op, and both must
    produce numerically close results.
    """
    shps = [(1, 1, 1, 12),
            (1, 1, 2, 2),
            (1, 1, 1, 1),
            (1, 1, 4, 4),
            (1, 1, 10, 11),
            (1, 2, 2, 2),
            (3, 5, 4, 4),
            (25, 1, 7, 7),
            (1, 1, 12, 12),
            (1, 1, 2, 14),
            (1, 1, 12, 14),
            (1, 1, 14, 14),
            (1, 1, 16, 16),
            (1, 1, 18, 18),
            (1, 1, 24, 24),
            (1, 6, 24, 24),
            (10, 1, 24, 24),
            (10, 6, 24, 24),
            (30, 6, 12, 12),
            (30, 2, 24, 24),
            (30, 6, 24, 24),
            (10, 10, 10, 11),
            (1, 1, 10, 1025),
            (1, 1, 10, 1023),
            (1, 1, 1025, 10),
            (1, 1, 1023, 10),
            ]

    # Shuffle with the shared test seed so the visiting order varies between
    # seeds but stays reproducible for a given one.
    numpy.random.RandomState(unittest_tools.fetch_seed()).shuffle(shps)

    for shp in shps:
        for ds in (2, 2), (3, 2), (1, 1):
            # Skip pool windows larger than the image.
            if ds[0] > shp[2]:
                continue
            if ds[1] > shp[3]:
                continue
            # GpuDownsampleFactorMax does not handle more than 512 columns
            # in the output tensor.
            if float(shp[3]) / ds[1] > 512:
                continue
            for ignore_border in (True, False):
                # Single pre-formatted argument: valid on Python 2 and 3 and
                # prints exactly what the comma-separated form printed.
                print('test_downsample %s %s %s' % (shp, ds, ignore_border))
                ds_op = DownsampleFactorMax(ds, ignore_border=ignore_border)

                a = tcn.shared_constructor(my_rand(*shp), 'a')
                f = pfunc([], ds_op(tensor.as_tensor_variable(a)),
                          mode=mode_with_gpu)
                f2 = pfunc([], ds_op(tensor.as_tensor_variable(a)),
                           mode=mode_without_gpu)
                assert any(
                    isinstance(node.op, tcn.blas.GpuDownsampleFactorMax)
                    for node in f.maker.env.toposort())
                assert any(
                    isinstance(node.op, DownsampleFactorMax)
                    for node in f2.maker.env.toposort())
                assert numpy.allclose(f(), f2())

                g = pfunc([],
                          tensor.grad(
                              ds_op(tensor.as_tensor_variable(a)).sum(), a),
                          mode=mode_with_gpu)
                g2 = pfunc([],
                           tensor.grad(
                               ds_op(tensor.as_tensor_variable(a)).sum(), a),
                           mode=mode_without_gpu)
                assert any(
                    isinstance(node.op, tcn.blas.GpuDownsampleFactorMaxGrad)
                    for node in g.maker.env.toposort())
                assert any(
                    isinstance(node.op, DownsampleFactorMaxGrad)
                    for node in g2.maker.env.toposort())
                assert numpy.allclose(g(), g2())
Exemple #26
0
 def mp(input, grad):
     """Max-pool ``input`` with stride and padding, then apply ``MaxPoolGrad``.

     Pool size, stride and padding come from the enclosing scope; borders
     are always ignored.
     """
     pool_op = DownsampleFactorMax(
         maxpoolsize,
         ignore_border=True,
         st=stridesize,
         padding=paddingsize)
     pooled = pool_op(input)
     backward_op = MaxPoolGrad(maxpoolsize, ignore_border=True,
                               st=stridesize, padding=paddingsize)
     return backward_op(input, pooled, grad)
Exemple #27
0
def pool_output_shape_2d(input_shape,
                         axes,
                         pool_shape,
                         strides,
                         pads,
                         ignore_border=True):
    """
    Compute the output shape of a 2D pooling operation.

    Delegates to ``DownsampleFactorMax.out_shape`` and returns its result
    as a tuple. ``axes`` is accepted but not used here.
    """
    out_shape = DownsampleFactorMax.out_shape(
        imgshape=input_shape,
        ds=pool_shape,
        st=strides,
        ignore_border=ignore_border,
        padding=pads,
    )
    return tuple(out_shape)
Exemple #28
0
def pool_output_shape_2d(input_shape,
                         axes,
                         pool_shape,
                         strides,
                         pads,
                         ignore_border=True):
    """
    Return the pooled output shape as a tuple.

    The computation is done by ``DownsampleFactorMax.out_shape``; the
    ``axes`` argument is part of the signature but is not consulted.
    """
    shape_kwargs = dict(
        imgshape=input_shape,
        ds=pool_shape,
        st=strides,
        ignore_border=ignore_border,
        padding=pads,
    )
    return tuple(DownsampleFactorMax.out_shape(**shape_kwargs))
    def test_DownsampleFactorMaxGrad_grad(self):
        """Verify the gradient of ``DownsampleFactorMaxGrad`` for several
        pool shapes, with and without border handling."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        pool_shapes = ((1, 1), (3, 2), (2, 3))
        # Scaling widens the value spread, which makes the numeric gradient
        # more accurate.
        imval = rng.rand(2, 3, 3, 4) * 10.0

        cases = [(shape, border)
                 for shape in pool_shapes
                 for border in (True, False)]

        for maxpoolshp, ignore_border in cases:
            # The incoming gradient has the shape of the pooled output.
            grad_shape = DownsampleFactorMax.out_shape(
                imval.shape, maxpoolshp, ignore_border=ignore_border)
            grad_val = rng.rand(*grad_shape) * 10.0

            def pool_then_grad(input, grad):
                pooled = DownsampleFactorMax(
                    maxpoolshp, ignore_border=ignore_border)(input)
                backward_op = DownsampleFactorMaxGrad(
                    maxpoolshp, ignore_border=ignore_border)
                return backward_op(input, pooled, grad)

            utt.verify_grad(pool_then_grad, [imval, grad_val], rng=rng)
Exemple #30
0
 def test_DownsampleFactorMaxStrideExtra(self):
     """Check strided pooling against the numpy reference on extra
     pool-shape / stride / image-size combinations, for every mode."""
     rng = numpy.random.RandomState(utt.fetch_seed())
     maxpoolshps = ((5, 3), (5, 3), (5, 3), (5, 5), (3, 2), (7, 7), (9, 9))
     stridesizes = ((3, 2), (7, 5), (10, 6), (1, 1),
                    (2, 3), (10, 10), (1, 1))
     imvsizs = ((16, 16), (16, 16), (16, 16), (8, 5),
                (8, 5), (8, 5), (8, 5))
     # Two expected shapes per configuration: entry 2*i is for
     # ignore_border=True, entry 2*i + 1 for ignore_border=False.
     outputshps = ((4, 10, 4, 7), (4, 10, 5, 8), (4, 10, 2, 3),
                   (4, 10, 3, 4), (4, 10, 2, 3), (4, 10, 2, 3),
                   (4, 10, 4, 1), (4, 10, 4, 1), (4, 10, 3, 2),
                   (4, 10, 4, 2), (4, 10, 1, 0), (4, 10, 1, 1),
                   (4, 10, 0, 0), (4, 10, 1, 1))
     modes = ['max', 'sum', 'average_inc_pad', 'average_exc_pad']
     images = tensor.dtensor4()

     configs = zip(imvsizs, stridesizes, maxpoolshps)
     for indx, (imvsize, stride, maxpoolshp) in enumerate(configs):
         imval = rng.rand(4, 10, imvsize[0], imvsize[1])
         for ignore_border, mode in product([True, False], modes):
             indx_out = indx * 2 + (0 if ignore_border else 1)
             outputshp = outputshps[indx_out]

             # Reference value from the numpy implementation.
             numpy_output_val = self.numpy_max_pool_2d_stride(
                 imval, maxpoolshp, ignore_border, stride, mode)
             assert numpy_output_val.shape == outputshp, (
                 "outshape is %s, calculated shape is %s"
                 % (outputshp, numpy_output_val.shape))

             # Same computation through the DownsampleFactorMax op.
             pool_op = DownsampleFactorMax(maxpoolshp,
                                           ignore_border=ignore_border,
                                           st=stride, mode=mode)
             compiled = function([images], pool_op(images))
             utt.assert_allclose(compiled(imval), numpy_output_val)
Exemple #31
0
    def test_AveragePoolGrad_grad(self):
        """Verify the gradient of ``AveragePoolGrad`` for the sum and
        average pooling modes."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        pool_shapes = ((1, 1), (3, 2), (2, 3))
        pool_modes = ['sum', 'average_inc_pad', 'average_exc_pad']
        # Scaling widens the value spread, which makes the numeric gradient
        # more accurate.
        imval = rng.rand(2, 3, 3, 4) * 10.0

        # Same visiting order as nested loops over shape, border and mode.
        cases = [(shape, border, pool_mode)
                 for shape in pool_shapes
                 for border in (True, False)
                 for pool_mode in pool_modes]

        for avgpoolshp, ignore_border, mode in cases:
            # The incoming gradient has the shape of the pooled output.
            grad_shape = DownsampleFactorMax.out_shape(
                imval.shape, avgpoolshp, ignore_border=ignore_border)
            grad_val = rng.rand(*grad_shape) * 10.0

            def avg_grad(input, grad):
                backward_op = AveragePoolGrad(
                    avgpoolshp, ignore_border=ignore_border, mode=mode)
                return backward_op(input, grad)

            utt.verify_grad(avg_grad, [imval, grad_val], rng=rng)
Exemple #32
0
    def test_AveragePoolGrad_grad(self):
        """Numerically check the gradient of the average-pool gradient op
        across pool shapes, border settings and pooling modes."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        shapes_to_try = ((1, 1), (3, 2), (2, 3))
        modes_to_try = ['sum', 'average_inc_pad', 'average_exc_pad']
        # A larger value spread gives a more accurate numeric gradient.
        imval = rng.rand(2, 3, 3, 4) * 10.0

        def make_grad_fn(pool_shape, border, pool_mode):
            # Build the two-argument function handed to verify_grad.
            def grad_fn(input, grad):
                return AveragePoolGrad(
                    pool_shape, ignore_border=border,
                    mode=pool_mode)(input, grad)
            return grad_fn

        for avgpoolshp in shapes_to_try:
            for ignore_border in [True, False]:
                for mode in modes_to_try:
                    # The gradient has the same shape as the pooled output.
                    grad_shape = DownsampleFactorMax.out_shape(
                        imval.shape, avgpoolshp, ignore_border=ignore_border)
                    grad_val = rng.rand(*grad_shape) * 10.0

                    utt.verify_grad(
                        make_grad_fn(avgpoolshp, ignore_border, mode),
                        [imval, grad_val], rng=rng)
    def test_DownsampleFactorMaxGrad_grad(self):
        """Verify the gradient of ``MaxPoolGrad`` for several pool shapes,
        with and without border handling."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        pool_shapes = ((1, 1), (3, 2), (2, 3))
        # Scaling widens the value spread, which makes the numeric gradient
        # more accurate.
        imval = rng.rand(2, 3, 3, 4) * 10.0

        cases = [(shape, border)
                 for shape in pool_shapes
                 for border in (True, False)]

        for maxpoolshp, ignore_border in cases:
            # The incoming gradient has the shape of the pooled output.
            grad_shape = DownsampleFactorMax.out_shape(
                imval.shape, maxpoolshp, ignore_border=ignore_border)
            grad_val = rng.rand(*grad_shape) * 10.0

            def pool_then_grad(input, grad):
                pooled = DownsampleFactorMax(
                    maxpoolshp, ignore_border=ignore_border)(input)
                backward_op = MaxPoolGrad(maxpoolshp,
                                          ignore_border=ignore_border)
                return backward_op(input, pooled, grad)

            utt.verify_grad(pool_then_grad, [imval, grad_val], rng=rng)
    def test_DownsampleFactorMax(self):
        """Check ``max_pool_2d`` and the ``DownsampleFactorMax`` op against
        the numpy reference for every pool shape, border flag and mode."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        pool_shapes = ((1, 1), (2, 2), (3, 3), (2, 3))
        pool_modes = ['max', 'sum', 'average_inc_pad', 'average_exc_pad']
        # Random input images.
        imval = rng.rand(4, 2, 16, 16)
        images = tensor.dtensor4()

        for maxpoolshp, ignore_border, mode in product(pool_shapes,
                                                       [True, False],
                                                       pool_modes):
            # Reference result from the pure numpy implementation.
            expected = self.numpy_max_pool_2d(imval, maxpoolshp,
                                              ignore_border,
                                              mode=mode)

            # Functional interface.
            pooled_var = max_pool_2d(images, maxpoolshp, ignore_border,
                                     mode=mode)
            via_function = function([images, ], [pooled_var, ])
            utt.assert_allclose(via_function(imval), expected)

            # Op interface, including the static shape computation.
            op_output = DownsampleFactorMax(maxpoolshp,
                                            ignore_border=ignore_border,
                                            mode=mode)(images)
            static_shape = DownsampleFactorMax.out_shape(
                imval.shape, maxpoolshp, ignore_border=ignore_border)
            utt.assert_allclose(numpy.asarray(static_shape), expected.shape)
            via_op = function([images], op_output)
            utt.assert_allclose(via_op(imval), expected)
Exemple #35
0
    def test_AveragePoolGrad_grad_st(self):
        """Verify the gradient of the average-pool gradient op when a
        stride is used."""
        rng = numpy.random.RandomState(utt.fetch_seed())
        pool_shapes = ((1, 1), (3, 3), (5, 3))
        strides = ((1, 1), (3, 3), (5, 7))
        pool_modes = ['sum', 'average_inc_pad', 'average_exc_pad']
        imval = rng.rand(1, 2, 16, 16)

        # Same visiting order as nested loops over shape, border, mode and
        # stride, so the random stream is consumed in the same sequence.
        cases = [(shape, border, pool_mode, step)
                 for shape in pool_shapes
                 for border in (True, False)
                 for pool_mode in pool_modes
                 for step in strides]

        for avgpoolshp, ignore_border, mode, stride in cases:
            # The incoming gradient has the shape of the pooled output.
            grad_shape = DownsampleFactorMax.out_shape(
                imval.shape, avgpoolshp,
                ignore_border=ignore_border, st=stride)
            grad_val = rng.rand(*grad_shape)

            def avg_grad(input, grad):
                backward_op = AveragePoolGrad(
                    avgpoolshp, ignore_border=ignore_border,
                    st=stride, mode=mode)
                return backward_op(input, grad)

            utt.verify_grad(avg_grad, [imval, grad_val], rng=rng)
def init_net(num_of_classes, input_len, conv_params):
    """
    Build the convolutional network: weights, symbolic inputs, cost and updates.

    Convolution window sizes and filter counts for the three convolutional
    layers are set here, and the weights for those layers, the fully
    connected layer and the output layer are initialized. The size of the
    last convolutional output is tracked through every convolution and
    max-pool step so the fully connected layer gets the right input size.

    :param num_of_classes: number of classes
    :param input_len: read (sequence chunk) length
    :param conv_params: forwarded unchanged to ``model``
    :return: tuple ``(params, X, Y, cost, updates, y_x)``: the weight list,
        the symbolic input and target matrices, the cost expression, the
        RMSprop update rules and the argmax prediction
    """
    cwin1 = 4 * 6  # multiples of 4 because of data representation
    cwin2 = 3
    cwin3 = 2

    # How many different filters to learn at each layer. Floor division
    # keeps these integral on Python 3 as well (a float is not a valid
    # shape entry); on Python 2 the values are unchanged.
    num_filters_1 = 32 // 2
    num_filters_2 = 48 // 2
    num_filters_3 = 64 // 2

    # Size of convolution windows; each layer can use different values.
    # Weight shape is (filters, stack size, rows, columns); the stack size
    # of a layer equals the number of filters of the previous layer.
    w = init_weights((num_filters_1, 1, 1, cwin1))  # first convolution
    w2 = init_weights((num_filters_2, num_filters_1, 1, cwin2))  # second convolution
    w3 = init_weights((num_filters_3, num_filters_2, 1, cwin3))  # third convolution

    # Single-argument print calls behave the same on Python 2 and 3.
    print("#### CONVOLUTION PARAMETERS ####")
    print("cwin1 %d" % cwin1)
    print("cwin2 %d" % cwin2)
    print("cwin3 %d" % cwin3)
    print("num_filters_1 %d" % num_filters_1)
    print("num_filters_2 %d" % num_filters_2)
    print("num_filters_3 %d" % num_filters_3)

    # convolution: filters are moved by one position at a time, see parameter subsample=(1, 1)
    #
    # max pooling:
    #   scaling the input before applying the maxpool filter and
    #   displacement (stride) when sliding the max pool filters
    #
    # NOTE(review): conv1_stride, downscale1..3 and stride1..3 are not
    # defined in this function; presumably module-level settings -- confirm.

    # l1 conv:
    es = input_len
    es = (es - cwin1 + 1)
    # Floor division: es is used as a shape entry and must stay an integer.
    es = es // conv1_stride
    # l1 max_pool:
    es = DownsampleFactorMax.out_shape((1, es), (1, downscale1), st=(1, stride1))[1]  # downscale for first layer
    print("l1 es: %s" % (es,))

    # l2 conv:
    es = (es - cwin2 + 1)
    # l2 max_pool:
    es = DownsampleFactorMax.out_shape((1, es), (1, downscale2), st=(1, stride2))[1]  # downscale for second layer
    print("l2 es: %s" % (es,))

    # l3 conv:
    es = (es - cwin3 + 1)
    # l3 max_pool:
    es = DownsampleFactorMax.out_shape((1, es), (1, downscale3), st=(1, stride3))[1]  # downscale for third layer
    print("l3 es: %s" % (es,))

    # The downscaling above is tracked so that the fully connected layer
    # below is sized to the flattened output of the last convolution.

    # Fully connected layer: connects the outputs of the third layer's
    # filters to 500 (arbitrary) hidden nodes, which feed the output nodes.
    w4 = init_weights((num_filters_3 * es, 500))
    w_o = init_weights((500, num_of_classes))  # number of expected classes

    # matrix types
    X = T.ftensor4()
    Y = T.fmatrix()

    # Two model graphs share the same weights: one with non-zero values for
    # the two numeric arguments (0.2, 0.5) whose output feeds the cost, and
    # one with both set to zero whose output feeds the prediction.
    noise_l1, noise_l2, noise_l3, noise_l4, noise_py_x = model(X, w, w2, w3, w4, 0.2, 0.5, w_o, conv_params)
    l1, l2, l3, l4, py_x = model(X, w, w2, w3, w4, 0., 0., w_o, conv_params)
    y_x = T.argmax(py_x, axis=1)  # maxima predictions

    # Classification cost to optimize: raise the probability of the true
    # class and lower the others.
    cost = T.mean(T.nnet.categorical_crossentropy(noise_py_x, Y))
    params = [w, w2, w3, w4, w_o]
    updates = RMSprop(cost, params, lr=0.001)  # update function

    return params, X, Y, cost, updates, y_x
 def mp(input, grad):
     """Max-pool ``input``, then apply ``MaxPoolGrad`` to ``grad``.

     Pool shape and border handling come from the enclosing scope.
     """
     pool_op = DownsampleFactorMax(maxpoolshp, ignore_border=ignore_border)
     pooled = pool_op(input)
     backward_op = MaxPoolGrad(maxpoolshp, ignore_border=ignore_border)
     return backward_op(input, pooled, grad)
 def mp(input):
     """Apply the strided pooling op to ``input``.

     Pool shape, border handling, stride and mode come from the enclosing
     scope.
     """
     pool_op = DownsampleFactorMax(
         maxpoolshp,
         ignore_border=ignore_border,
         st=stride,
         mode=mode)
     return pool_op(input)
Exemple #39
0
def max_pool_3d(input, ds, ignore_border=False):
    """
    Max-pool a video tensor over time, height and width.

    Pooling is done in two passes so the 2D ``DownsampleFactorMax`` op can
    be reused. The last two axes are pooled by ``(ds[1], ds[2])`` first;
    then axis 1 is moved to the back, pooled by ``ds[0]`` and moved back.
    A pass whose factors are all 1 is skipped.

    :type input: N-D theano tensor of input images.
    :param input: input images. Spatial pooling acts on the last two axes
      and needs N >= 3. Temporal pooling (``ds[0] != 1``) acts on axis 1
      and needs a 5D input, because the axis permutations used for it are
      fixed to five dimensions.
    :type ds: tuple of length 3
    :param ds: factor by which to downscale, as (time, height, width).
      (2,2,2) will halve the video in each dimension.
    :param ignore_border: boolean value. When True, (5,5,5) input with ds=(2,2,2) will generate a
      (2,2,2) output. (3,3,3) otherwise.
    :raises NotImplementedError: if ``input`` has fewer than 3 dimensions,
      or if temporal pooling is requested on an input that is not 5D.
    """

    if input.ndim < 3:
        raise NotImplementedError('max_pool_3d requires a dimension >= 3')

    pool_time = ds[0] != 1
    # Fail early with a clear message instead of an obscure dimshuffle
    # error further down: the permutations below assume exactly 5 axes.
    if pool_time and input.ndim != 5:
        raise NotImplementedError(
            'max_pool_3d requires a 5D input when ds[0] != 1')

    if (ds[1] > 1) or (ds[2] > 1):
        # extract dimensions
        frame_shape = input.shape[-2:]

        # collapse all leading dimensions into a single batch dimension
        batch_size = tensor.prod(input.shape[:-2])
        batch_size = tensor.shape_padright(batch_size, 1)

        # store as 4D tensor with shape: (batch_size,1,height,width)
        new_shape = tensor.cast(
            tensor.join(0, batch_size, tensor.as_tensor([
                1,
            ]), frame_shape), 'int32')
        input_4D = tensor.reshape(input, new_shape, ndim=4)

        # downsample mini-batch of videos in rows and cols
        op = DownsampleFactorMax((ds[1], ds[2]), ignore_border)
        output = op(input_4D)
        # restore to original shape
        outshape = tensor.join(0, input.shape[:-2], output.shape[-2:])
        out = tensor.reshape(output, outshape, ndim=input.ndim)
    else:
        out = input

    if not pool_time:
        return out

    # now maxpool time: move axis 1 to the back so it becomes the last of
    # the two axes handled by the 2D pooling op
    shufl = (0, 2, 3, 4, 1)
    input_time = out.dimshuffle(shufl)
    # the two trailing dimensions after the shuffle
    vid_shape = input_time.shape[-2:]

    # collapse all leading dimensions into a single batch dimension
    batch_size = tensor.prod(input_time.shape[:-2])
    batch_size = tensor.shape_padright(batch_size, 1)

    # store as 4D tensor with shape: (batch_size,1,width,time)
    new_shape = tensor.cast(
        tensor.join(0, batch_size, tensor.as_tensor([
            1,
        ]), vid_shape), 'int32')
    input_4D_time = tensor.reshape(input_time, new_shape, ndim=4)
    # downsample mini-batch of videos in time only
    op = DownsampleFactorMax((1, ds[0]), ignore_border)
    outtime = op(input_4D_time)
    # restore the leading dimensions, then undo the earlier permutation so
    # the pooled axis is back at position 1
    outshape = tensor.join(0, input_time.shape[:-2], outtime.shape[-2:])
    shufl = (0, 4, 1, 2, 3)
    return tensor.reshape(outtime, outshape, ndim=input.ndim).dimshuffle(shufl)
def test_downsample():
    """Compare GPU and CPU max-pooling: forward pass, gradient and
    gradient of the gradient.

    For every supported input shape / pool shape / border setting, the
    GPU-compiled function must contain the GPU op, the CPU-compiled one the
    CPU op, and both must return numerically close values.
    """
    shps = [
        (1, 1, 1, 12),
        (1, 1, 2, 2),
        (1, 1, 1, 1),
        (1, 1, 4, 4),
        (1, 1, 10, 11),
        (1, 2, 2, 2),
        (3, 5, 4, 4),
        (25, 1, 7, 7),
        (1, 1, 12, 12),
        (1, 1, 2, 14),
        (1, 1, 12, 14),
        (1, 1, 14, 14),
        (1, 1, 16, 16),
        (1, 1, 18, 18),
        (1, 1, 24, 24),
        (1, 6, 24, 24),
        (10, 1, 24, 24),
        (10, 6, 24, 24),
        (30, 6, 12, 12),
        (30, 2, 24, 24),
        (30, 6, 24, 24),
        (10, 10, 10, 11),
        (1, 1, 10, 1025),
        (1, 1, 10, 1023),
        (1, 1, 1025, 10),
        (1, 1, 1023, 10),
        (65536, 1, 10, 10),
        (1, 65536, 10, 10),
    ]

    numpy.random.RandomState(unittest_tools.fetch_seed()).shuffle(shps)

    def graph_has_op(compiled, op_class):
        # True when the compiled graph holds at least one op_class node.
        return any(isinstance(node.op, op_class)
                   for node in compiled.maker.fgraph.toposort())

    for shp in shps:
        for ds in ((2, 2), (3, 2), (1, 1)):
            # Skip pool windows that do not fit inside the image.
            if ds[0] > shp[2] or ds[1] > shp[3]:
                continue
            # GpuDownsampleFactorMax doesn't like having more than 512
            # columns in the output tensor.
            if float(shp[3]) / ds[1] > 512:
                continue
            for ignore_border in (True, False):
                ds_op = DownsampleFactorMax(ds, ignore_border=ignore_border)
                a = tcn.shared_constructor(my_rand(*shp), 'a')

                def pooled():
                    # Build a fresh pooling expression on the shared input.
                    return ds_op(tensor.as_tensor_variable(a))

                # Forward pass.
                f = pfunc([], pooled(),
                          mode=mode_with_gpu.excluding('cudnn'))
                f2 = pfunc([], pooled(), mode=mode_without_gpu)
                assert graph_has_op(f, tcn.blas.GpuDownsampleFactorMax)
                assert graph_has_op(f2, DownsampleFactorMax)
                assert numpy.allclose(f(), f2())

                # The grad is too slow on a GT220 GPU and makes the
                # computer freeze; skip the remaining checks for the huge
                # shapes until it gets optimized enough. Those tests were
                # passing in all modes on a GTX470.
                if shp[0] > 30000 or shp[1] > 30000:
                    continue

                # Gradient.
                g = pfunc([], tensor.grad(pooled().sum(), a),
                          mode=mode_with_gpu.excluding('cudnn'))
                g2 = pfunc([], tensor.grad(pooled().sum(), a),
                           mode=mode_without_gpu)
                assert graph_has_op(g, tcn.blas.GpuDownsampleFactorMaxGrad)
                assert graph_has_op(g2, DownsampleFactorMaxGrad)
                assert numpy.allclose(g(), g2()), shp

                # Gradient of the gradient.
                ggf = gradient.Lop(
                    tensor.grad((pooled() ** 2).sum(), a), a, a)

                ref_mode = copy.copy(mode_without_gpu)
                ref_mode.check_py_code = False
                gpu_mode = copy.copy(mode_with_gpu)
                gpu_mode.check_py_code = False
                gg = pfunc([], ggf, mode=gpu_mode)
                gg2 = pfunc([], ggf, mode=ref_mode)

                assert graph_has_op(
                    gg, tcn.blas.GpuDownsampleFactorMaxGradGrad)
                assert graph_has_op(gg2, DownsampleFactorMaxGradGrad)
                assert numpy.allclose(gg(), gg2()), shp
Exemple #41
0
 def mp(input):
     """Apply the max-pool op to ``input``.

     Pool shape and border handling come from the enclosing scope.
     """
     pool_op = DownsampleFactorMax(maxpoolshp, ignore_border=ignore_border)
     return pool_op(input)
Exemple #42
0
def maxpool_2d(z, in_dim, poolsize, poolstride):
    """Max-pool ``z`` and report the shape of the result.

    Returns a pair ``(pooled, output_size)``: the result of ``max_pool_2d``
    on ``z``, and the shape that ``DownsampleFactorMax.out_shape`` computes
    for ``in_dim`` with the same pool size and stride, as a tuple.
    """
    pooled = max_pool_2d(z, ds=poolsize, st=poolstride)
    out_shape = DownsampleFactorMax.out_shape(in_dim, poolsize,
                                              st=poolstride)
    return pooled, tuple(out_shape)