def test_DownsampleFactorMax(self):
    """Check 2D pooling against the NumPy reference for every pooling mode.

    For each combination of pool shape, ``ignore_border`` flag and mode,
    the result of ``max_pool_2d`` and of a directly built
    ``DownsampleFactorMax`` op is compared with
    ``self.numpy_max_pool_2d`` on the same random images.
    """
    rng = numpy.random.RandomState(utt.fetch_seed())
    maxpoolshps = ((1, 1), (2, 2), (3, 3), (2, 3))
    # Random images shared by every configuration.
    imval = rng.rand(4, 2, 16, 16)
    images = tensor.dtensor4()
    for maxpoolshp, ignore_border, mode in product(
            maxpoolshps,
            [True, False],
            ['max', 'average_inc_pad', 'average_exc_pad']):
        # Reference result from the pure NumPy implementation.
        numpy_output_val = self.numpy_max_pool_2d(
            imval, maxpoolshp, ignore_border, mode=mode)

        # max_pool_2d wrapper.
        output = max_pool_2d(images, maxpoolshp, ignore_border, mode=mode)
        f = function([images, ], [output, ])
        output_val = f(imval)
        # Compare with a tolerance rather than bitwise equality: the
        # averaging modes do floating-point arithmetic, so the compiled op
        # and NumPy are not guaranteed to round identically.
        utt.assert_allclose(output_val, numpy_output_val)

        # DownsampleFactorMax op built directly.
        maxpool_op = DownsampleFactorMax(
            maxpoolshp, ignore_border=ignore_border, mode=mode)(images)
        f = function([images], maxpool_op)
        output_val = f(imval)
        utt.assert_allclose(output_val, numpy_output_val)
def test_DownsampleFactorMaxPaddingStride(self):
    """Compare padded, strided pooling with its NumPy reference.

    Each case pairs a pool size, a stride, a padding and an image size;
    every case is run for the three pooling modes and the op output is
    checked against ``self.numpy_max_pool_2d_stride_padding``.
    """
    # Padding does not support ignore_border=False.
    ignore_border = True
    rng = numpy.random.RandomState(utt.fetch_seed())
    # One row per case: (pool size, stride, padding, image size).
    cases = [
        ((3, 3), (2, 2), (2, 2), (5, 5)),
        ((4, 4), (2, 2), (1, 2), (5, 5)),
        ((3, 4), (1, 1), (2, 1), (5, 6)),
        ((4, 3), (1, 2), (0, 0), (6, 5)),
        ((2, 2), (2, 2), (1, 1), (5, 5)),
    ]
    pooling_modes = ['max', 'average_inc_pad', 'average_exc_pad']
    n_batch = 4
    n_channels = 2
    images = tensor.dtensor4()
    for case, mode in product(cases, pooling_modes):
        maxpoolsize, stridesize, paddingsize, (n_rows, n_cols) = case
        # Fresh random image per (case, mode), shifted by -0.5.
        imval = rng.rand(n_batch, n_channels, n_rows, n_cols) - 0.5
        numpy_output_val = self.numpy_max_pool_2d_stride_padding(
            imval, maxpoolsize, ignore_border,
            stridesize, paddingsize, mode)
        pool = DownsampleFactorMax(
            maxpoolsize,
            ignore_border=ignore_border,
            st=stridesize,
            padding=paddingsize,
            mode=mode,
        )
        f = function([images], pool(images))
        utt.assert_allclose(f(imval), numpy_output_val)
def mp(input):
    """Apply a padded, strided ``DownsampleFactorMax`` op to ``input``.

    ``maxpoolsize``, ``stridesize``, ``paddingsize`` and ``mode`` are free
    names that are not defined in this function.
    """
    pool_op = DownsampleFactorMax(
        maxpoolsize,
        ignore_border=True,
        st=stridesize,
        padding=paddingsize,
        mode=mode,
    )
    return pool_op(input)
def test_DownsampleFactorMax(self):
    """Check max pooling against ``self.numpy_max_pool_2d``.

    Runs every pool shape with ``ignore_border`` both on and off, once
    through ``max_pool_2d`` and once through a ``DownsampleFactorMax`` op.
    """
    rng = numpy.random.RandomState(utt.fetch_seed())
    pool_shapes = ((1, 1), (2, 2), (3, 3), (2, 3))
    # Random input images.
    imval = rng.rand(4, 10, 64, 64)
    images = tensor.dtensor4()
    for maxpoolshp in pool_shapes:
        for ignore_border in [True, False]:
            # Expected values from the NumPy implementation.
            expected = self.numpy_max_pool_2d(
                imval, maxpoolshp, ignore_border)

            # max_pool_2d must match the reference exactly.
            wrapper_graph = max_pool_2d(images, maxpoolshp, ignore_border)
            wrapper_fn = function([images, ], [wrapper_graph, ])
            wrapper_val = wrapper_fn(imval)
            assert numpy.all(wrapper_val == expected)

            # The op itself is checked with an absolute tolerance.
            pool = DownsampleFactorMax(
                maxpoolshp, ignore_border=ignore_border)
            op_fn = function([images], pool(images))
            abs_err = numpy.abs(op_fn(imval) - expected)
            assert (abs_err < 1e-5).all()
def test_DownsampleFactorMaxStride(self):
    """Check strided max pooling values and output shapes.

    ``outputshps`` lists the expected output shape for every
    (pool shape, ignore_border, stride) combination, in loop order.
    """
    rng = numpy.random.RandomState(utt.fetch_seed())
    maxpoolshps = ((1, 1), (3, 3), (5, 3))
    stridesizes = ((1, 1), (3, 3), (5, 7))
    # Random input images.
    imval = rng.rand(4, 10, 16, 16)
    outputshps = (
        (4, 10, 16, 16), (4, 10, 6, 6), (4, 10, 4, 3),
        (4, 10, 16, 16), (4, 10, 6, 6), (4, 10, 4, 3),
        (4, 10, 14, 14), (4, 10, 5, 5), (4, 10, 3, 2),
        (4, 10, 14, 14), (4, 10, 6, 6), (4, 10, 4, 3),
        (4, 10, 12, 14), (4, 10, 4, 5), (4, 10, 3, 2),
        (4, 10, 12, 14), (4, 10, 5, 6), (4, 10, 4, 3),
    )
    images = tensor.dtensor4()
    # Expected shapes are consumed one per innermost iteration.
    expected_shapes = iter(outputshps)
    for maxpoolshp in maxpoolshps:
        for ignore_border in [True, False]:
            for stride in stridesizes:
                outputshp = next(expected_shapes)
                # NumPy reference, whose shape is checked first.
                numpy_output_val = self.numpy_max_pool_2d_stride(
                    imval, maxpoolshp, ignore_border, stride)
                assert numpy_output_val.shape == outputshp, (
                    "outshape is %s, calculated shape is %s"
                    % (outputshp, numpy_output_val.shape))
                # DownsampleFactorMax op.
                pool = DownsampleFactorMax(
                    maxpoolshp, ignore_border=ignore_border, st=stride)
                f = function([images], pool(images))
                utt.assert_allclose(f(imval), numpy_output_val)
def mp(input, grad):
    """Pool ``input`` and pass (input, pooled output, grad) to the grad op.

    ``maxpoolshp``, ``ignore_border`` and ``stride`` are free names that
    are not defined in this function.
    """
    # Both ops are built with the same configuration.
    op_kwargs = dict(ignore_border=ignore_border, st=stride)
    pooled = DownsampleFactorMax(maxpoolshp, **op_kwargs)(input)
    pool_grad = DownsampleFactorMaxGrad(maxpoolshp, **op_kwargs)
    return pool_grad(input, pooled, grad)
def test_infer_shape(self):
    """Run ``_compile_and_check`` on the pooling op and its grad op.

    ``out_shapes[i]`` holds the pooled shapes for ``maxpoolshps[i]``, first
    for ``ignore_border=True`` and then for ``ignore_border=False``.
    """
    image = tensor.dtensor4()
    maxout = tensor.dtensor4()
    gz = tensor.dtensor4()
    rng = numpy.random.RandomState(utt.fetch_seed())
    maxpoolshps = ((1, 1), (2, 2), (3, 3), (2, 3), (3, 2))

    image_val = rng.rand(4, 6, 7, 9)
    out_shapes = [
        [[4, 6, 7, 9], [4, 6, 7, 9]],
        [[4, 6, 3, 4], [4, 6, 4, 5]],
        [[4, 6, 2, 3], [4, 6, 3, 3]],
        [[4, 6, 3, 3], [4, 6, 4, 3]],
        [[4, 6, 2, 4], [4, 6, 3, 5]],
    ]

    for maxpoolshp, shapes_for_pool in zip(maxpoolshps, out_shapes):
        for ignore_border, pooled_shape in zip([True, False],
                                               shapes_for_pool):
            # Shapes generated by DownsampleFactorMax.
            forward = DownsampleFactorMax(
                maxpoolshp, ignore_border=ignore_border)(image)
            self._compile_and_check(
                [image], [forward], [image_val], DownsampleFactorMax)

            # Shapes generated by DownsampleFactorMaxGrad; its two extra
            # inputs are random arrays of the pooled shape.
            maxout_val = rng.rand(*pooled_shape)
            gz_val = rng.rand(*pooled_shape)
            backward = DownsampleFactorMaxGrad(
                maxpoolshp, ignore_border=ignore_border)(image, maxout, gz)
            self._compile_and_check(
                [image, maxout, gz],
                [backward],
                [image_val, maxout_val, gz_val],
                DownsampleFactorMaxGrad,
                warn=False)
def max_pool_3d(input, ds, ignore_border=False):
    """Max-pool the three trailing axes of a 5-D tensor.

    The work is done by two passes of the 2D ``DownsampleFactorMax`` op:
    the last two axes are pooled with ``(ds[1], ds[2])``, then the
    third-from-last axis is moved to the end and pooled with ``ds[0]``.

    :param input: symbolic tensor laid out as [n, c, x, y, z].
    :param ds: length-3 sequence of pooling factors for (x, y, z).
    :param ignore_border: forwarded to ``DownsampleFactorMax``.
    :raises NotImplementedError: if ``input`` is not 5-dimensional.
    """
    # Only inputs of the form [n, c, x, y, z] are accepted.
    if input.ndim != 5:
        raise NotImplementedError(
            'max_pool_3d requires a input [n, c, x, y, z]')

    # Number of input dimensions.
    vid_dim = input.ndim

    def pool_trailing_pair(volume, pool_shape):
        """Pool the last two axes of ``volume``, keeping its rank."""
        # Shape of the two trailing ("frame") axes.
        frame_shape = volume.shape[-2:]
        # Batch size: product of the sizes of all non-frame axes, padded
        # on the right so it can be joined with the other shape pieces
        # (see theano.tensor.shape_padright).
        batch_size = T.shape_padright(T.prod(volume.shape[:-2]), 1)
        shape_4d = T.cast(
            T.join(0, batch_size, T.as_tensor([1, ]), frame_shape),
            'int32')
        volume_4d = T.reshape(volume, shape_4d, ndim=4)
        pooled_4d = DownsampleFactorMax(pool_shape, ignore_border)(volume_4d)
        # Leading axes unchanged, trailing pair replaced by pooled sizes.
        restored_shape = T.join(0, volume.shape[:-2], pooled_4d.shape[-2:])
        return T.reshape(pooled_4d, restored_shape, ndim=vid_dim)

    leading_axes = list(range(vid_dim - 3))

    # Pass 1: pool the last two axes.
    out = pool_trailing_pair(input, (ds[1], ds[2]))

    # Pass 2: move the third-from-last axis to the end and pool it.
    to_back = leading_axes + [vid_dim - 2] + [vid_dim - 1] + [vid_dim - 3]
    outtime = pool_trailing_pair(out.dimshuffle(to_back), (1, ds[0]))

    # Undo the permutation so the axes are back in their input order.
    to_front = leading_axes + [vid_dim - 1] + [vid_dim - 3] + [vid_dim - 2]
    return outtime.dimshuffle(to_front)
def test_downsample():
    """Compare GPU and CPU downsampling, forward and gradient.

    For each shape / pooling factor / ``ignore_border`` combination the
    test compiles the same graph with ``mode_with_gpu`` and
    ``mode_without_gpu``, checks that the expected op class appears in each
    compiled graph, and checks that both give numerically close results.
    """
    shps = [
        (1, 1, 1, 12),
        (1, 1, 2, 2),
        (1, 1, 1, 1),
        (1, 1, 4, 4),
        (1, 1, 10, 11),
        (1, 2, 2, 2),
        (3, 5, 4, 4),
        (25, 1, 7, 7),
        (1, 1, 12, 12),
        (1, 1, 2, 14),
        (1, 1, 12, 14),
        (1, 1, 14, 14),
        (1, 1, 16, 16),
        (1, 1, 18, 18),
        (1, 1, 24, 24),
        (1, 6, 24, 24),
        (10, 1, 24, 24),
        (10, 6, 24, 24),
        (30, 6, 12, 12),
        (30, 2, 24, 24),
        (30, 6, 24, 24),
        (10, 10, 10, 11),
        (1, 1, 10, 1025),
        (1, 1, 10, 1023),
        (1, 1, 1025, 10),
        (1, 1, 1023, 10),
    ]

    # Visit the shapes in a seeded random order.
    numpy.random.RandomState(unittest_tools.fetch_seed()).shuffle(shps)

    for shp in shps:
        for ds in (2, 2), (3, 2), (1, 1):
            # Skip pooling factors larger than the image.
            if ds[0] > shp[2]:
                continue
            if ds[1] > shp[3]:
                continue
            # GpuDownsampleFactorMax doesn't like having more than 512
            # columns in the output tensor.
            if float(shp[3]) / ds[1] > 512:
                continue
            for ignore_border in (True, False):
                # Single-string form prints the same text on Python 2 and 3.
                print('test_downsample %s %s %s' % (shp, ds, ignore_border))
                ds_op = DownsampleFactorMax(ds, ignore_border=ignore_border)

                a = tcn.shared_constructor(my_rand(*shp), 'a')
                f = pfunc([], ds_op(tensor.as_tensor_variable(a)),
                          mode=mode_with_gpu)
                f2 = pfunc([], ds_op(tensor.as_tensor_variable(a)),
                           mode=mode_without_gpu)
                assert any(
                    isinstance(node.op, tcn.blas.GpuDownsampleFactorMax)
                    for node in f.maker.env.toposort())
                assert any(
                    isinstance(node.op, DownsampleFactorMax)
                    for node in f2.maker.env.toposort())
                assert numpy.allclose(f(), f2())

                # Same comparison for the gradient w.r.t. the input.
                g = pfunc(
                    [],
                    tensor.grad(
                        ds_op(tensor.as_tensor_variable(a)).sum(), a),
                    mode=mode_with_gpu)
                g2 = pfunc(
                    [],
                    tensor.grad(
                        ds_op(tensor.as_tensor_variable(a)).sum(), a),
                    mode=mode_without_gpu)
                assert any(
                    isinstance(node.op, tcn.blas.GpuDownsampleFactorMaxGrad)
                    for node in g.maker.env.toposort())
                assert any(
                    isinstance(node.op, DownsampleFactorMaxGrad)
                    for node in g2.maker.env.toposort())
                assert numpy.allclose(g(), g2())
def mp(input, grad):
    """Pool ``input`` with padding and stride, then apply ``MaxPoolGrad``.

    ``maxpoolsize``, ``stridesize`` and ``paddingsize`` are free names that
    are not defined in this function.
    """
    # Forward op and grad op share the same configuration.
    op_kwargs = dict(ignore_border=True, st=stridesize, padding=paddingsize)
    pooled = DownsampleFactorMax(maxpoolsize, **op_kwargs)(input)
    pool_grad = MaxPoolGrad(maxpoolsize, **op_kwargs)
    return pool_grad(input, pooled, grad)
def test_DownsampleFactorMaxStrideExtra(self):
    """Check strided pooling on extra pool/stride/image-size combinations.

    ``outputshps`` holds two expected shapes per case: the one for
    ``ignore_border=True`` followed by the one for ``ignore_border=False``.
    """
    rng = numpy.random.RandomState(utt.fetch_seed())
    maxpoolshps = ((5, 3), (5, 3), (5, 3), (5, 5), (3, 2), (7, 7), (9, 9))
    stridesizes = ((3, 2), (7, 5), (10, 6), (1, 1),
                   (2, 3), (10, 10), (1, 1))
    imvsizs = ((16, 16), (16, 16), (16, 16), (8, 5),
               (8, 5), (8, 5), (8, 5))
    outputshps = (
        (4, 10, 4, 7), (4, 10, 5, 8),
        (4, 10, 2, 3), (4, 10, 3, 4),
        (4, 10, 2, 3), (4, 10, 2, 3),
        (4, 10, 4, 1), (4, 10, 4, 1),
        (4, 10, 3, 2), (4, 10, 4, 2),
        (4, 10, 1, 0), (4, 10, 1, 1),
        (4, 10, 0, 0), (4, 10, 1, 1),
    )
    images = tensor.dtensor4()
    pooling_modes = ['max', 'sum', 'average_inc_pad', 'average_exc_pad']
    cases = zip(maxpoolshps, stridesizes, imvsizs)
    for case_no, (maxpoolshp, stride, imvsize) in enumerate(cases):
        # One random image per case, reused for every border/mode setting.
        imval = rng.rand(4, 10, imvsize[0], imvsize[1])
        for ignore_border, mode in product([True, False], pooling_modes):
            # Even slot: ignore_border=True; odd slot: ignore_border=False.
            outputshp = outputshps[2 * case_no + (0 if ignore_border else 1)]
            # NumPy reference, whose shape is checked first.
            numpy_output_val = self.numpy_max_pool_2d_stride(
                imval, maxpoolshp, ignore_border, stride, mode)
            assert numpy_output_val.shape == outputshp, (
                "outshape is %s, calculated shape is %s"
                % (outputshp, numpy_output_val.shape))
            # DownsampleFactorMax op.
            pool = DownsampleFactorMax(
                maxpoolshp,
                ignore_border=ignore_border,
                st=stride,
                mode=mode,
            )
            f = function([images], pool(images))
            utt.assert_allclose(f(imval), numpy_output_val)
def mp(input):
    """Apply ``DownsampleFactorMax`` to ``input``.

    ``maxpoolshp`` and ``ignore_border`` are free names that are not
    defined in this function.
    """
    pool_op = DownsampleFactorMax(maxpoolshp, ignore_border=ignore_border)
    return pool_op(input)
def test_downsample():
    """Compare GPU and CPU downsampling: output, gradient and grad-of-grad.

    Each graph is compiled once with a GPU mode and once with
    ``mode_without_gpu``. The test checks that the expected op class is
    present in each compiled graph and that both results are numerically
    close.
    """
    shps = [
        (1, 1, 1, 12),
        (1, 1, 2, 2),
        (1, 1, 1, 1),
        (1, 1, 4, 4),
        (1, 1, 10, 11),
        (1, 2, 2, 2),
        (3, 5, 4, 4),
        (25, 1, 7, 7),
        (1, 1, 12, 12),
        (1, 1, 2, 14),
        (1, 1, 12, 14),
        (1, 1, 14, 14),
        (1, 1, 16, 16),
        (1, 1, 18, 18),
        (1, 1, 24, 24),
        (1, 6, 24, 24),
        (10, 1, 24, 24),
        (10, 6, 24, 24),
        (30, 6, 12, 12),
        (30, 2, 24, 24),
        (30, 6, 24, 24),
        (10, 10, 10, 11),
        (1, 1, 10, 1025),
        (1, 1, 10, 1023),
        (1, 1, 1025, 10),
        (1, 1, 1023, 10),
        (65536, 1, 10, 10),
        (1, 65536, 10, 10),
    ]

    # Visit the shapes in a seeded random order.
    numpy.random.RandomState(unittest_tools.fetch_seed()).shuffle(shps)

    def uses_op(compiled, op_class):
        """Tell whether ``compiled``'s graph has a node running ``op_class``."""
        return any([isinstance(node.op, op_class)
                    for node in compiled.maker.fgraph.toposort()])

    def pooled(op, shared_var):
        """Build a fresh pooling graph over ``shared_var``."""
        return op(tensor.as_tensor_variable(shared_var))

    for shp in shps:
        for ds in (2, 2), (3, 2), (1, 1):
            # Skip pooling factors larger than the image, and cases where
            # the output would be wider than 512 columns, which
            # GpuDownsampleFactorMax doesn't like.
            if (ds[0] > shp[2] or ds[1] > shp[3] or
                    float(shp[3]) / ds[1] > 512):
                continue
            for ignore_border in (True, False):
                ds_op = DownsampleFactorMax(ds, ignore_border=ignore_border)
                a = tcn.shared_constructor(my_rand(*shp), 'a')

                # Forward pass.
                f = pfunc([], pooled(ds_op, a),
                          mode=mode_with_gpu.excluding('cudnn'))
                f2 = pfunc([], pooled(ds_op, a), mode=mode_without_gpu)
                assert uses_op(f, tcn.blas.GpuDownsampleFactorMax)
                assert uses_op(f2, DownsampleFactorMax)
                assert numpy.allclose(f(), f2())

                # The grad is too slow on a GT220 GPU and makes the
                # computer freeze; remove this guard once it is optimized
                # enough. It only bypasses the gradient checks below.
                # Those tests were passing in all modes on a GTX470.
                if shp[0] > 30000 or shp[1] > 30000:
                    continue

                # First-order gradient.
                g = pfunc([],
                          tensor.grad(pooled(ds_op, a).sum(), a),
                          mode=mode_with_gpu.excluding('cudnn'))
                g2 = pfunc([],
                           tensor.grad(pooled(ds_op, a).sum(), a),
                           mode=mode_without_gpu)
                assert uses_op(g, tcn.blas.GpuDownsampleFactorMaxGrad)
                assert uses_op(g2, DownsampleFactorMaxGrad)
                assert numpy.allclose(g(), g2()), shp

                # Grad-of-grad via Lop, compiled with check_py_code
                # disabled on copies of both modes.
                ggf = gradient.Lop(
                    tensor.grad((pooled(ds_op, a) ** 2).sum(), a), a, a)

                ref_mode = copy.copy(mode_without_gpu)
                ref_mode.check_py_code = False
                gpu_mode = copy.copy(mode_with_gpu)
                gpu_mode.check_py_code = False
                gg = pfunc([], ggf, mode=gpu_mode)
                gg2 = pfunc([], ggf, mode=ref_mode)

                assert uses_op(gg, tcn.blas.GpuDownsampleFactorMaxGradGrad)
                assert uses_op(gg2, DownsampleFactorMaxGradGrad)
                assert numpy.allclose(gg(), gg2()), shp
def max_pool_3d(input, ds, ignore_border=False):
    """
    Downscale a video tensor by max pooling over time, height and width.

    Only the maximum value of non-overlapping patches of size
    (ds[0], ds[1], ds[2]) (time, height, width) is kept.  The pooling is
    done in two passes with the 2D ``DownsampleFactorMax`` op: the last two
    axes are pooled with (ds[1], ds[2]); then axis 1 is moved to the end,
    pooled with ds[0], and moved back.

    :type input: theano tensor with at least 3 dimensions
    :param input: input videos.  Height and width are the last two axes.
        Pooling over time (``ds[0] != 1``) uses a fixed 5-D axis
        permutation that treats axis 1 as time, so it needs a 5-D input.

    :type ds: tuple of length 3
    :param ds: factor by which to downscale. (2,2,2) will halve the video in
        each dimension.

    :param ignore_border: boolean value. When True, (5,5,5) input with
        ds=(2,2,2) will generate a (2,2,2) output. (3,3,3) otherwise.

    :raises NotImplementedError: if ``input`` has fewer than 3 dimensions,
        or if ``ds[0] != 1`` and ``input`` is not 5-dimensional.
    """
    if input.ndim < 3:
        raise NotImplementedError('max_pool_3d requires a dimension >= 3')

    # Number of input dimensions.
    vid_dim = input.ndim

    # The time pass below uses permutations written for exactly 5 axes;
    # reject other ranks here instead of failing later inside dimshuffle.
    if ds[0] != 1 and vid_dim != 5:
        raise NotImplementedError(
            'max_pool_3d requires a 5 dimensional input when ds[0] != 1')

    # Pooling is done in two steps so the 2D implementation can be reused:
    # first pool the frames, then pool the time axis.
    if (ds[1] > 1) or (ds[2] > 1):
        # Sizes of the two trailing (frame) axes.
        frame_shape = input.shape[-2:]

        # Product of all leading axis sizes, padded on the right so it can
        # be joined with the other shape pieces.
        batch_size = tensor.prod(input.shape[:-2])
        batch_size = tensor.shape_padright(batch_size, 1)

        # View as a 4D tensor of shape (batch_size, 1, height, width).
        new_shape = tensor.cast(
            tensor.join(0, batch_size, tensor.as_tensor([1, ]), frame_shape),
            'int32')
        input_4D = tensor.reshape(input, new_shape, ndim=4)

        # Downsample the mini-batch of frames in rows and cols.
        op = DownsampleFactorMax((ds[1], ds[2]), ignore_border)
        output = op(input_4D)

        # Restore the leading axes, keeping the pooled frame sizes.
        outshape = tensor.join(0, input.shape[:-2], output.shape[-2:])
        out = tensor.reshape(output, outshape, ndim=input.ndim)
    else:
        # No spatial pooling requested.
        out = input

    if ds[0] == 1:
        # No temporal pooling requested.
        return out

    # Move axis 1 (time) to the end so it can be pooled as the last axis.
    shufl = (0, 2, 3, 4, 1)
    input_time = out.dimshuffle(shufl)

    # Sizes of the two trailing axes after the shuffle.
    vid_shape = input_time.shape[-2:]

    # Product of all leading axis sizes, padded as above.
    batch_size = tensor.prod(input_time.shape[:-2])
    batch_size = tensor.shape_padright(batch_size, 1)

    # View as a 4D tensor of shape (batch_size, 1, width, time).
    new_shape = tensor.cast(
        tensor.join(0, batch_size, tensor.as_tensor([1, ]), vid_shape),
        'int32')
    input_4D_time = tensor.reshape(input_time, new_shape, ndim=4)

    # Downsample along time only (factor 1 on the other trailing axis).
    op = DownsampleFactorMax((1, ds[0]), ignore_border)
    outtime = op(input_4D_time)

    # Restore the leading axes, then move time back to axis 1.
    outshape = tensor.join(0, input_time.shape[:-2], outtime.shape[-2:])
    shufl = (0, 4, 1, 2, 3)
    return tensor.reshape(outtime, outshape,
                          ndim=input.ndim).dimshuffle(shufl)
def maxpool_3D(input, ds, ignore_border=False):
    """Max-pool the three trailing axes of ``input`` in two 2D passes.

    The last two axes are pooled with ``(ds[1], ds[2])``.  The
    third-from-last axis is then moved to the end, pooled with ``ds[0]``
    and moved back, so the 2D ``DownsampleFactorMax`` op does all the work.

    :param input: symbolic tensor with at least 3 dimensions.
    :param ds: length-3 sequence of pooling factors.
    :param ignore_border: forwarded to ``DownsampleFactorMax``.
    :raises NotImplementedError: if ``input`` has fewer than 3 dimensions.
    """
    if input.ndim < 3:
        raise NotImplementedError('max_pool_3d requires a dimension >= 3')

    n_axes = input.ndim
    leading_axes = list(range(n_axes - 3))
    # Moves the third-from-last axis behind the last two axes ...
    time_to_back = leading_axes + [n_axes - 2, n_axes - 1, n_axes - 3]
    # ... and the permutation that undoes that move.
    time_to_front = leading_axes + [n_axes - 1, n_axes - 3, n_axes - 2]

    # ---- pass 1: rows and cols ----
    frame_dims = input.shape[-2:]
    # Product of the leading axis sizes, padded so it can be joined below.
    n_frames = T.shape_padright(T.prod(input.shape[:-2]), 1)
    # 4D view of shape (n_frames, 1, height, width).
    frames_shape = T.cast(
        T.join(0, n_frames, T.as_tensor([1, ]), frame_dims), 'int32')
    frames = T.reshape(input, frames_shape, ndim=4)
    # ds[1] and ds[2] are the height and width factors.
    pooled_frames = DownsampleFactorMax(
        (ds[1], ds[2]), ignore_border)(frames)
    # Back to the original rank, with pooled frame sizes.
    spatial_shape = T.join(0, input.shape[:-2], pooled_frames.shape[-2:])
    spatial_out = T.reshape(pooled_frames, spatial_shape, ndim=input.ndim)

    # ---- pass 2: time ----
    time_last = spatial_out.dimshuffle(time_to_back)
    trailing_dims = time_last.shape[-2:]
    n_slices = T.shape_padright(T.prod(time_last.shape[:-2]), 1)
    # 4D view of shape (n_slices, 1, width, time).
    slices_shape = T.cast(
        T.join(0, n_slices, T.as_tensor([1, ]), trailing_dims), 'int32')
    slices = T.reshape(time_last, slices_shape, ndim=4)
    # Only the time axis is downsampled here.
    pooled_time = DownsampleFactorMax((1, ds[0]), ignore_border)(slices)

    # Restore the rank, then put the time axis back where it was.
    final_shape = T.join(0, time_last.shape[:-2], pooled_time.shape[-2:])
    restored = T.reshape(pooled_time, final_shape, ndim=input.ndim)
    return restored.dimshuffle(time_to_front)
def mp(input):
    """Apply a strided ``DownsampleFactorMax`` op to ``input``.

    ``maxpoolshp``, ``ignore_border``, ``stride`` and ``mode`` are free
    names that are not defined in this function.
    """
    pool_op = DownsampleFactorMax(
        maxpoolshp,
        ignore_border=ignore_border,
        st=stride,
        mode=mode,
    )
    return pool_op(input)
def mp(input, grad):
    """Pool ``input`` and pass (input, pooled output, grad) to ``MaxPoolGrad``.

    ``maxpoolshp`` and ``ignore_border`` are free names that are not
    defined in this function.
    """
    pooled = DownsampleFactorMax(
        maxpoolshp, ignore_border=ignore_border)(input)
    pool_grad = MaxPoolGrad(maxpoolshp, ignore_border=ignore_border)
    return pool_grad(input, pooled, grad)
def max_pool_2d_same_size(input, patch_size):
    """Pool ``input`` and feed the pooled result to the pooling grad op.

    ``DownsampleFactorMax`` is applied to ``input`` with ``patch_size``, and
    ``DownsampleFactorMaxGrad`` is then called with the pooled output as
    both its second and third argument.  Both ops get ``True`` as their
    second positional argument.

    NOTE(review): presumably the result has the same shape as ``input`` and
    keeps only each patch's maximum -- confirm against the grad op.

    :param input: symbolic tensor to pool.
    :param patch_size: pooling patch size given to both ops.
    :return: the output of the grad op.
    """
    pooled = DownsampleFactorMax(patch_size, True)(input)
    pool_grad = DownsampleFactorMaxGrad(patch_size, True)
    return pool_grad(input, pooled, pooled)