def deconv(self, X, subsample=(2, 2), border_mode=(2, 2), conv_mode='conv', atype='sigmoid'):
    """Upsample ``X`` with cuDNN's convolution gradient-w.r.t.-inputs op.

    A convolution descriptor is built for a (dummy) forward convolution and
    ``GpuDnnConvGradI`` is applied to ``X`` with the layer filters ``self.W``;
    the result is passed through ``activation_fn_th``.  According to the
    original author this is only tested/working with "same" padding.

    :param X: symbolic 4D input; indexed on axes 0..3.
    :param subsample: per-axis factors multiplying the two trailing axes of
        ``X`` to obtain the output size (also passed to the descriptor).
    :param border_mode: padding handed to ``GpuDnnConvDesc``.
    :param conv_mode: convolution mode handed to ``GpuDnnConvDesc``.
    :param atype: activation type forwarded to ``activation_fn_th``.
    :returns: the activated output of ``GpuDnnConvGradI``.
    :raises ValueError: if ``someconfigs.backend`` is neither ``'gpuarray'``
        nor ``'cudandarray'``.
    """
    # Both ops require C-contiguous inputs; gpu_contiguous copies only if needed.
    img = gpu_contiguous(X)
    kerns = gpu_contiguous(self.W)

    # Shape of the upsampled output buffer.
    out_shape = (img.shape[0], kerns.shape[1],
                 img.shape[2] * subsample[0], img.shape[3] * subsample[1])
    make_desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                               conv_mode=conv_mode)

    if someconfigs.backend == 'gpuarray':
        # The gpuarray descriptor op is called with the kernel shape only.
        desc = make_desc(kerns.shape)
        out = GpuAllocEmpty('float32', None)(*out_shape)
    elif someconfigs.backend == 'cudandarray':
        # The old CUDA backend descriptor also takes the image shape.
        desc = make_desc(gpu_alloc_empty(*out_shape).shape, kerns.shape)
        out = gpu_alloc_empty(*out_shape)
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on `out`/`desc`.
        raise ValueError("Unsupported someconfigs.backend %r; expected "
                         "'gpuarray' or 'cudandarray'" % (someconfigs.backend,))

    # The convolution gradient with respect to the inputs acts as the deconvolution.
    d_img = GpuDnnConvGradI()(kerns, img, out, desc)
    return activation_fn_th(d_img, atype=atype)
def convolve(self, input, **kwargs):
    """Apply ``GpuDnnConvGradI`` to ``input`` with the layer filters ``self.W``.

    The output buffer has ``self.W.shape[1]`` channels and the two trailing
    axes of ``input`` multiplied by ``self.stride``.  ``self.crop`` is passed
    to the descriptor as the border mode.  ``kwargs`` are ignored.
    """
    top = gpu_contiguous(input)
    filters = gpu_contiguous(self.W)

    upsampled_dims = (
        top.shape[0],
        filters.shape[1],
        top.shape[2] * self.stride[0],
        top.shape[3] * self.stride[1],
    )

    descriptor_op = GpuDnnConvDesc(border_mode=self.crop,
                                   subsample=self.stride,
                                   conv_mode='conv')
    desc = descriptor_op(gpu_alloc_empty(*upsampled_dims).shape, filters.shape)
    out = gpu_alloc_empty(*upsampled_dims)

    conved = GpuDnnConvGradI()(filters, top, out, desc)
    return conved
def dnn_pool3d2d(inputs, pool_shape, pool_stride, image_shape, mode='max'):
    """Pool a 5D input over three axes using two passes of 2D pooling.

    First every slice along the last axis is 2D-pooled over the first two
    pooled axes.  Then the last axis is pooled as a fake 2D pooling whose
    window and stride are 1 on the ignored axis.

    :param inputs: symbolic 5D variable (indexed with five indices).
    :param pool_shape: three pooling window sizes.
    :param pool_stride: three pooling strides.
    :param image_shape: the three pooled-axis sizes of ``inputs`` as Python
        ints (they are used in ``range``).
    :param mode: pooling mode forwarded to ``dnn_pool``.
    :returns: the pooled 5D variable.
    :raises AssertionError: if a pooling window exceeds the image size.
    """
    for i in range(3):
        # Plain `{}` placeholders: `{:s}` on a tuple raises TypeError on
        # Python 3.4+, which would hide the actual assertion message.
        assert pool_shape[i] <= image_shape[i], (
            "pool shape should be less"
            " or equal than image shape, {} > {} for "
            "pool_shape: {}, image_shape:{}").format(
                pool_shape[i], image_shape[i], pool_shape, image_shape)

    output_shape = [((image_shape[i] - pool_shape[i]) // pool_stride[i]) + 1
                    for i in range(3)]

    # Pass 1: 2D pooling of every slice along the last axis.
    output2d_pooled = gpu_alloc_empty(inputs.shape[0], inputs.shape[1],
                                      output_shape[0], output_shape[1],
                                      image_shape[2])
    for z in range(image_shape[2]):
        pooled_slice = dnn_pool(inputs[:, :, :, :, z], ws=pool_shape[0:2],
                                stride=pool_stride[0:2], mode=mode)
        output2d_pooled = T.set_subtensor(output2d_pooled[:, :, :, :, z],
                                          pooled_slice)

    # Pass 2: 1D pooling over the last axis, one slice of axis 3 at a time.
    output = gpu_alloc_empty(inputs.shape[0], inputs.shape[1],
                             output_shape[0], output_shape[1],
                             output_shape[2])
    max_y = output_shape[1]
    for y in range(max_y):
        # Axis 2 was already pooled in pass 1, so its window and stride
        # are set to 1 here.
        final_pooled_slice = dnn_pool(output2d_pooled[:, :, :, y, :],
                                      ws=(1, pool_shape[2]),
                                      stride=(1, pool_stride[2]), mode=mode)
        output = T.set_subtensor(output[:, :, :, y, :], final_pooled_slice)
    return output
def deconv(self, X, subsample=(2, 2), border_mode=(2, 2), conv_mode='conv', atype='sigmoid'):
    """Upsample ``X`` via ``GpuDnnConvGradI``, add the bias and activate.

    A descriptor for a dummy forward convolution is created and the gradient
    w.r.t. its inputs is used as the deconvolution.  The original author notes
    this is only tested/working with "same" padding.
    """
    # C-contiguous views of the operands (copied only when necessary).
    top = gpu_contiguous(X)
    filters = gpu_contiguous(self.W)

    target_dims = (
        top.shape[0],
        filters.shape[1],
        top.shape[2] * subsample[0],
        top.shape[3] * subsample[1],
    )

    # Uninitialised GPU buffer; only its symbolic shape is needed here.
    gpu_alloc_img_shape = gpu_alloc_empty(*target_dims).shape

    descriptor_op = GpuDnnConvDesc(border_mode=border_mode,
                                   subsample=subsample,
                                   conv_mode=conv_mode)
    desc = descriptor_op(gpu_alloc_img_shape, filters.shape)

    # Uninitialised buffer that receives the result.
    out = gpu_alloc_empty(*target_dims)

    d_img = GpuDnnConvGradI()(filters, top, out, desc)
    biased = d_img + self.b.dimshuffle('x', 0, 'x', 'x')
    return activation_fn_th(biased, atype=atype)
def __init__(self, rng, input, filter_shape=None, W=None, b=None, init='something', border=None, subsample=(1, 1)):
    """Build a cuDNN-based deconvolution layer and its symbolic output.

    The output is ``GpuDnnConvGradI`` applied to ``input`` minus the bias.
    Largely based on
    https://github.com/Newmu/dcgan_code/blob/master/lib/ops.py#L85 with some
    minor changes.

    :param rng: random generator providing ``uniform``; only used when ``W``
        is None and ``init`` is not ``'zero'``.
    :param input: symbolic 4D input (indexed on axes 0..3).
    :param filter_shape: 4-element filter shape; required when ``W`` or
        ``b`` has to be created.
    :param W: optional shared variable holding the filters.
    :param b: optional shared variable holding one bias per
        ``filter_shape[0]`` entry.
    :param init: ``'zero'`` for zero filters, anything else for uniform
        initialisation within ``sqrt(6 / (fan_in + fan_out))``.
    :param border: ``'same'`` or ``'valid'``.
    :param subsample: factors applied to the two trailing input axes.
    :raises NotImplementedError: if ``border`` is not ``'same'``/``'valid'``.
    """
    # Weight and bias initialisation if none are given.
    if W is None:
        if init == 'zero':
            W_values = np.zeros(filter_shape, dtype=theano.config.floatX)
            W = theano.shared(value=W_values, name='W_conv', borrow=True)
        else:
            fan_in = np.prod(filter_shape[1:])
            fan_out = (filter_shape[0] * np.prod(filter_shape[2:]))
            # Initialise weights uniformly within +/- W_bound.
            W_bound = np.sqrt(6. / (fan_in + fan_out))
            W = theano.shared(
                np.asarray(
                    rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                    dtype=theano.config.floatX
                ),
                name='W_conv',
                borrow=True
            )

    if b is None:
        # The bias is a 1D tensor: one entry per filter_shape[0].
        b_values = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
        b = theano.shared(value=b_values, name='b_conv', borrow=True)

    self.W = W
    self.b = b
    self.filter_shape = W.shape.eval()
    self.border = border

    if border == 'same':
        # Symmetric "same" padding only exists for odd filter sizes.
        assert self.filter_shape[2] % 2 == 1 and self.filter_shape[3] % 2 == 1
        self.border_padding = ((self.filter_shape[2] - 1) // 2,
                               (self.filter_shape[3] - 1) // 2)
        out = basic_ops.gpu_alloc_empty(
            input.shape[0], self.W.shape[1],
            input.shape[2] * subsample[0],
            input.shape[3] * subsample[1])
    elif border == 'valid':
        self.border_padding = (0, 0)
        out = basic_ops.gpu_alloc_empty(
            input.shape[0], self.W.shape[1],
            input.shape[2] * subsample[0] + (self.filter_shape[2] - 1),
            input.shape[3] * subsample[1] + (self.filter_shape[3] - 1))
    else:
        # `return NotImplementedError()` made Python raise a confusing
        # "TypeError: __init__() should return None"; raise it instead.
        raise NotImplementedError(
            "border must be 'same' or 'valid', got %r" % (border,))

    self.subsample = subsample

    # The bias is subtracted from the input before the gradient op is applied.
    img = basic_ops.gpu_contiguous(input - self.b.dimshuffle('x', 0, 'x', 'x'))
    kerns = basic_ops.gpu_contiguous(self.W)
    # NOTE(review): the descriptor is built from the 'same'-sized shape even
    # when border == 'valid' (where `out` is larger) -- confirm this is intended.
    desc = dnn.GpuDnnConvDesc(border_mode=self.border_padding,
                              subsample=self.subsample,
                              conv_mode='conv')(
        basic_ops.gpu_alloc_empty(img.shape[0], kerns.shape[1],
                                  img.shape[2] * subsample[0],
                                  img.shape[3] * subsample[1]).shape,
        kerns.shape)
    d_img = dnn.GpuDnnConvGradI()(kerns, img, out, desc)
    conv_out = d_img

    self.output = conv_out

    # Store parameters of this layer.
    self.params = [self.W, self.b]

    self.input = input
def convolve(self, input, **kwargs):
    """Apply ``GpuDnnConvGradI`` to ``input`` using the layer filters.

    The output shape comes from ``self.get_output_shape_for``; its first four
    entries size the result buffer.  ``kwargs`` are ignored.
    """
    top = gpu_contiguous(input)
    filters = gpu_contiguous(self.W)

    out_shape = self.get_output_shape_for(top.shape)
    dims = [out_shape[axis] for axis in range(4)]

    descriptor_op = GpuDnnConvDesc(border_mode=self.border_mode,
                                   subsample=self.subsample)
    desc = descriptor_op(gpu_alloc_empty(*dims).shape, filters.shape)

    out_mem = gpu_alloc_empty(*dims)
    return GpuDnnConvGradI()(filters, top, out_mem, desc)
def deconv(X, w, subsample=(1, 1), border_mode=(0, 0), conv_mode="conv"):
    """Use cuDNN's convolution gradient w.r.t. inputs as a deconvolution.

    The result has ``w.shape[1]`` channels and the two trailing axes of ``X``
    multiplied by ``subsample``.
    """
    top = gpu_contiguous(X)
    filters = gpu_contiguous(w)

    upsampled_dims = (
        top.shape[0],
        filters.shape[1],
        top.shape[2] * subsample[0],
        top.shape[3] * subsample[1],
    )

    descriptor_op = GpuDnnConvDesc(border_mode=border_mode,
                                   subsample=subsample,
                                   conv_mode=conv_mode)
    desc = descriptor_op(gpu_alloc_empty(*upsampled_dims).shape, filters.shape)

    out = gpu_alloc_empty(*upsampled_dims)
    return GpuDnnConvGradI()(filters, top, out, desc)
def grad(self, inp, grads):
    """Return the gradients w.r.t. ``(kerns, top, output, desc, alpha, beta)``.

    The descriptor is disconnected; gradients for ``alpha`` and ``beta`` are
    reported as not implemented.
    """
    kerns, top, _output, desc, alpha, beta = inp
    (upstream,) = grads
    upstream = gpu_contiguous(upstream)

    # Uninitialised buffers shaped like the quantities being differentiated.
    kerns_buf = gpu_alloc_empty(*kerns.shape)
    top_buf = gpu_alloc_empty(*top.shape)

    grad_kerns = GpuDnn3dConvGradW()(upstream, top, kerns_buf, desc)
    grad_top = GpuDnn3dConv()(upstream, kerns, top_buf, desc)

    return (
        grad_kerns * alpha,
        grad_top * alpha,
        upstream * beta,
        DisconnectedType()(),
        grad_not_implemented(self, 4, alpha),
        grad_not_implemented(self, 5, beta),
    )
def deconv(X, w, subsample=(1, 1), border_mode=(0, 0), conv_mode='conv'):
    """Deconvolve ``X`` with ``w`` through ``GpuDnnConvGradI``.

    The first two axes of ``w`` are swapped before use.  A dummy forward
    convolution descriptor is set up and its input gradient serves as the
    deconvolution; per the original note this is only tested/working with
    "same" padding.
    """
    top = gpu_contiguous(X)
    swapped = w.dimshuffle(1, 0, 2, 3)
    filters = gpu_contiguous(swapped)

    upsampled_dims = (
        top.shape[0],
        filters.shape[1],
        top.shape[2] * subsample[0],
        top.shape[3] * subsample[1],
    )

    descriptor_op = GpuDnnConvDesc(border_mode=border_mode,
                                   subsample=subsample,
                                   conv_mode=conv_mode)
    desc = descriptor_op(gpu_alloc_empty(*upsampled_dims).shape, filters.shape)

    out = gpu_alloc_empty(*upsampled_dims)
    return GpuDnnConvGradI()(filters, top, out, desc)
def convolve(self, input, **kwargs):
    """Run the cuDNN input-gradient op on ``input`` with filters ``self.W``.

    ``self.get_output_shape_for`` supplies the four output dimensions used
    for both the descriptor and the result buffer.  ``kwargs`` are unused.
    """
    data = gpu_contiguous(input)
    weights = gpu_contiguous(self.W)
    shape = self.get_output_shape_for(data.shape)

    def fresh_buffer():
        # Uninitialised GPU buffer with the layer's output shape.
        return gpu_alloc_empty(shape[0], shape[1], shape[2], shape[3])

    build_desc = GpuDnnConvDesc(border_mode=self.border_mode,
                                subsample=self.subsample)
    desc = build_desc(fresh_buffer().shape, weights.shape)
    out_mem = fresh_buffer()
    return GpuDnnConvGradI()(weights, data, out_mem, desc)
def deconv(X, w, subsample=(1, 1), border_mode=(0, 0), conv_mode='conv'):
    """Deconvolution built from the gradient of a dummy forward convolution.

    Taken from the DCGAN repo:
    https://github.com/Newmu/dcgan_code/blob/master/lib/ops.py
    The original note says it is only tested/working with "same" padding.
    """
    data = gpu_contiguous(X)
    weights = gpu_contiguous(w)

    batch = data.shape[0]
    channels = weights.shape[1]
    rows = data.shape[2] * subsample[0]
    cols = data.shape[3] * subsample[1]

    build_desc = GpuDnnConvDesc(border_mode=border_mode,
                                subsample=subsample,
                                conv_mode=conv_mode)
    desc = build_desc(gpu_alloc_empty(batch, channels, rows, cols).shape,
                      weights.shape)

    result_buf = gpu_alloc_empty(batch, channels, rows, cols)
    return GpuDnnConvGradI()(weights, data, result_buf, desc)
def grad(self, inp, grads):
    """Return the gradients w.r.t. ``(img, top, output, desc, alpha, beta)``.

    The descriptor is disconnected; gradients for ``alpha`` and ``beta`` are
    reported as not implemented.
    """
    img, top, _output, desc, alpha, beta = inp
    (upstream,) = grads
    upstream = gpu_contiguous(upstream)

    # Uninitialised buffers shaped like the quantities being differentiated.
    img_buf = gpu_alloc_empty(*img.shape)
    top_buf = gpu_alloc_empty(*top.shape)

    grad_img = GpuDnn3dConvGradI()(upstream, top, img_buf, desc)
    grad_top = GpuDnn3dConv()(img, upstream, top_buf, desc)

    return (
        grad_img * alpha,
        grad_top * alpha,
        upstream * beta,
        DisconnectedType()(),
        grad_not_implemented(self, 4, alpha),
        grad_not_implemented(self, 5, beta),
    )
def grad(self, inp, grads):
    """Return the gradients w.r.t. ``(img, kerns, output, desc, alpha, beta)``.

    The descriptor is disconnected; gradients for ``alpha`` and ``beta`` are
    reported as not implemented.  The result is a list.
    """
    img, kerns, _output, desc, alpha, beta = inp
    (upstream,) = grads
    upstream = gpu_contiguous(upstream)

    # Uninitialised buffers shaped like the quantities being differentiated.
    img_buf = gpu_alloc_empty(*img.shape)
    kerns_buf = gpu_alloc_empty(*kerns.shape)

    grad_img = GpuDnn3dConvGradI()(kerns, upstream, img_buf, desc)
    grad_kerns = GpuDnn3dConvGradW()(img, upstream, kerns_buf, desc)

    return [
        grad_img * alpha,
        grad_kerns * alpha,
        upstream * beta,
        DisconnectedType()(),
        grad_not_implemented(self, 4, alpha),
        grad_not_implemented(self, 5, beta),
    ]
def test_dnn_conv_inplace():
    """Check that the cuDNN conv ops are inplace even if GpuAllocEmpty nodes
    could get merged together.

    For the forward, grad-W and grad-I ops, two ops are compiled into one
    function and the graph must contain two inplace conv nodes and two
    ``GpuAllocEmpty`` nodes.
    """
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)

    def check_graph(fn, op_class):
        # Two inplace nodes of `op_class` and two separate output buffers.
        nodes = fn.maker.fgraph.toposort()
        conv_nodes = [node for node in nodes if isinstance(node.op, op_class)]
        assert len(conv_nodes) == 2
        assert all(node.op.inplace for node in conv_nodes)
        n_allocs = sum(1 for node in nodes
                       if isinstance(node.op, GpuAllocEmpty))
        assert n_allocs == 2

    img_shp = [2, 5, 6, 8]
    kern_shp = [3, 5, 5, 6]
    img = T.ftensor4('img')
    kern = T.ftensor4('kern')
    out = T.ftensor4('out')
    desc1 = dnn.GpuDnnConvDesc(border_mode='valid', conv_mode='conv')(
        img.shape, kern.shape)
    desc2 = dnn.GpuDnnConvDesc(border_mode='valid', conv_mode='cross')(
        img.shape, kern.shape)

    # Forward op: this is the only case that is actually executed.
    fwd_conv = dnn.dnn_conv(img, kern, conv_mode='conv')
    fwd_cross = dnn.dnn_conv(img, kern, conv_mode='cross')
    f = theano.function([img, kern], [fwd_conv, fwd_cross], mode=mode_with_gpu)
    img_val = numpy.random.rand(*img_shp).astype('float32')
    kern_val = numpy.random.rand(*kern_shp).astype('float32')
    _res_conv, _res_cross = f(img_val, kern_val)
    check_graph(f, dnn.GpuDnnConv)

    # Gradient w.r.t. the weights: both ops share one symbolic buffer.
    out = gpu_alloc_empty(*kern.shape)
    gw_conv = dnn.GpuDnnConvGradW()(img, kern, out, desc1)
    gw_cross = dnn.GpuDnnConvGradW()(img, kern, out, desc2)
    f = theano.function([img, kern], [gw_conv, gw_cross], mode=mode_with_gpu)
    check_graph(f, dnn.GpuDnnConvGradW)

    # Gradient w.r.t. the inputs: both ops share one symbolic buffer.
    out = gpu_alloc_empty(*img.shape)
    gi_conv = dnn.GpuDnnConvGradI()(img, kern, out, desc1)
    gi_cross = dnn.GpuDnnConvGradI()(img, kern, out, desc2)
    f = theano.function([img, kern], [gi_conv, gi_cross], mode=mode_with_gpu)
    check_graph(f, dnn.GpuDnnConvGradI)
def deconv(self, X, subsample=(2, 2), border_mode=(2, 2), conv_mode='conv', atype='sigmoid', testF=False):
    """Upsample ``X`` via ``GpuDnnConvGradI``, normalise per channel, activate.

    A descriptor for a dummy forward convolution is built and its gradient
    w.r.t. the inputs is used as the deconvolution (per the original note,
    only tested/working with "same" padding).  The result is normalised over
    axes 0, 2 and 3, scaled by ``self.eta``, shifted by ``self.beta`` and
    passed through ``activation_fn_th``.

    :param X: symbolic 4D input; indexed on axes 0..3.
    :param subsample: factors multiplying the two trailing axes of ``X``.
    :param border_mode: padding handed to ``GpuDnnConvDesc``.
    :param conv_mode: convolution mode handed to ``GpuDnnConvDesc``.
    :param atype: activation type forwarded to ``activation_fn_th``.
    :param testF: if true, normalise with the stored ``self.stat_mean`` and
        ``self.stat_std`` instead of statistics of the current batch.
    :returns: the activated, normalised deconvolution output.
    :raises ValueError: if ``someconfigs.backend`` is neither ``'gpuarray'``
        nor ``'cudandarray'``.
    """
    # Both ops require C-contiguous inputs; gpu_contiguous copies only if needed.
    img = gpu_contiguous(X)
    kerns = gpu_contiguous(self.W)

    # Shape of the upsampled output buffer.
    out_shape = (img.shape[0], kerns.shape[1],
                 img.shape[2] * subsample[0], img.shape[3] * subsample[1])
    make_desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                               conv_mode=conv_mode)

    if someconfigs.backend == 'gpuarray':
        # The gpuarray descriptor op is called with the kernel shape only.
        desc = make_desc(kerns.shape)
        out = GpuAllocEmpty('float32', None)(*out_shape)
    elif someconfigs.backend == 'cudandarray':
        # The old CUDA backend descriptor also takes the image shape.
        desc = make_desc(gpu_alloc_empty(*out_shape).shape, kerns.shape)
        out = gpu_alloc_empty(*out_shape)
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError on `out`/`desc`.
        raise ValueError("Unsupported someconfigs.backend %r; expected "
                         "'gpuarray' or 'cudandarray'" % (someconfigs.backend,))

    # The convolution gradient with respect to the inputs.
    d_img = GpuDnnConvGradI()(kerns, img, out, desc)
    ConH0 = d_img  # no bias term is added here (it was disabled in the original)

    if testF:
        # NOTE(review): this branch divides by (stat_std + TINY) while the
        # batch branch divides by sqrt(var + TINY) -- confirm that stat_std
        # stores a standard deviation.
        ConH1 = (ConH0 - self.stat_mean.dimshuffle('x', 0, 'x', 'x')) \
            / (self.stat_std.dimshuffle('x', 0, 'x', 'x') + TINY)
    else:
        mean = ConH0.mean(axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x')
        # Mean squared deviation, i.e. the (biased) variance per channel.
        var = T.mean(T.sqr(ConH0 - mean), axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x')
        ConH1 = (ConH0 - mean) / T.sqrt(var + TINY)

    ConH2 = self.eta.dimshuffle('x', 0, 'x', 'x') * ConH1 \
        + self.beta.dimshuffle('x', 0, 'x', 'x')
    return activation_fn_th(ConH2, atype=atype)
def test_dnn_conv_inplace():
    """Verify that inplace cuDNN conv ops survive merging of GpuAllocEmpty.

    The forward, grad-W and grad-I ops are each compiled twice (``'conv'``
    and ``'cross'`` mode) into one function whose graph must hold two inplace
    conv nodes and two ``GpuAllocEmpty`` nodes.
    """
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)

    def count_nodes(nodes, op_class):
        return len([node for node in nodes if isinstance(node.op, op_class)])

    def assert_two_inplace(fn, op_class):
        nodes = fn.maker.fgraph.toposort()
        assert count_nodes(nodes, op_class) == 2
        for node in nodes:
            if isinstance(node.op, op_class):
                assert node.op.inplace
        assert count_nodes(nodes, GpuAllocEmpty) == 2

    img_shp = [2, 5, 6, 8]
    kern_shp = [3, 5, 5, 6]
    img = T.ftensor4('img')
    kern = T.ftensor4('kern')
    out = T.ftensor4('out')
    make_conv_desc = dnn.GpuDnnConvDesc(border_mode='valid', conv_mode='conv')
    make_cross_desc = dnn.GpuDnnConvDesc(border_mode='valid', conv_mode='cross')
    desc1 = make_conv_desc(img.shape, kern.shape)
    desc2 = make_cross_desc(img.shape, kern.shape)

    # Forward op (the compiled function is also run once on random data).
    outputs = [dnn.dnn_conv(img, kern, conv_mode='conv'),
               dnn.dnn_conv(img, kern, conv_mode='cross')]
    f = theano.function([img, kern], outputs, mode=mode_with_gpu)
    _first, _second = f(numpy.random.rand(*img_shp).astype('float32'),
                        numpy.random.rand(*kern_shp).astype('float32'))
    assert_two_inplace(f, dnn.GpuDnnConv)

    # Grad w.r.t. weights.
    out = gpu_alloc_empty(*kern.shape)
    outputs = [dnn.GpuDnnConvGradW()(img, kern, out, desc1),
               dnn.GpuDnnConvGradW()(img, kern, out, desc2)]
    f = theano.function([img, kern], outputs, mode=mode_with_gpu)
    assert_two_inplace(f, dnn.GpuDnnConvGradW)

    # Grad w.r.t. inputs.
    out = gpu_alloc_empty(*img.shape)
    outputs = [dnn.GpuDnnConvGradI()(img, kern, out, desc1),
               dnn.GpuDnnConvGradI()(img, kern, out, desc2)]
    f = theano.function([img, kern], outputs, mode=mode_with_gpu)
    assert_two_inplace(f, dnn.GpuDnnConvGradI)
def deconv(X, w, subsample=(1, 1), border_mode=(0, 0), conv_mode='conv'):
    """Apply ``GpuDnnConvGradI`` to ``X`` with kernels ``w``.

    The output buffer has ``w.shape[1]`` channels and the two trailing axes
    of ``X`` scaled by ``subsample``.
    """
    grads_in = gpu_contiguous(X)
    kernels = gpu_contiguous(w)

    def empty_output():
        # Uninitialised GPU buffer with the upsampled shape.
        return gpu_alloc_empty(grads_in.shape[0],
                               kernels.shape[1],
                               grads_in.shape[2] * subsample[0],
                               grads_in.shape[3] * subsample[1])

    desc_factory = GpuDnnConvDesc(border_mode=border_mode,
                                  subsample=subsample,
                                  conv_mode=conv_mode)
    desc = desc_factory(empty_output().shape, kernels.shape)
    out = empty_output()
    return GpuDnnConvGradI()(kernels, grads_in, out, desc)
def deconv(X, w, subsample=(1, 1), border_mode=(0, 0), conv_mode='conv'):
    """Deconvolve ``X`` with ``w`` using cuDNN's ``GpuDnnConvGradI``.

    Sets up a dummy convolutional forward pass and uses its grad as deconv;
    currently only tested/working with same padding.
    Source: https://github.com/Newmu/dcgan_code/lib/ops.py

    :param X: symbolic 4D input; indexed on axes 0..3.
    :param w: symbolic 4D kernels; ``w.shape[1]`` is the output channel count.
    :param subsample: factors multiplying the two trailing axes of ``X``.
    :param border_mode: padding handed to ``GpuDnnConvDesc``.
    :param conv_mode: convolution mode handed to ``GpuDnnConvDesc``.
    :returns: the output of ``GpuDnnConvGradI``.
    """
    # Function-scope imports, as in the original; the docstring must come
    # before them, otherwise it is just a discarded string expression.
    from theano.sandbox.cuda.basic_ops import (gpu_contiguous, gpu_alloc_empty)
    from theano.sandbox.cuda.dnn import GpuDnnConvDesc, GpuDnnConvGradI

    img = gpu_contiguous(X)
    kerns = gpu_contiguous(w)
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                          conv_mode=conv_mode)(
        gpu_alloc_empty(img.shape[0], kerns.shape[1],
                        img.shape[2] * subsample[0],
                        img.shape[3] * subsample[1]).shape,
        kerns.shape)
    out = gpu_alloc_empty(img.shape[0], kerns.shape[1],
                          img.shape[2] * subsample[0],
                          img.shape[3] * subsample[1])
    d_img = GpuDnnConvGradI()(kerns, img, out, desc)
    return d_img
def build_graph(self, state_below):
    """Build the symbolic output of this deconvolution layer.

    ``state_below`` is moved to axis order (0, 3, 1, 2) for cuDNN, passed
    through ``GpuDnnConvGradI`` with ``self.filters``, biased, moved back to
    axis order (0, 2, 3, 1) and activated.
    """
    # Spatial output size is inferred from axes 1 and 2 of the input
    # (presumably a batch/rows/cols/channels layout -- verify against caller).
    spatial = DeConvNet.infer_size(state_below.shape[1:3],
                                   self.filters.shape[2:],
                                   self.stride,
                                   self.border_mode)
    out_shape = tensor.stack([state_below.shape[0], self.nfilters,
                              spatial[0], spatial[1]])

    kerns = gpu_contiguous(self.filters)
    top = gpu_contiguous(state_below.dimshuffle(0, 3, 1, 2))

    desc = GpuDnnConvDesc(border_mode=self.border_mode,
                          subsample=self.stride,
                          conv_mode='conv')(out_shape, kerns.shape)
    pred = GpuDnnConvGradI()(kerns, top, gpu_alloc_empty(*out_shape), desc)
    pred += self.b.dimshuffle('x', 0, 'x', 'x')
    pred = pred.dimshuffle(0, 2, 3, 1)

    # NOTE(review): self.activ is evaluated as Python source; make sure it
    # never comes from untrusted input.
    activation = eval(self.activ)
    return activation(pred)
def dnn_3dconv(img, kerns, subsample=(1, 1), conv_mode='conv'):
    """
    GPU 3d convolution using cuDNN from NVIDIA.

    The memory layout to use is 'bc012', that is 'batch', 'channel',
    'first dim', 'second dim', 'third dim' in that order.

    :param img: images to do the convolution over
    :param kerns: convolution filters
    :param subsample: perform subsampling of the output (default: (1, 1))
    :param conv_mode: convolution mode passed to the descriptor

    :warning: The cuDNN library only works with GPU that have a compute
      capability of 3.0 or higher.  This means that older GPU will not
      work with this Op.
    """
    # NOTE(review): the default subsample has two entries although this is a
    # 3d convolution -- confirm what GpuConv3dDesc expects.
    contiguous_img = gpu_contiguous(img)
    contiguous_kerns = gpu_contiguous(kerns)

    desc = GpuDnnConv3dDesc(subsample=tuple(subsample),
                            conv_mode=conv_mode)()
    # The subsample is read back from the op that produced the descriptor.
    result_shape = GpuDnn3dConv.get_out_shape(contiguous_img.shape,
                                              contiguous_kerns.shape,
                                              desc.owner.op.subsample)
    result_buf = gpu_alloc_empty(*result_shape)
    return GpuDnn3dConv()(contiguous_img, contiguous_kerns, result_buf, desc)
def build_graph(self, state_below):
    """Return the activated deconvolution of ``state_below``.

    The input is dimshuffled to (0, 3, 1, 2), run through cuDNN's
    ``GpuDnnConvGradI`` with ``self.filters``, biased with ``self.b`` and
    dimshuffled back to (0, 2, 3, 1) before the activation.
    """
    weights = self.filters
    pad = self.border_mode

    # Output size from axes 1 and 2 of the (not yet shuffled) input.
    height_width = DeConvNet.infer_size(state_below.shape[1:3],
                                        weights.shape[2:],
                                        self.stride, pad)
    dims = [state_below.shape[0], self.nfilters,
            height_width[0], height_width[1]]
    out_shape = tensor.stack(dims)

    shuffled = state_below.dimshuffle(0, 3, 1, 2)
    weights = gpu_contiguous(weights)
    shuffled = gpu_contiguous(shuffled)

    make_desc = GpuDnnConvDesc(border_mode=pad, subsample=self.stride,
                               conv_mode='conv')
    desc = make_desc(out_shape, weights.shape)
    out_buf = gpu_alloc_empty(*out_shape)

    pred = GpuDnnConvGradI()(weights, shuffled, out_buf, desc)
    pred += self.b.dimshuffle('x', 0, 'x', 'x')
    pred = pred.dimshuffle(0, 2, 3, 1)

    # NOTE(review): eval() of self.activ runs arbitrary Python; the attribute
    # must not come from untrusted input.
    return eval(self.activ)(pred)
def _deconv2d(X, w, subsample=(1, 1), border_mode=(0, 0), conv_mode='conv'):
    """2D deconvolution through cuDNN's input-gradient op.

    From Alec (https://github.com/Newmu/dcgan_code/blob/master/lib/ops.py):
    a dummy forward convolution is described and its gradient is used as the
    deconvolution.  Only tested/working with "same" padding per that source.
    """
    top = gpu_contiguous(X)
    filters = gpu_contiguous(w)

    batch = top.shape[0]
    channels = filters.shape[1]
    rows = top.shape[2] * subsample[0]
    cols = top.shape[3] * subsample[1]
    out = gpu_alloc_empty(batch, channels, rows, cols)

    # The descriptor is built from the shape of the output buffer itself.
    desc = GpuDnnConvDesc(border_mode=border_mode,
                          subsample=subsample,
                          conv_mode=conv_mode)(out.shape, filters.shape)

    return GpuDnnConvGradI()(filters, top, out, desc)
def call(self, x, mask=None):
    """Apply ``GpuDnnConvGradI`` to ``x`` with the layer weights ``self.W``.

    The output has ``self.W.shape[1]`` channels and the two trailing axes of
    ``x`` multiplied by ``self.subsample``.  ``mask`` is not used.
    """
    top = gpu_contiguous(x)
    kernels = gpu_contiguous(self.W)

    rows = top.shape[2] * self.subsample[0]
    cols = top.shape[3] * self.subsample[1]
    out = gpu_alloc_empty(top.shape[0], kernels.shape[1], rows, cols)

    make_desc = GpuDnnConvDesc(border_mode=self.border_mode,
                               subsample=self.subsample,
                               conv_mode=self.conv_mode)
    desc = make_desc(out.shape, kernels.shape)
    return GpuDnnConvGradI()(kernels, top, out, desc)
def deconv(X, w, subsample=(1, 1), border_mode=(0, 0), conv_mode='conv'):
    """Deconvolve ``X`` with ``w`` via the gradient of a dummy convolution.

    A forward convolution descriptor is set up and ``GpuDnnConvGradI`` is
    used as the deconvolution.  Currently only tested/working with same
    padding, according to the original note.
    """
    # Theano's CUDA backend is imported only when the function is called.
    from theano.sandbox.cuda.basic_ops import gpu_contiguous, gpu_alloc_empty
    from theano.sandbox.cuda.dnn import GpuDnnConvDesc, GpuDnnConvGradI

    top = gpu_contiguous(X)
    filters = gpu_contiguous(w)

    upsampled_dims = (
        top.shape[0],
        filters.shape[1],
        top.shape[2] * subsample[0],
        top.shape[3] * subsample[1],
    )

    shape_of_empty = gpu_alloc_empty(*upsampled_dims).shape
    make_desc = GpuDnnConvDesc(border_mode=border_mode,
                               subsample=subsample,
                               conv_mode=conv_mode)
    desc = make_desc(shape_of_empty, filters.shape)

    out = gpu_alloc_empty(*upsampled_dims)
    return GpuDnnConvGradI()(filters, top, out, desc)
def apply(self, input_):
    """Deconvolve ``input_`` with this brick's weights via ``GpuDnnConvGradI``.

    The first two axes of the weights are swapped before use.  The output has
    the two trailing axes of ``input_`` multiplied by ``self.stride``; the
    bias is added per channel when ``self.use_bias`` is set.
    """
    if self.use_bias:
        W, b = self.parameters
    else:
        (W,) = self.parameters

    top = gpu_contiguous(input_)
    filters = gpu_contiguous(W.dimshuffle(1, 0, 2, 3))

    upsampled_dims = (
        top.shape[0],
        filters.shape[1],
        top.shape[2] * self.stride[0],
        top.shape[3] * self.stride[1],
    )

    make_desc = GpuDnnConvDesc(border_mode=self.pad,
                               subsample=self.stride,
                               conv_mode='conv')
    desc = make_desc(gpu_alloc_empty(*upsampled_dims).shape, filters.shape)
    out = gpu_alloc_empty(*upsampled_dims)

    output = GpuDnnConvGradI()(filters, top, out, desc)
    if self.use_bias:
        output += b.dimshuffle('x', 0, 'x', 'x')
    return output
def apply(self, input_):
    """Run the cuDNN input-gradient op on ``input_`` as a deconvolution.

    Weights come from ``self.parameters`` (with a bias when ``self.use_bias``
    is set) and have their first two axes swapped.  ``self.pad`` and
    ``self.stride`` configure the descriptor.
    """
    if self.use_bias:
        W, b = self.parameters
    else:
        W, = self.parameters
    swapped = W.dimshuffle(1, 0, 2, 3)

    data = gpu_contiguous(input_)
    weights = gpu_contiguous(swapped)

    def empty_output():
        # Uninitialised buffer with the strided-up output shape.
        return gpu_alloc_empty(data.shape[0],
                               weights.shape[1],
                               data.shape[2] * self.stride[0],
                               data.shape[3] * self.stride[1])

    desc = GpuDnnConvDesc(border_mode=self.pad,
                          subsample=self.stride,
                          conv_mode='conv')(empty_output().shape,
                                            weights.shape)
    out = empty_output()
    output = GpuDnnConvGradI()(weights, data, out, desc)

    if self.use_bias:
        output += b.dimshuffle('x', 0, 'x', 'x')
    return output
def dnn_pool3d2d(inputs, pool_shape, pool_stride, image_shape, mode='max'):
    """Pool a 5D input over three axes using two passes of 2D pooling.

    Pool first all time-slices, so 2d-poolings over width and height.  Then
    do a 1d-pooling over the time (done as fake 2d pooling with pooling
    shape 1 for the ignored dimension).

    :param inputs: symbolic 5D variable (indexed with five indices).
    :param pool_shape: three pooling window sizes.
    :param pool_stride: three pooling strides.
    :param image_shape: the three pooled-axis sizes of ``inputs`` as Python
        ints (they are used in ``range``).
    :param mode: pooling mode forwarded to ``dnn_pool``.
    :returns: the pooled 5D variable.
    :raises AssertionError: if a pooling window exceeds the image size.
    """
    for i in range(3):
        # Plain `{}` placeholders: `{:s}` on a tuple raises TypeError on
        # Python 3.4+, which would hide the actual assertion message.
        assert pool_shape[i] <= image_shape[i], (
            "pool shape should be less"
            " or equal than image shape, {} > {} for "
            "pool_shape: {}, image_shape:{}").format(
                pool_shape[i], image_shape[i], pool_shape, image_shape)

    output_shape = [((image_shape[i] - pool_shape[i]) // pool_stride[i]) + 1
                    for i in range(3)]

    # Pass 1: 2D pooling of every slice along the last axis.
    output2d_pooled = gpu_alloc_empty(inputs.shape[0], inputs.shape[1],
                                      output_shape[0], output_shape[1],
                                      image_shape[2])
    for z in range(image_shape[2]):
        pooled_slice = dnn_pool(inputs[:, :, :, :, z], ws=pool_shape[0:2],
                                stride=pool_stride[0:2], mode=mode)
        output2d_pooled = T.set_subtensor(output2d_pooled[:, :, :, :, z],
                                          pooled_slice)

    # Pass 2: 1D pooling over the last axis, one slice of axis 3 at a time.
    output = gpu_alloc_empty(inputs.shape[0], inputs.shape[1],
                             output_shape[0], output_shape[1],
                             output_shape[2])
    max_y = output_shape[1]
    for y in range(max_y):
        # Ignore the first dimension of the slice, already pooled in the
        # loop before, so its window and stride are set to 1.
        final_pooled_slice = dnn_pool(output2d_pooled[:, :, :, y, :],
                                      ws=(1, pool_shape[2]),
                                      stride=(1, pool_stride[2]), mode=mode)
        output = T.set_subtensor(output[:, :, :, y, :], final_pooled_slice)
    return output
def convolve(self, input, **kwargs):
    """Deconvolve ``input`` with ``self.W`` via cuDNN's ``GpuDnnConvGradI``.

    ``self.crop`` is used as border mode and ``self.stride`` as subsample;
    the output has the two trailing axes of ``input`` multiplied by the
    stride.  ``kwargs`` are ignored.
    """
    # Messy to have these imports here, but it seems to allow for switching
    # DNN off.
    from theano.sandbox.cuda.basic_ops import (as_cuda_ndarray_variable,
                                               host_from_gpu,
                                               gpu_contiguous, HostFromGpu,
                                               gpu_alloc_empty)
    from theano.sandbox.cuda.dnn import GpuDnnConvDesc, GpuDnnConv, GpuDnnConvGradI, dnn_conv, dnn_pool

    top = gpu_contiguous(input)
    filters = gpu_contiguous(self.W)

    upsampled_dims = (
        top.shape[0],
        filters.shape[1],
        top.shape[2] * self.stride[0],
        top.shape[3] * self.stride[1],
    )

    make_desc = GpuDnnConvDesc(border_mode=self.crop,
                               subsample=self.stride,
                               conv_mode='conv')
    desc = make_desc(gpu_alloc_empty(*upsampled_dims).shape, filters.shape)
    out = gpu_alloc_empty(*upsampled_dims)

    conved = GpuDnnConvGradI()(filters, top, out, desc)
    return conved
def lmul(self, x):
    """
    Convolve `x` with this operator's filters (``dot(x, A)``).

    This method overrides the original Conv2D lmul to make it work
    with arbitrary axis orders.

    Parameters
    ----------
    x : 4D symbolic variable
        Laid out according to ``self._input_space.axes``.

    Returns
    -------
    rval : 4D symbolic variable
        Laid out according to ``self._output_axes``.
    """
    # The conv op works on (batch, channel, topo dim 0, topo dim 1), so the
    # input is shuffled into that order when its space uses another one.
    assert x.ndim == 4
    in_axes = self._input_space.axes
    assert len(in_axes) == 4

    op_axes = ('b', 'c', 0, 1)
    if tuple(in_axes) != op_axes:
        to_op_order = [in_axes.index(ax) for ax in op_axes]
        x = x.dimshuffle(*to_op_order)

    img = gpu_contiguous(x)
    kerns = gpu_contiguous(self._filters)
    out_shape = GpuDnnConv.get_out_shape(img.shape, kerns.shape,
                                         self._border_mode, self._subsample)
    out_buf = gpu_alloc_empty(*out_shape)
    desc = self._desc(img.shape, kerns.shape)
    rval = self._conv_op(img, kerns, out_buf, desc)

    # Shuffle the result into the axis order of the output space.
    out_axes = self._output_axes
    assert len(out_axes) == 4
    if tuple(out_axes) != op_axes:
        from_op_order = [op_axes.index(ax) for ax in out_axes]
        rval = rval.dimshuffle(*from_op_order)
    return rval
def lmul(self, x):
    """
    Apply the convolution operator to `x` (``dot(x, A)``).

    This method overrides the original Conv2D lmul to make it work
    with arbitrary axis orders.

    Parameters
    ----------
    x : 4D symbolic variable
        Input whose axis order is given by ``self._input_space.axes``.

    Returns
    -------
    rval : 4D symbolic variable
        Result in the axis order given by ``self._output_axes``.
    """
    op_axes = ('b', 'c', 0, 1)

    # Bring x into batch, channel, topo dim 0, topo dim 1 order if needed.
    assert x.ndim == 4
    axes = self._input_space.axes
    assert len(axes) == 4
    if tuple(axes) != op_axes:
        x = x.dimshuffle(*[axes.index(name) for name in op_axes])

    contiguous_x = gpu_contiguous(x)
    filters = gpu_contiguous(self._filters)

    result_shape = GpuDnnConv.get_out_shape(contiguous_x.shape,
                                            filters.shape,
                                            self._border_mode,
                                            self._subsample)
    result_buf = gpu_alloc_empty(*result_shape)
    descriptor = self._desc(contiguous_x.shape, filters.shape)
    rval = self._conv_op(contiguous_x, filters, result_buf, descriptor)

    # Format the output based on the output space.
    axes = self._output_axes
    assert len(axes) == 4
    if tuple(self._output_axes) == op_axes:
        return rval
    return rval.dimshuffle(*[op_axes.index(name)
                             for name in self._output_axes])
def symb_forward(self, symb_input):
    """Insert a cuDNN backward-convolution op as this module's forward pass.

    Calls directly into cuDNN's gradient ops.  Originally taken from
    https://github.com/Newmu/dcgan_code/blob/master/lib/ops.py and extended
    to more complex scenarios (stride, border).  5D inputs use
    ``GpuDnnConv3dGradI``, everything else ``GpuDnnConvGradI``.
    """
    img = gpu_contiguous(symb_input)
    kerns = gpu_contiguous(self.W.param)

    # Per spatial axis: (input - 1) * stride - 2 * border + filter size.
    spatial_dims = tuple(
        (size - 1) * stride - 2 * border + fsize
        for size, stride, border, fsize in zip(img.shape[2:], self.stride,
                                               self.border, self.filter_size)
    )
    alloc_shape = (img.shape[0], self.nchan_out) + spatial_dims
    out = gpu_alloc_empty(*alloc_shape)

    desc = dnn.GpuDnnConvDesc(border_mode=self.border,
                              subsample=self.stride,
                              conv_mode=self.mode)(out.shape, kerns.shape)

    if symb_input.ndim == 5:
        grad_op = dnn.GpuDnnConv3dGradI
    else:
        grad_op = dnn.GpuDnnConvGradI
    conv_output = grad_op()(kerns, img, out, desc)

    if self.b is not None:
        # Broadcast the bias over every axis except the channel axis.
        pattern = ('x', 0) + ('x',) * (symb_input.ndim - 2)
        conv_output += self.b.param.dimshuffle(*pattern)

    return conv_output