def test_alloc_empty():
    for dt in ["float32", "int8"]:
        f = theano.function(
            [], GpuAllocEmpty(dt, context_name=test_ctx_name)(2, 3)
        )
        assert len(f.maker.fgraph.apply_nodes) == 1
        out = f()
        assert out.shape == (2, 3)
        assert out.dtype == dt

    f = theano.function(
        [],
        [
            GpuAllocEmpty("uint64", test_ctx_name)(3, 2),
            GpuAllocEmpty("uint64", test_ctx_name)(3, 2),
        ],
    )
    out = f()
    assert out[0].shape == (3, 2)
    assert out[0].dtype == "uint64"
    assert out[1].shape == (3, 2)
    assert out[1].dtype == "uint64"
    # The two identical allocations should have been merged into a single node.
    assert (
        len(
            [
                node
                for node in f.maker.fgraph.apply_nodes
                if isinstance(node.op, GpuAllocEmpty)
            ]
        )
        == 1
    )
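# A minimal standalone sketch (an addition, not part of the test above) of what
# GpuAllocEmpty does, assuming a configured gpuarray backend: only the shape and
# dtype of the result are guaranteed, the contents are uninitialized.
import theano
from theano.gpuarray.basic_ops import GpuAllocEmpty

x = GpuAllocEmpty("float32", None)(4, 5)   # None selects the default GPU context
f = theano.function([], x)
out = f()
assert out.shape == (4, 5)
assert str(out.dtype) == "float32"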
def deconv(self, X, subsample=(2, 2), border_mode=(2, 2), conv_mode='conv',
           atype='sigmoid'):
    """
    Sets up a dummy convolutional forward pass and uses its gradient with
    respect to the inputs as the deconvolution.
    Currently only tested/working with "same" padding.
    """
    # Always return a c contiguous output.
    # Copy the inputs only if they are not already c contiguous.
    img = gpu_contiguous(X)
    kerns = gpu_contiguous(self.W)

    # Alloc on the GPU, but without initializing the memory.
    if someconfigs.backend == 'gpuarray':
        gpu_alloc_img_shape = GpuAllocEmpty('float32', None)(
            img.shape[0], kerns.shape[1],
            img.shape[2] * subsample[0], img.shape[3] * subsample[1]).shape
        # This Op builds a convolution descriptor for use in the other
        # convolution operations.
        desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                              conv_mode=conv_mode)(kerns.shape)
        out = GpuAllocEmpty('float32', None)(
            img.shape[0], kerns.shape[1],
            img.shape[2] * subsample[0], img.shape[3] * subsample[1])
    elif someconfigs.backend == 'cudandarray':
        gpu_alloc_img_shape = gpu_alloc_empty(
            img.shape[0], kerns.shape[1],
            img.shape[2] * subsample[0], img.shape[3] * subsample[1]).shape
        desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                              conv_mode=conv_mode)(gpu_alloc_img_shape,
                                                   kerns.shape)
        out = gpu_alloc_empty(
            img.shape[0], kerns.shape[1],
            img.shape[2] * subsample[0], img.shape[3] * subsample[1])

    # The gradient of the convolution with respect to its inputs.
    d_img = GpuDnnConvGradI()(kerns, img, out, desc)
    return activation_fn_th(d_img, atype=atype)
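# The "grad of a dummy forward conv" trick above is a transposed convolution.
# A hedged cross-check (an addition, not part of the original code) of the shape
# relationship it relies on, using Theano's backend-agnostic helper; the concrete
# shapes are hypothetical.
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.nnet.abstract_conv import conv2d_grad_wrt_inputs

topgrad = T.tensor4("topgrad")   # plays the role of X above
filters = T.tensor4("filters")   # plays the role of self.W above

# With 5x5 kernels, border_mode=(2, 2) and subsample=(2, 2), an 8x8 map is
# upsampled back to the 16x16 input of the (dummy) forward convolution.
up = conv2d_grad_wrt_inputs(topgrad, filters,
                            input_shape=(1, 4, 16, 16),
                            border_mode=(2, 2), subsample=(2, 2),
                            filter_flip=True)
f = theano.function([topgrad, filters], up)
out = f(np.zeros((1, 3, 8, 8), dtype="float32"),
        np.zeros((3, 4, 5, 5), dtype="float32"))
print(out.shape)   # (1, 4, 16, 16)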
def deconv(self, X, subsample=(2, 2), border_mode=(2, 2), conv_mode='conv',
           atype='sigmoid', testF=False):
    """
    Sets up a dummy convolutional forward pass and uses its gradient with
    respect to the inputs as the deconvolution, followed by batch
    normalisation. Currently only tested/working with "same" padding.
    """
    # Always return a c contiguous output.
    # Copy the inputs only if they are not already c contiguous.
    img = gpu_contiguous(X)
    kerns = gpu_contiguous(self.W)

    # Alloc on the GPU, but without initializing the memory.
    if someconfigs.backend == 'gpuarray':
        gpu_alloc_img_shape = GpuAllocEmpty('float32', None)(
            img.shape[0], kerns.shape[1],
            img.shape[2] * subsample[0], img.shape[3] * subsample[1]).shape
        # This Op builds a convolution descriptor for use in the other
        # convolution operations.
        desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                              conv_mode=conv_mode)(kerns.shape)
        out = GpuAllocEmpty('float32', None)(
            img.shape[0], kerns.shape[1],
            img.shape[2] * subsample[0], img.shape[3] * subsample[1])
    elif someconfigs.backend == 'cudandarray':
        gpu_alloc_img_shape = gpu_alloc_empty(
            img.shape[0], kerns.shape[1],
            img.shape[2] * subsample[0], img.shape[3] * subsample[1]).shape
        desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                              conv_mode=conv_mode)(gpu_alloc_img_shape,
                                                   kerns.shape)
        out = gpu_alloc_empty(
            img.shape[0], kerns.shape[1],
            img.shape[2] * subsample[0], img.shape[3] * subsample[1])

    # The gradient of the convolution with respect to its inputs.
    d_img = GpuDnnConvGradI()(kerns, img, out, desc)

    ConH0 = d_img  # + self.b.dimshuffle('x', 0, 'x', 'x')

    if testF:
        # At test time, normalise with the stored population statistics.
        ConH1 = (ConH0 - self.stat_mean.dimshuffle('x', 0, 'x', 'x')) \
            / (self.stat_std.dimshuffle('x', 0, 'x', 'x') + TINY)
    else:
        # At training time, normalise with the per-channel batch statistics;
        # `var` is the variance, hence the square root below.
        mean = ConH0.mean(axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x')
        var = T.mean(T.sqr(ConH0 - mean),
                     axis=[0, 2, 3]).dimshuffle('x', 0, 'x', 'x')
        ConH1 = (ConH0 - mean) / T.sqrt(var + TINY)

    # Learned per-channel scale and shift.
    ConH2 = self.eta.dimshuffle('x', 0, 'x', 'x') * ConH1 \
        + self.beta.dimshuffle('x', 0, 'x', 'x')
    return activation_fn_th(ConH2, atype=atype)
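# A small NumPy sketch (an addition) of the per-channel batch normalisation
# applied to the deconv output in the training branch above; the shapes and the
# value of TINY are hypothetical (TINY is assumed to be a small constant).
import numpy as np

TINY = 1e-8
x = np.random.randn(4, 3, 8, 8).astype("float32")   # (batch, channels, rows, cols)
eta = np.ones(3, dtype="float32")                    # scale
beta = np.zeros(3, dtype="float32")                  # shift

# Per-channel statistics over the batch and spatial axes, as in axis=[0, 2, 3].
mean = x.mean(axis=(0, 2, 3), keepdims=True)
var = ((x - mean) ** 2).mean(axis=(0, 2, 3), keepdims=True)
y = eta.reshape(1, 3, 1, 1) * (x - mean) / np.sqrt(var + TINY) \
    + beta.reshape(1, 3, 1, 1)

# Each channel is now approximately zero-mean with unit variance.
print(y.mean(axis=(0, 2, 3)), y.std(axis=(0, 2, 3)))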
def deconv(X, w, subsample=(1, 1), border_mode=(0, 0), conv_mode='conv'):
    # Cast to float32 and make both operands c contiguous for cuDNN.
    img = gpu_contiguous(T.cast(X, 'float32'))
    kerns = gpu_contiguous(T.cast(w, 'float32'))
    # Build the convolution descriptor of the dummy forward pass.
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                          conv_mode=conv_mode)(kerns.shape)
    # Uninitialized output buffer with the upsampled spatial shape.
    out = GpuAllocEmpty(dtype='float32', context_name=infer_context_name(X))(
        img.shape[0], kerns.shape[1],
        img.shape[2] * subsample[0], img.shape[3] * subsample[1])
    # The gradient of the forward convolution w.r.t. its inputs is the deconvolution.
    d_img = GpuDnnConvGradI()(kerns, img, out, desc)
    return d_img
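# Hedged usage sketch for the helper above (an addition); running it requires a
# GPU with cuDNN and the gpuarray imports used inside deconv to be in scope.
# The shapes are hypothetical.
import numpy as np
import theano
import theano.tensor as T

X = T.tensor4("X")   # (batch, forward-conv output channels, rows, cols)
w = T.tensor4("w")   # (forward-conv output channels, forward-conv input channels, kh, kw)
up = deconv(X, w, subsample=(2, 2), border_mode=(2, 2))
f = theano.function([X, w], up)

x_val = np.random.randn(1, 8, 16, 16).astype("float32")
w_val = np.random.randn(8, 3, 5, 5).astype("float32")
print(f(x_val, w_val).shape)   # (1, 3, 32, 32): spatially upsampled by subsample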
def opt(node):
    if type(node.op) != op or node.op.inplace:
        return
    inputs = list(node.inputs)
    alloc = inputs[idx]
    # If the uninitialized buffer feeding this op is shared with other
    # consumers, give the soon-to-be-inplace op its own private GpuAllocEmpty
    # so it does not clobber memory that another node still reads.
    if (alloc.owner and isinstance(alloc.owner.op, GpuAllocEmpty) and
            len(alloc.clients) > 1):
        alloc_op = GpuAllocEmpty(alloc.owner.op.dtype,
                                 alloc.owner.op.context_name)
        inputs[idx] = alloc_op(*alloc.owner.inputs)
    with inherit_stack_trace(node.outputs):
        return maker(node, inputs)
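# The closure above refers to the free variables `op`, `idx` and `maker`, so it
# is presumably returned by a factory and wrapped as a local optimizer. A hedged
# sketch of that scaffolding (the factory name and the `maker` callback are
# assumptions, not taken from the snippet):
from theano.gof.opt import local_optimizer

def make_inplace_allocempty_opt(op, idx, maker):
    # `maker` is assumed to build the inplace replacement node from the original
    # node and its (possibly re-allocated) inputs.
    def opt(node):
        ...  # body as in the snippet above
    return local_optimizer([op], inplace=True)(opt)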
def apply(self, input_):
    if self.use_bias:
        W, b = self.parameters
    else:
        W, = self.parameters
    W = W.dimshuffle(1, 0, 2, 3)
    # Use the callable helpers gpu_contiguous/gpu_alloc_empty; the Op classes
    # themselves must not be applied directly to variables.
    img = gpu_contiguous(input_)
    kerns = gpu_contiguous(W)
    desc = GpuDnnConvDesc(border_mode=self.pad, subsample=self.stride,
                          conv_mode='conv')(
        gpu_alloc_empty(img.shape[0], kerns.shape[1],
                        img.shape[2] * self.stride[0],
                        img.shape[3] * self.stride[1]).shape,
        kerns.shape)
    out = gpu_alloc_empty(img.shape[0], kerns.shape[1],
                          img.shape[2] * self.stride[0],
                          img.shape[3] * self.stride[1])
    output = GpuDnnConvGradI()(kerns, img, out, desc)
    if self.use_bias:
        output += b.dimshuffle('x', 0, 'x', 'x')
    return output
def local_abstractconv_cudnn_alt(node):
    if not isinstance(node.op, (AbstractConv2d, AbstractConv2d_gradWeights,
                                AbstractConv2d_gradInputs)):
        return

    if version(raises=False) < 6000 and node.op.filter_dilation != (1, 1):
        return None
    if node.op.unshared:
        return None
    if isinstance(node.op.border_mode, tuple) and any(
            isinstance(p, tuple) for p in node.op.border_mode):
        # Asymmetric padding not yet supported
        return None

    inp1 = node.inputs[0]
    inp2 = node.inputs[1]

    if not dnn_available(inp1.type.context_name):
        return

    op = node.op
    border_mode = node.op.border_mode
    subsample = node.op.subsample
    filter_dilation = node.op.filter_dilation
    num_groups = node.op.num_groups
    precision, _ = get_precision(None, [inp1, inp2])

    if node.op.filter_flip:
        conv_mode = "conv"
    else:
        conv_mode = "cross"

    if isinstance(op, AbstractConv2d):
        if border_mode == "half" or subsample != (1, 1) or num_groups != 1:
            return None
        if border_mode == "full":
            direction_hint = "bprop inputs"
        elif border_mode == "valid" and filter_dilation == (1, 1):
            direction_hint = "bprop weights"
        else:
            return None

        rval = dnn_conv(
            inp1,
            inp2,
            border_mode=border_mode,
            subsample=subsample,
            dilation=filter_dilation,
            direction_hint=direction_hint,
            conv_mode=conv_mode,
            num_groups=num_groups,
        )
    elif isinstance(op, AbstractConv2d_gradWeights):
        if (border_mode == "valid" and subsample == (1, 1)
                and filter_dilation == (1, 1) and num_groups == 1):
            img = gpu_contiguous(inp1)
            topgrad = gpu_contiguous(inp2)
            ctx_name = infer_context_name(img, topgrad)
            img = gpu_contiguous(img.dimshuffle(1, 0, 2, 3))
            topgrad = gpu_contiguous(topgrad.dimshuffle(1, 0, 2, 3))
            ishape = [shape_i_op(i)(img) for i in range(img.ndim)]
            tshape = [shape_i_op(i)(topgrad) for i in range(topgrad.ndim)]

            out_shp = get_conv_output_shape(
                ishape,
                tshape,
                border_mode=border_mode,
                subsample=subsample,
                filter_dilation=filter_dilation,
            )
            out_shp = assert_conv_shape(out_shp)

            out = GpuAllocEmpty(dtype=img.dtype, context_name=ctx_name)(*out_shp)
            desc = GpuDnnConvDesc(
                border_mode=border_mode,
                subsample=subsample,
                dilation=filter_dilation,
                conv_mode="cross",
                precision=precision,
            )(out.shape)

            conv = GpuDnnConv(algo=None, num_groups=num_groups)(img, topgrad, out, desc)
            if conv_mode == "conv":
                conv = conv[:, :, ::-1, ::-1]

            rval = as_gpuarray_variable(conv.dimshuffle(1, 0, 2, 3), ctx_name)
        else:
            return None
    elif isinstance(op, AbstractConv2d_gradInputs):
        if border_mode == "valid" and subsample == (1, 1) and num_groups == 1:
            kerns = gpu_contiguous(inp1.dimshuffle(1, 0, 2, 3))
            topgrad = gpu_contiguous(inp2)
            ctx_name = infer_context_name(kerns, topgrad)
            conv_mode = "cross" if conv_mode == "conv" else "conv"

            desc = GpuDnnConvDesc(
                border_mode="full",
                subsample=subsample,
                dilation=filter_dilation,
                conv_mode=conv_mode,
                precision=precision,
            )(kerns.shape)

            tshape = [shape_i_op(i)(topgrad) for i in range(topgrad.ndim)]
            kshape = [shape_i_op(i)(kerns) for i in range(kerns.ndim)]
            shape = get_conv_output_shape(
                tshape,
                kshape,
                border_mode="full",
                subsample=subsample,
                filter_dilation=filter_dilation,
            )
            shape = assert_conv_shape(shape)

            out = GpuAllocEmpty(dtype=topgrad.dtype, context_name=ctx_name)(*shape)
            rval = GpuDnnConv(algo=None, num_groups=num_groups)(
                topgrad, kerns, out, desc)
        else:
            return None

    return [rval]
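# The gradWeights branch above swaps the batch and channel axes of both the
# input and the top gradient, so a valid cross-correlation between them yields
# one map per (input channel, output channel) pair, i.e. the weight gradient.
# A short check of that shape bookkeeping (an addition; the concrete shapes are
# hypothetical):
from theano.tensor.nnet.abstract_conv import get_conv_output_shape

ishape = (3, 16, 28, 28)   # img after dimshuffle: (in_channels, batch, rows, cols)
tshape = (8, 16, 24, 24)   # topgrad after dimshuffle: (out_channels, batch, rows, cols)

out_shp = get_conv_output_shape(ishape, tshape,
                                border_mode="valid",
                                subsample=(1, 1),
                                filter_dilation=(1, 1))
print(out_shp)   # (3, 8, 5, 5): the forward-conv kernel shape, before the final
                 # dimshuffle back to (out_channels, in_channels, 5, 5)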