def run_gradweight(self, inputs_shape, filters_shape, dCdH_shape, subsample=(1, 1)):
    """Check GpuCorrMM_gradWeights against the CPU CorrMM_gradWeights reference.

    Shape tuples are given in (batch, rows, cols, channels) order and are
    converted here to the (batch, channels, rows, cols) layout that the
    correlation ops expect.

    Parameters
    ----------
    inputs_shape : sequence of 4 ints
        Shape of the forward-pass input.
    filters_shape : sequence of 4 ints
        Shape of the filters whose gradient is computed.
    dCdH_shape : sequence of 4 ints
        Shape of the gradient w.r.t. the forward-pass output.
    subsample : tuple of 2 ints
        Strides of the forward convolution.
    """
    # Reorder from channels-last to channels-first layout.
    inputs_shape = [inputs_shape[i] for i in (0, 3, 1, 2)]
    filters_shape = [filters_shape[i] for i in (0, 3, 1, 2)]
    dCdH_shape = [dCdH_shape[i] for i in (0, 3, 1, 2)]

    inputs_val = np.random.random(inputs_shape).astype(config.floatX)
    dCdH_val = np.random.random(dCdH_shape).astype(config.floatX)

    inputs = gpuarray_shared_constructor(inputs_val)
    dCdH = gpuarray_shared_constructor(dCdH_val)

    if subsample == (1, 1):
        # Unit strides: the filter shape is unambiguous, so no explicit
        # shape argument is needed.
        conv_ref = CorrMM_gradWeights(subsample=subsample)(
            ref_cast(inputs), ref_cast(dCdH))
        conv_gemm = GpuCorrMM_gradWeights(subsample=subsample)(inputs, dCdH)
    else:
        # Strided case: the spatial filter shape cannot be inferred from
        # inputs/dCdH alone, so pass it explicitly.  The shared variable is
        # built only here, the one branch that actually uses it (it was
        # previously allocated unconditionally).
        shape = gpuarray_shared_constructor(np.array(filters_shape[2:]))
        conv_ref = CorrMM_gradWeights(subsample=subsample)(
            ref_cast(inputs), ref_cast(dCdH), shape=shape)
        conv_gemm = GpuCorrMM_gradWeights(subsample=subsample)(
            inputs, dCdH, shape=shape)

    f_ref = theano.function([], conv_ref, mode=mode_without_gpu)
    f = theano.function([], conv_gemm, mode=mode_with_gpu)

    res_ref = f_ref()
    res = f()
    utt.assert_allclose(res_ref, res)
def local_abstractconv_gradweight_gemm(node):
    """Lower ``AbstractConv2d_gradWeights`` to the BLAS-backed ``CorrMM_gradWeights``.

    Returns ``None`` when the rewrite does not apply, otherwise a
    one-element list containing the replacement graph output.
    """
    # Without a C++ compiler Theano would fall back to a NumPy C
    # implementation of [sd]gemm_, and float16 is not handled by this
    # CPU path, so bail out in either case.
    if theano.config.cxx == "" or node.inputs[0].dtype == "float16":
        return
    if not isinstance(node.op, AbstractConv2d_gradWeights):
        return None

    img, topgrad, shape = node.inputs
    # Only plain tensor inputs can be rewritten here.
    if not (isinstance(img.type, TensorType)
            and isinstance(topgrad.type, TensorType)):
        return None

    corr_op = CorrMM_gradWeights(
        border_mode=node.op.border_mode,
        subsample=node.op.subsample,
        filter_dilation=node.op.filter_dilation,
        num_groups=node.op.num_groups,
        unshared=node.op.unshared,
    )
    rval = corr_op(img, topgrad, shape)
    copy_stack_trace(node.outputs[0], rval)

    # CorrMM computes correlation; when the abstract op asked for true
    # convolution, flip the two trailing (spatial) axes of the result.
    if node.op.filter_flip:
        flip = (slice(None),) * (rval.ndim - 2) + (slice(None, None, -1),) * 2
        rval = rval[flip]

    rval = theano.tensor.patternbroadcast(rval, node.outputs[0].broadcastable)
    copy_stack_trace(node.outputs[0], rval)
    return [rval]
def local_abstractconv_gradweight_gemm(node):
    """Replace ``AbstractConv2d_gradWeights`` with ``CorrMM_gradWeights``.

    Returns ``None`` when the node cannot be rewritten.
    """
    # Skip when no C++ compiler is configured; Theano would otherwise
    # fall back to a NumPy C implementation of [sd]gemm_.
    if theano.config.cxx == "":
        return
    if not isinstance(node.op, AbstractConv2d_gradWeights):
        return None

    img, topgrad, shape = node.inputs
    # The rewrite only applies to plain tensor inputs.
    if not (isinstance(img.type, TensorType)
            and isinstance(topgrad.type, TensorType)):
        return None

    rval = CorrMM_gradWeights(
        border_mode=node.op.border_mode,
        subsample=node.op.subsample,
        filter_dilation=node.op.filter_dilation,
    )(img, topgrad, shape)
    copy_stack_trace(node.outputs[0], rval)

    # Flip the kernel on both spatial axes when the abstract op requested
    # convolution rather than correlation semantics.
    if node.op.filter_flip:
        rval = rval[:, :, ::-1, ::-1]

    rval = theano.tensor.patternbroadcast(rval, node.outputs[0].broadcastable)
    copy_stack_trace(node.outputs[0], rval)
    return [rval]
def local_abstractconv_gradweight_gemm(node):
    """Replace ``AbstractConv2d_gradWeights`` with the GEMM-based ``CorrMM_gradWeights``.

    Returns ``None`` when the rewrite does not apply.
    """
    if not isinstance(node.op, AbstractConv2d_gradWeights):
        return None

    img, topgrad, shape = node.inputs
    # Only plain tensor inputs are supported by this rewrite.
    if not (isinstance(img.type, TensorType)
            and isinstance(topgrad.type, TensorType)):
        return None

    corr = CorrMM_gradWeights(border_mode=node.op.border_mode,
                              subsample=node.op.subsample)
    rval = corr(img, topgrad, shape)

    # Flip the kernel on both spatial axes when convolution (not
    # correlation) semantics were requested by the abstract op.
    if node.op.filter_flip:
        rval = rval[:, :, ::-1, ::-1]

    rval = theano.tensor.patternbroadcast(rval,
                                          node.outputs[0].broadcastable)
    return [rval]