def run_gradinput(self, inputs_shape, filters_shape, subsample=(1, 1, 1)):
    # Here `inputs` plays the role of the top gradient; the gradInputs op
    # reconstructs the bottom (the forward convolution's input).
    inputs_val = numpy.random.random(inputs_shape).astype('float32')
    filters_val = numpy.random.random(filters_shape).astype('float32')
    inputs = shared(inputs_val)
    filters = shared(filters_val)

    # Valid-mode shape arithmetic: bottom = (top - 1) * stride + kernel
    # along each spatial axis.
    bottom_height = (inputs_shape[1] - 1) * subsample[0] + filters_shape[1]
    bottom_width = (inputs_shape[2] - 1) * subsample[1] + filters_shape[2]
    bottom_depth = (inputs_shape[3] - 1) * subsample[2] + filters_shape[3]
    bottom_shape = theano.shared(
        numpy.array([bottom_height, bottom_width, bottom_depth]))

    # Reorder from channels-last to the (batch, channels, h, w, d)
    # layout expected by the corr3dMM ops.
    weight = gpu_contiguous(filters.dimshuffle(0, 4, 1, 2, 3))
    top = gpu_contiguous(inputs.dimshuffle(0, 4, 1, 2, 3))
    if subsample == (1, 1, 1):
        conv_ref = Corr3dMM_gradInputs(subsample=subsample)(
            kern=weight, topgrad=top)
        conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(
            kern=weight, topgrad=top)
    else:
        conv_ref = Corr3dMM_gradInputs(subsample=subsample)(
            kern=weight, topgrad=top, shape=bottom_shape)
        conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(
            kern=weight, topgrad=top, shape=bottom_shape)

    f_ref = theano.function([], conv_ref)
    f = theano.function([], conv_gemm, mode=mode_with_gpu)

    res_ref = f_ref()
    res = f()
    utt.assert_allclose(res_ref, res)
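As a quick sanity check of the bottom-shape arithmetic used above, a minimal sketch with made-up sizes (the numbers are illustrative assumptions, not values from the test suite): a valid-mode strided correlation of the reconstructed bottom with the same kernel recovers exactly the top-gradient size.

# Assumed sizes along one spatial axis: top gradient of size 5,
# stride 2, kernel of size 3.
top, stride, kernel = 5, 2, 3

# bottom = (top - 1) * stride + kernel, as computed in run_gradinput.
bottom = (top - 1) * stride + kernel  # -> 11

# Cross-check: a valid-mode correlation of a size-`bottom` input with a
# size-`kernel` filter at stride `stride` yields exactly `top` outputs.
assert (bottom - kernel) // stride + 1 == top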
def run_gradinput(self, inputs_shape, filters_shape, subsample=(1, 1, 1)):
    # Shapes are given channels-last; reorder them up front to the
    # (batch, channels, height, width, depth) layout the corr3dMM ops
    # expect, instead of dimshuffling the variables later.
    inputs_shape = [inputs_shape[i] for i in (0, 4, 1, 2, 3)]
    filters_shape = [filters_shape[i] for i in (0, 4, 1, 2, 3)]
    inputs_val = np.random.random(inputs_shape).astype(config.floatX)
    filters_val = np.random.random(filters_shape).astype(config.floatX)
    inputs = gpuarray_shared_constructor(inputs_val)
    filters = gpuarray_shared_constructor(filters_val)

    # Valid-mode shape arithmetic: bottom = (top - 1) * stride + kernel;
    # the spatial axes now sit at indices 2, 3 and 4.
    bottom_height = (inputs_shape[2] - 1) * subsample[0] + filters_shape[2]
    bottom_width = (inputs_shape[3] - 1) * subsample[1] + filters_shape[3]
    bottom_depth = (inputs_shape[4] - 1) * subsample[2] + filters_shape[4]
    bottom_shape = gpuarray_shared_constructor(
        np.array([bottom_height, bottom_width, bottom_depth]))

    if subsample == (1, 1, 1):
        conv_ref = Corr3dMM_gradInputs(subsample=subsample)(
            kern=ref_cast(filters), topgrad=ref_cast(inputs))
        conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(
            kern=filters, topgrad=inputs)
    else:
        conv_ref = Corr3dMM_gradInputs(subsample=subsample)(
            kern=ref_cast(filters), topgrad=ref_cast(inputs),
            shape=bottom_shape)
        conv_gemm = GpuCorr3dMM_gradInputs(subsample=subsample)(
            kern=filters, topgrad=inputs, shape=bottom_shape)

    f_ref = theano.function([], conv_ref, mode=mode_without_gpu)
    f = theano.function([], conv_gemm, mode=mode_with_gpu)

    res_ref = f_ref()
    res = f()
    utt.assert_allclose(res_ref, res)
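A minimal usage sketch, assuming the method sits on a test class wired with the usual helpers (`mode_with_gpu`, `mode_without_gpu`, `ref_cast`, `utt`). The shapes are hypothetical but consistent: channels-last, with the top gradient's channel count (last entry of `inputs_shape`) matching the number of kernels (first entry of `filters_shape`).

# Hypothetical shapes: the top gradient is (batch, h, w, d, nkern) and
# the kernels are (nkern, kh, kw, kd, nchannels); nkern = 16 matches.
self.run_gradinput(inputs_shape=(2, 5, 5, 5, 16),
                   filters_shape=(16, 3, 3, 3, 1),
                   subsample=(2, 2, 2))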
def local_abstractconv3d_gradinputs_gemm(node):
    if theano.config.cxx == "" or not theano.config.blas.ldflags:
        return
    if not isinstance(node.op, AbstractConv3d_gradInputs):
        return None
    kern, topgrad, shape = node.inputs
    if not isinstance(kern.type, TensorType) or \
            not isinstance(topgrad.type, TensorType):
        return None

    # need to flip the kernel if necessary
    if node.op.filter_flip:
        kern = kern[:, :, ::-1, ::-1, ::-1]
    rval = Corr3dMM_gradInputs(border_mode=node.op.border_mode,
                               subsample=node.op.subsample,
                               filter_dilation=node.op.filter_dilation)(
        kern, topgrad, shape)
    copy_stack_trace(node.outputs[0], rval)

    return [rval]
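The kernel flip above is what lets a correlation op stand in for a true convolution when the abstract op was built with `filter_flip=True`, since correlate(x, flip(k)) == convolve(x, k). A small 1-D NumPy illustration of that identity (the values are arbitrary assumptions):

import numpy as np

x = np.array([1., 2., 3., 4.])
k = np.array([1., 0., -1.])

# np.convolve flips the kernel internally, so correlating with the
# reversed kernel must give the same result.
assert np.allclose(np.convolve(x, k, mode='valid'),
                   np.correlate(x, k[::-1], mode='valid'))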
def local_abstractconv3d_gradinputs_gemm(node):
    # If theano.config.blas.ldflags is empty, Theano will use
    # a NumPy C implementation of [sd]gemm_.
    if theano.config.cxx == "" or node.inputs[0].dtype == 'float16':
        return
    if not isinstance(node.op, AbstractConv3d_gradInputs):
        return None
    kern, topgrad, shape = node.inputs
    if not isinstance(kern.type, TensorType) or \
            not isinstance(topgrad.type, TensorType):
        return None

    # need to flip the kernel if necessary
    if node.op.filter_flip:
        kern = kern[:, :, ::-1, ::-1, ::-1]
    rval = Corr3dMM_gradInputs(border_mode=node.op.border_mode,
                               subsample=node.op.subsample,
                               filter_dilation=node.op.filter_dilation,
                               num_groups=node.op.num_groups)(kern, topgrad,
                                                              shape)
    copy_stack_trace(node.outputs[0], rval)

    return [rval]
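For context, a sketch of how such a node rewrite is typically hooked into Theano's optimizer machinery. The `local_optimizer` decorator is real Theano API; the registration call below is left commented out because its name, priority, and tags are assumptions modeled on theano/tensor/nnet/opt.py and vary across Theano versions.

from theano.gof.opt import local_optimizer
from theano.tensor.nnet.abstract_conv import AbstractConv3d_gradInputs

# Track only AbstractConv3d_gradInputs nodes, so the rewrite is
# attempted only where it can apply.
local_abstractconv3d_gradinputs_gemm = local_optimizer(
    [AbstractConv3d_gradInputs])(local_abstractconv3d_gradinputs_gemm)

# Assumed registration into the conv optimizer group (exact tags and
# priority vary by version):
# conv_groupopt.register('local_abstractconv3d_gradinputs_gemm',
#                        local_abstractconv3d_gradinputs_gemm, 30,
#                        'conv_gemm', 'fast_compile', 'fast_run')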