def grad(self, inputs, dout):
    print "here"
    inputs, filters = inputs
    # if 'Cuda' not in str(type(inputs)):
    #     raise TypeError("inputs must be cuda")
    # if 'Cuda' not in str(type(filters)):
    #     raise TypeError("filters must be cuda")
    dout, = dout
    # dout = cuda.basic_ops.gpu_contiguous(
    #     cuda.basic_ops.as_cuda_ndarray_variable(dout))
    # dout = gpu_contiguous(dout)
    # if 'Cuda' not in str(type(dout)):
    #     raise TypeError("output gradients must be cuda")

    # Swap the output-channel and input-channel axes of the filters so they
    # line up with dout for the backward pass.
    filters = filters.dimshuffle(1, 0, 2, 3, 4)
    # inputs = inputs.dimshuffle(1, 0, 2, 3, 4)
    # print inputs.shape, filters.shape, dout.shape
    d_inputs = conv.conv3d_fft(dout, inputs, dout.shape, inputs.shape)
    d_filters = conv.conv3d_fft(dout, filters, dout.shape, filters.shape)
    return d_inputs, d_filters
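# A minimal 1D NumPy sketch (an assumption, not the pycuda kernel) of the
# standard gradient identities the grad() above is wiring up for a 'valid'
# convolution: dL/dinput is a *full* convolution of dout with the flipped
# filter, and dL/dfilter is a *valid* convolution of the flipped input with
# dout.  np.convolve stands in for conv.conv3d_fft; the finite-difference
# loop checks the input gradient numerically.
import numpy as np

rng = np.random.RandomState(0)
x = rng.randn(10)                      # input signal
w = rng.randn(4)                       # filter
g = rng.randn(10 - 4 + 1)              # upstream gradient dL/dy

y = np.convolve(x, w, 'valid')         # forward: valid convolution
dx = np.convolve(g, w[::-1], 'full')   # dL/dx: full conv, flipped filter
dw = np.convolve(x[::-1], g, 'valid')  # dL/dw: valid conv, flipped input

eps = 1e-6
for j in range(len(x)):
    xp = x.copy(); xp[j] += eps
    xm = x.copy(); xm[j] -= eps
    num = (np.convolve(xp, w, 'valid') -
           np.convolve(xm, w, 'valid')).dot(g) / (2 * eps)
    assert abs(dx[j] - num) < 1e-5     # matches the analytic gradient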
def thunk():
    inp = inputs[0][0]
    filters = inputs[1][0]
    # output_shape = self.input_shape
    # output_shape[-1] = (output_shape[-1] - 1) * 2  # restore full signal length
    # output_shape = tuple(output_shape)
    z = outputs[0]
    # batch size, input channels, input dims 0-2
    b, ic, i0, i1, i2 = self.input_shape
    # output channels, input channels, filter dims 0-2
    oc, ic_, f0, f1, f2 = self.filter_shape
    # 'valid' output shape: each spatial dim shrinks to i - f + 1.  Built as
    # a tuple so the z[0].shape comparison below can ever succeed (a
    # CudaNdarray's .shape is a tuple, and tuple != list is always True).
    output_shape = (b, oc, i0 - f0 + 1, i1 - f1 + 1, i2 - f2 + 1)
    # Only allocate if there is no previous allocation of the right size.
    if z[0] is None or z[0].shape != output_shape:
        z[0] = cuda.CudaNdarray.zeros(output_shape)
    output_pycuda = to_gpuarray(z[0])
    print "Perform Conv"
    output_pycuda = conv.conv3d_fft(inp, filters, output_pycuda,
                                    self.input_shape, self.filter_shape)
    print "End of Conv"
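# conv.conv3d_fft itself is not shown here.  The sketch below (a hypothetical
# single-channel stand-in, not the real batched pycuda kernel) makes the
# output_shape arithmetic above concrete: FFT-based 'valid' convolution pads
# to the full linear-convolution size i + f - 1, multiplies in the frequency
# domain, then slices off the f - 1 border samples per axis, leaving
# i - f + 1 -- exactly the i0 - f0 + 1 terms in the thunk.
import numpy as np

def conv3d_fft_valid(vol, filt):
    # Full linear convolution size along each axis.
    full = [i + f - 1 for i, f in zip(vol.shape, filt.shape)]
    # Convolution theorem: pointwise product in the frequency domain.
    out_full = np.fft.irfftn(np.fft.rfftn(vol, s=full) *
                             np.fft.rfftn(filt, s=full), s=full)
    # Keep only the region where the filter fully overlaps the volume.
    valid = tuple(slice(f - 1, i) for i, f in zip(vol.shape, filt.shape))
    return out_full[valid]

vol = np.random.randn(8, 8, 8)
filt = np.random.randn(3, 3, 3)
assert conv3d_fft_valid(vol, filt).shape == (6, 6, 6)  # 8 - 3 + 1 per axis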