def thunk():
    """Run ``image_mask_split`` on the current inputs and fill ``outputs``.

    Reads the mask indices from ``inputs[0][0]`` and the image from
    ``inputs[1][0]``, launches the kernel into the ``self._sdata`` buffer
    and stores three consecutive chunks of that buffer (each ``len(MASK)``
    entries along the first axis) in ``outputs[0][0]``, ``outputs[1][0]``
    and ``outputs[2][0]``.

    Raises:
        AssertionError: if either input shape is rejected by ``shape_ok``
            or the last two mask dimensions differ.
    """
    mask_idx = inputs[0][0]
    image = inputs[1][0]
    batch_size = min(mask_idx.shape[0], image.shape[0])
    assert shape_ok(mask_idx.shape)
    assert shape_ok(image.shape)
    mask_idx = to_gpuarray(mask_idx)
    image = to_gpuarray(image)
    s = mask_idx.shape[3]
    assert mask_idx.shape[2] == mask_idx.shape[3], \
        "height and width must be equal"

    sdata_shape = (3 * len(MASK), batch_size, 1, s, s)
    self._sdata = pycuda_zeros(self._sdata, sdata_shape)

    blocks_max = 32
    blocks_s = min(blocks_max, s)
    # Integer ceil-division: always yields an int and does not depend on
    # true-division semantics (math.ceil(s / blocks_max) truncates first
    # and returns a float when `/` is integer division).
    grid_s = (s + blocks_max - 1) // blocks_max
    grid = (batch_size, grid_s, grid_s)
    block = (1, blocks_s, blocks_s)
    image_mask_split(mask_idx, image, np.int32(batch_size), np.int32(s),
                     self._sdata, block=block, grid=grid)

    sdata_as_theano = to_cudandarray(self._sdata)
    m = len(MASK)
    outputs[0][0] = sdata_as_theano[:m]
    outputs[1][0] = sdata_as_theano[m:2 * m]
    outputs[2][0] = sdata_as_theano[2 * m:]
def thunk():
    """Run ``image_mask_split`` on the current inputs and fill ``outputs``.

    Reads the mask indices from ``inputs[0][0]`` and the image from
    ``inputs[1][0]``, launches the kernel into the ``self._sdata`` buffer
    and stores three consecutive chunks of that buffer (each ``len(MASK)``
    entries along the first axis) in ``outputs[0][0]``, ``outputs[1][0]``
    and ``outputs[2][0]``.

    Raises:
        AssertionError: if either input shape is rejected by ``shape_ok``
            or the last two mask dimensions differ.
    """
    mask_idx = inputs[0][0]
    image = inputs[1][0]
    batch_size = min(mask_idx.shape[0], image.shape[0])
    assert shape_ok(mask_idx.shape)
    assert shape_ok(image.shape)
    mask_idx = to_gpuarray(mask_idx)
    image = to_gpuarray(image)
    s = mask_idx.shape[3]
    assert mask_idx.shape[2] == mask_idx.shape[3], \
        "height and width must be equal"

    sdata_shape = (3 * len(MASK), batch_size, 1, s, s)
    self._sdata = pycuda_zeros(self._sdata, sdata_shape)

    blocks_max = 32
    blocks_s = min(blocks_max, s)
    # Integer ceil-division keeps the grid size an int regardless of how
    # `/` behaves; the float-based math.ceil(s / blocks_max) under-counts
    # blocks whenever `/` floors (e.g. s=48 would give 1 instead of 2).
    grid_s = (s + blocks_max - 1) // blocks_max
    grid = (batch_size, grid_s, grid_s)
    block = (1, blocks_s, blocks_s)
    image_mask_split(mask_idx, image, np.int32(batch_size), np.int32(s),
                     self._sdata, block=block, grid=grid)

    sdata_as_theano = to_cudandarray(self._sdata)
    m = len(MASK)
    outputs[0][0] = sdata_as_theano[:m]
    outputs[1][0] = sdata_as_theano[m:2 * m]
    outputs[2][0] = sdata_as_theano[2 * m:]
def test_to_cudandarray():
    """Check ``to_cudandarray`` on a float32 GPU array and on rejected inputs.

    A float32 pycuda array must convert to a ``cuda.CudaNdarray`` with equal
    contents, dtype, shape and (byte-scaled) strides; a float64 pycuda array
    and a plain numpy array must both raise ``ValueError``.
    """
    gpu_arr = pycuda.gpuarray.zeros((3, 4, 5), 'float32')
    converted = to_cudandarray(gpu_arr)

    assert isinstance(converted, cuda.CudaNdarray)
    host_from_pycuda = gpu_arr.get()
    host_from_theano = numpy.asarray(converted)
    assert numpy.allclose(host_from_pycuda, host_from_theano)
    assert gpu_arr.dtype == converted.dtype
    assert gpu_arr.shape == converted.shape
    # The converted strides are multiplied by 4 before being compared with
    # the pycuda strides.
    scaled_strides = numpy.asarray(converted._strides) * 4
    assert all(scaled_strides == gpu_arr.strides)

    # float64 input is expected to be rejected.
    try:
        gpu_arr = pycuda.gpuarray.zeros((3, 4, 5), 'float64')
        to_cudandarray(gpu_arr)
    except ValueError:
        pass
    else:
        assert False

    # A host-side numpy array is expected to be rejected as well.
    try:
        to_cudandarray(numpy.zeros(4))
    except ValueError:
        pass
    else:
        assert False
def thunk():
    """Run ``image_mask_split_grad`` and store the result in ``outputs[0][0]``.

    Reads the mask indices from ``inputs[0][0]`` and the image from
    ``inputs[1][0]``.  Depending on which of ``"sum"`` / ``"pow"`` appear in
    ``self.connected``, one or two further inputs (``inputs[2][0]`` and
    ``inputs[3][0]``) are forwarded to the kernel; presumably these are the
    gradients of the corresponding outputs -- confirm against the caller.
    When neither is connected no kernel is launched and the zero-initialised
    buffer from ``pycuda_zeros`` is returned.

    Raises:
        AssertionError: if either input shape is rejected by ``shape_ok``.
    """
    grad = outputs[0][0]
    mask_idx = inputs[0][0]
    assert shape_ok(mask_idx.shape)
    s = mask_idx.shape[3]
    block_dim = min(32, s)
    # Integer ceil-division: always an int and independent of whether `/`
    # is true or floor division (math.ceil(s / block_dim) truncates first
    # and yields a float when `/` floors).
    grid_dim = (s + block_dim - 1) // block_dim
    mask_idx = to_gpuarray(mask_idx, copyif=True)
    image = inputs[1][0]
    assert shape_ok(image.shape)
    image = to_gpuarray(image, copyif=True)
    batch_size = min(mask_idx.shape[0], image.shape[0])
    grad_shape = (batch_size, 1, s, s)
    grad = pycuda_zeros(grad, grad_shape)
    grid = (batch_size, grid_dim, grid_dim)
    block = (1, block_dim, block_dim)

    sum_connected = "sum" in self.connected
    pow_connected = "pow" in self.connected
    if sum_connected and pow_connected:
        n_extra = 2  # inputs[2] and inputs[3]
    elif sum_connected or pow_connected:
        n_extra = 1  # only inputs[2]
    else:
        n_extra = 0

    if n_extra:
        extra = [to_gpuarray(inputs[2 + i][0], copyif=True)
                 for i in range(n_extra)]
        kernel_args = ([mask_idx, image] + extra +
                       [np.int32(batch_size), np.int32(s), grad])
        image_mask_split_grad(*kernel_args, block=block, grid=grid)

    outputs[0][0] = to_cudandarray(grad)
def thunk():
    """Run ``image_mask_split_grad`` and store the result in ``outputs[0][0]``.

    Reads the mask indices from ``inputs[0][0]`` and the image from
    ``inputs[1][0]``.  Depending on which of ``"sum"`` / ``"pow"`` appear in
    ``self.connected``, one or two further inputs (``inputs[2][0]`` and
    ``inputs[3][0]``) are forwarded to the kernel; presumably these are the
    gradients of the corresponding outputs -- confirm against the caller.
    When neither is connected no kernel is launched and the zero-initialised
    buffer from ``pycuda_zeros`` is returned.

    Raises:
        AssertionError: if either input shape is rejected by ``shape_ok``.
    """
    grad = outputs[0][0]
    mask_idx = inputs[0][0]
    assert shape_ok(mask_idx.shape)
    s = mask_idx.shape[3]
    block_dim = min(32, s)
    # Integer ceil-division keeps the grid dimension an int; the float
    # form math.ceil(s / block_dim) under-counts blocks whenever `/`
    # floors (e.g. s=48 would give 1 instead of 2).
    grid_dim = (s + block_dim - 1) // block_dim
    mask_idx = to_gpuarray(mask_idx, copyif=True)
    image = inputs[1][0]
    assert shape_ok(image.shape)
    image = to_gpuarray(image, copyif=True)
    batch_size = min(mask_idx.shape[0], image.shape[0])
    grad_shape = (batch_size, 1, s, s)
    grad = pycuda_zeros(grad, grad_shape)
    grid = (batch_size, grid_dim, grid_dim)
    block = (1, block_dim, block_dim)

    sum_connected = "sum" in self.connected
    pow_connected = "pow" in self.connected
    if sum_connected and pow_connected:
        n_extra = 2  # inputs[2] and inputs[3]
    elif sum_connected or pow_connected:
        n_extra = 1  # only inputs[2]
    else:
        n_extra = 0

    if n_extra:
        extra = [to_gpuarray(inputs[2 + i][0], copyif=True)
                 for i in range(n_extra)]
        kernel_args = ([mask_idx, image] + extra +
                       [np.int32(batch_size), np.int32(s), grad])
        image_mask_split_grad(*kernel_args, block=block, grid=grid)

    outputs[0][0] = to_cudandarray(grad)