                        cudaGetErrorString(sts),
                        n_blocks.x,
                        n_blocks.y,
                        n_threads.x,
                        n_threads.y,
                        n_threads.z,
                        n_shared);
                    %(fail)s;
                }
            } // END NESTED SCOPE
        """ % locals()


def gpu_images2neibs(ten4, neib_shape, neib_step=None, mode='valid'):
    return GpuImages2Neibs(mode)(ten4, neib_shape, neib_step)


@local_optimizer([Images2Neibs])
def use_gpu_images2neibs(node):
    if (type(node.op) is Images2Neibs and
            node.inputs[0].dtype == 'float32' and
            node.op.mode in ['valid', 'ignore_borders', 'wrap_centered']):
        return [host_from_gpu(gpu_images2neibs(gpu_from_host(node.inputs[0]),
                                               node.inputs[1],
                                               node.inputs[2],
                                               mode=node.op.mode))]


if cuda_available:
    register_gpu_opt()(use_gpu_images2neibs)
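# Illustrative only (not in the original module): a minimal sketch of a graph
# that the optimizer above can rewrite, assuming a Theano installation with
# CUDA enabled. The helper name ``_demo_gpu_images2neibs`` is hypothetical,
# and the ``images2neibs`` import path may vary across Theano versions.
def _demo_gpu_images2neibs():
    import numpy
    import theano
    from theano import tensor as T
    from theano.tensor.nnet.neighbours import images2neibs

    ten4 = T.ftensor4('ten4')                      # float32, as the opt requires
    neibs = images2neibs(ten4, neib_shape=(2, 2))  # mode defaults to 'valid'
    # On a CUDA device, compiling here lets use_gpu_images2neibs substitute
    # GpuImages2Neibs for the host Images2Neibs op.
    f = theano.function([ten4], neibs)

    x = numpy.arange(16, dtype='float32').reshape(1, 1, 4, 4)
    return f(x)  # each output row is one flattened 2x2 patch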
        return code


@local_optimizer([CumsumOp])
def use_gpu_cumsum(node):
    if (type(node.op) is CumsumOp and
            node.inputs[0].dtype == 'float32' and
            node.inputs[0].owner and
            isinstance(node.inputs[0].owner.op, HostFromGpu)):
        axis = node.op.axis
        x = node.inputs[0]

        if axis is not None and x.ndim > GpuCumsum.SUPPORTED_NDIMS:
            return None

        x = gpu_from_host(x)

        if axis is None and x.ndim > 1:
            x = GpuFlatten()(x)

        # ``gpu_cumsum`` assumes the array has been flattened if needed.
        if axis is None:
            axis = 0

        return [host_from_gpu(GpuCumsum(axis)(x))]


if cuda_available:
    register_gpu_opt()(use_gpu_cumsum)
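# Illustrative only (not in the original module): a minimal sketch of a graph
# that use_gpu_cumsum above can rewrite, assuming Theano with CUDA support and
# device=gpu so the input arrives through HostFromGpu. The helper name
# ``_demo_gpu_cumsum`` is hypothetical.
def _demo_gpu_cumsum():
    import numpy
    import theano
    from theano.tensor import fvector
    from theano.tensor.extra_ops import cumsum

    x = fvector('x')  # float32, as the optimizer requires
    # With device=gpu, the transfer ops make the CumsumOp node eligible for
    # replacement by GpuCumsum during compilation.
    f = theano.function([x], cumsum(x))
    return f(numpy.ones(5, dtype='float32'))  # -> [1. 2. 3. 4. 5.]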