def grad2d(x):
    """Compute the 2D gradient of x along each axis."""
    if prg is None:
        build(x)
    queue = x.queue
    gx = npcl.zeros_like(x)
    gy = npcl.zeros_like(x)
    prg.grad(queue, x.shape[::-1], None, x.data, gx.data, gy.data)
    return gx, gy


def divergence2d(px, py):
    """Compute the 2D divergence of the vector field (px, py)."""
    if prg is None:
        build(px)
    queue = px.queue
    d = npcl.zeros_like(px)
    prg.divergence2d(queue, d.shape[::-1], None, px.data, py.data, d.data)
    return d


def norm2d(gx, gy):
    """Compute the pointwise Euclidean norm of the field (gx, gy)."""
    if prg is None:
        build(gx)
    queue = gx.queue
    norm = npcl.zeros_like(gx)
    prg.norm(queue, norm.shape[::-1], None, gx.data, gy.data, norm.data)
    return norm


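# Usage sketch for the three wrappers above (illustrative, not part of the
# module API). The total variation of a device image u is the sum of its
# pointwise gradient norms; assuming the kernels follow the usual
# forward-difference/adjoint convention, divergence2d composed with grad2d
# yields a discrete Laplacian (up to sign):
#
#     gx, gy = grad2d(u)
#     tv = npcl.sum(norm2d(gx, gy)).get()  # scalar TV value on the host
#     lap = divergence2d(gx, gy)           # discrete Laplacian of u

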
def denoise_tv(image, weight=0.1, eps=2.e-4, n_iter_max=100):
    """
    Total Variation denoising with Chambolle's projection algorithm.

    Inputs:
        image : input array (2D).
        weight : denoising weight; larger values remove more noise.
        eps : relative tolerance on the cost for the stopping criterion.
        n_iter_max : maximum number of iterations.
    Outputs:
        out : the denoised array.
    """
    img_dev = image.copy()
    ndim = 2
    weight = np.float32(weight)
    eps = np.float32(eps)
    px = npcl.zeros_like(image)
    py = npcl.zeros_like(image)
    d = npcl.zeros_like(image)
    tau = np.float32(1 / (2. * ndim))
    N = np.float32(img_dev.shape[0] * img_dev.shape[1])
    i = 0
    while i < n_iter_max:
        if i > 0:
            # d will be the (negative) divergence of p
            d = divergence2d(px, py)
            d = -d
            out = img_dev + d
        else:
            out = img_dev
        E = npcl.sum((d ** 2)).get()
        # (gx, gy) stores the gradients of out along each axis
        gx, gy = grad2d(out)
        norm = norm2d(gx, gy)
        E += weight * npcl.sum(norm).get()
        # dual update: gradient step on p, then pointwise renormalization
        norm *= tau / weight
        norm += np.float32(1)
        px = px - tau * gx
        py = py - tau * gy
        px /= norm
        py /= norm
        E /= N
        if i == 0:
            E_init = E
            E_previous = E
        else:
            if np.abs(E_previous - E) < eps * E_init:
                break
            else:
                E_previous = E
        i += 1
    return out


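# Usage sketch for denoise_tv (illustrative; assumes an existing
# pyopencl.CommandQueue `queue` and that images are moved to the device
# with pyopencl.array.to_device):
#
#     import pyopencl.array
#     rng = np.random.default_rng(0)
#     host = (0.2 * rng.standard_normal((64, 64))).astype(np.float32)
#     noisy = pyopencl.array.to_device(queue, host)
#     clean = denoise_tv(noisy, weight=0.1, n_iter_max=200)

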
def inpaint_tv(
        img,
        mask,
        mu=np.float32(1e-2),
        gamma=np.float32(1e-1),
        tol=np.float32(1e-4),
        max_iter=1000,
        verbose=False,
        ):
    """
    Total Variation inpainting with the Split Bregman method.
    """
    def masked_laplacian(x):
        px = convolve(x, dx)
        py = convolve(x, dy)
        qx = convolve(mask * px, dxT)
        qy = convolve(mask * py, dyT)
        return qx + qy

    def masked_divergence(px, py):
        qx = convolve(mask * px, dxT)
        qy = convolve(mask * py, dyT)
        return qx + qy

    def A(x):
        return (1 - mask) * x + gamma * masked_laplacian(x)

    d1, d2, b1, b2, uf, ul = [npcl.zeros_like(img) for _ in range(6)]
    img_norm = npcl.sum(img ** 2)
    for k in range(max_iter):
        # u-subproblem: a few CG steps on A(u) = b, warm-started at uf
        b = (1 - mask) * img + gamma * masked_divergence(d1 - b1, d2 - b2)
        ul, k_sub = solve_cg(A, b, uf, max_iter=10)
        # d-subproblem: pointwise shrinkage of the gradients
        u1 = convolve(ul, dx)
        u2 = convolve(ul, dy)
        d1, d2 = shrink(u1 + b1, u2 + b2, mu * mask / gamma)
        # Bregman variable update
        b1 = b1 + u1 - d1
        b2 = b2 + u2 - d2
        gap = npcl.sum((ul - uf) ** 2) / img_norm
        if verbose:
            print('iteration number: ', k + 1, ', gap: ', gap.get())
        if gap < tol ** 2:
            break
        uf = ul.copy()
    return uf, k


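# Usage sketch for inpaint_tv (illustrative). Judging from the (1 - mask)
# fidelity term above, mask is assumed to be 1.0 on the pixels to be filled
# in and 0.0 on known pixels; img and mask are float32 device arrays of the
# same shape:
#
#     restored, n_iter = inpaint_tv(img, mask, mu=np.float32(1e-2),
#                                   gamma=np.float32(1e-1), verbose=True)

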
def convolve2d_sv(x, k, padding='zero'):
    r"""
    Compute 2D spatially-variant convolution.

    This function computes

        y_{i, j} = \sum_{k, l} k_{k, l, i, j} x_{i + k, j + l},

    where k is the convolutional kernel.
    Inputs:
        x : input array (2D).
        k : convolutional kernel array (4D).
            dimensions : kernel window (2D) x image size (2D)
        padding : boundary handling, one of 'zero', 'same' or 'wrap'.
    Outputs:
        y : output array.
    """
    if prg is None:
        build(x)
    if padding == 'zero':
        run_kernel = prg.convolve2d_sv_z
    elif padding == 'same':
        run_kernel = prg.convolve2d_sv_s
    elif padding == 'wrap':
        run_kernel = prg.convolve2d_sv_w
    queue = x.queue
    res = npcl.zeros_like(x)
    run_kernel(
        queue, x.shape[::-1], None,
        x.data, k.data, res.data,
        np.int32(k.shape[0]), np.int32(k.shape[1]),
    )
    return res


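# Usage sketch for convolve2d_sv (illustrative). Per the docstring, the
# kernel is indexed as k[k, l, i, j]: a (window_h, window_w, H, W) array
# holding one window per output pixel. A spatially-uniform 3x3 box blur,
# for instance, broadcasts the same window everywhere (assumes an existing
# queue and pyopencl.array.to_device):
#
#     h, w = x.shape
#     box = (np.ones((3, 3, h, w)) / 9).astype(np.float32)
#     y = convolve2d_sv(x, pyopencl.array.to_device(queue, box), 'same')

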
def solve_flb(
        A,
        AT,
        b,
        delta=np.float32(5e-4),
        mu=np.float32(2e4),
        tol=np.float32(1e-5),
        stuck=np.float32(1e-2),
        verbose=False,
        max_iter=5000,
        ):
    """
    Fast Linearized Bregman (FLB) Method.

    This function solves the following problem:
        minimize |x|_1 subject to Ax = b
    Inputs:
        A : a python function that computes Ax, i.e.,
            A(x) = Ax for a vector (pyopencl.array.Array) x.
        AT : a python function that computes A^Tx, i.e.,
            AT(x) = A^Tx for a vector (pyopencl.array.Array) x.
        b : (pyopencl.array.Array) represents the vector b.
        delta : (np.float32) parameter for the gradient update step.
        mu : (np.float32) l1 regularization parameter.
        tol : (np.float32) represents the tolerance value.
        stuck : (np.float32) threshold for detecting a stalled residual.
        max_iter : maximum number of iterations.
    Outputs:
        x : (pyopencl.array.Array) the solution x.
        k : (int) the total iteration number.
    """
    ATb = AT(b)

    def ATA(x):
        return AT(A(x))

    def norm(x):
        # l1 norm of a device array, returned on the host
        return npcl.sum(npcl.fabs(x)).get()

    x = npcl.zeros_like(ATb)
    v = x.copy()
    normb = norm(b)
    kick_test = [0 for i in range(5)]
    k = 0
    while True:
        k += 1
        v += (ATb - ATA(x))
        x_new = delta * soft_shrink(v, mu)
        residual = np.log(norm(A(x_new) - b))
        if verbose:
            print('iteration number: ', k, 'log residual: ', residual)
        kick_test = kick_test[-4:] + [residual]
        # kicking: if the log residual has stalled over the last five
        # iterations, advance the stationary zero components in one step
        if min(kick_test) + stuck > max(kick_test):
            k += 1
            I_0 = (x_new == 0.).astype(np.float32)
            r = ATb - ATA(x_new)
            s = ((mu * sign(r) - v) / r).astype(np.int32) * I_0
            smin = np.float32(npcl.min(s + 1e7 * (1 - I_0)).get())
            if smin <= 2:
                v += r
            else:
                v += smin * I_0 * r
            x_new = delta * soft_shrink(v, mu)
            if verbose:
                print(
                    'kicking occurred at iteration number: ', k,
                    'log residual: ', residual,
                )
        r_new = A(x_new) - b
        if norm(r_new) < normb * tol:
            break
        if k >= max_iter:
            break
        x = x_new.copy()
    return x_new, k


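# Usage sketch for solve_flb (illustrative): A and AT only need to be
# callables on device arrays. A toy self-adjoint example is a sampling
# operator built from a hypothetical float32 device array `sampling_mask`
# of zeros and ones:
#
#     def A_op(x):
#         return sampling_mask * x
#
#     x_rec, n_iter = solve_flb(A_op, A_op, b, verbose=True)

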
def convolve2d(x, k, padding='zero'):
    """
    Compute 2D convolution.

    This function computes y = (k*x), where
        k : convolutional kernel.
    Inputs:
        x : input array.
        k : convolutional kernel array.
        padding : boundary handling, one of 'zero', 'same' or 'wrap'.
    Outputs:
        y : output array.
    """
    if prg is None:
        build(x)
    if use_local_mem(k.shape):
        if padding == 'zero':
            run_kernel = prg.convolve2d_loc_z
        elif padding == 'same':
            run_kernel = prg.convolve2d_loc_s
        elif padding == 'wrap':
            run_kernel = prg.convolve2d_loc_w
        queue = x.queue
        res = npcl.zeros_like(x)
        # round the global size up to a multiple of the tile size TS
        padded_shape = (
            x.shape[0] + (-x.shape[0]) % TS,
            x.shape[1] + (-x.shape[1]) % TS,
        )
        # local cache: one float32 (4 bytes) per tile element plus halo
        cache_size = 4 * (TS + 2 * (k.shape[0] // 2)) \
            * (TS + 2 * (k.shape[1] // 2))
        # the tiled kernel launches a padded global size; on Intel devices
        # only use it when no padding is needed
        if 'intel' not in prg.context.devices[0].vendor.lower() \
                or x.shape == padded_shape:
            run_kernel(
                queue, padded_shape[::-1], (TS, TS),
                x.data, k.data, cl.LocalMemory(cache_size), res.data,
                np.int32(x.shape[0]), np.int32(x.shape[1]),
                np.int32(k.shape[0]), np.int32(k.shape[1]),
            )
            return res
    # fall back to the global-memory kernel
    if padding == 'zero':
        run_kernel = prg.convolve2d_z
    elif padding == 'same':
        run_kernel = prg.convolve2d_s
    elif padding == 'wrap':
        run_kernel = prg.convolve2d_w
    queue = x.queue
    res = npcl.zeros_like(x)
    run_kernel(
        queue, x.shape[::-1], None,
        x.data, k.data, res.data,
        np.int32(k.shape[0]), np.int32(k.shape[1]),
    )
    return res
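

# Usage sketch for convolve2d (illustrative; assumes an existing queue and
# pyopencl.array.to_device): blur a device image x with a normalized 3x3
# kernel.
#
#     g = np.outer([1., 2., 1.], [1., 2., 1.]).astype(np.float32)
#     g /= g.sum()
#     y = convolve2d(x, pyopencl.array.to_device(queue, g), padding='same')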