def _calcnormforward(self, rhs, lhs, idev, ifun, odd=0):
    """Add the squared norm of (output - input) for one buffer to rhs or lhs.

    The difference between ``self.outp[ifun][2*idev+odd]`` and one entry of
    ``self.inp[ifun][2*idev+odd]`` is taken over the first ``self.slices``
    entries of the leading axis.  The ``vdot`` of that difference with
    itself is then added to exactly one of the two accumulators.

    Parameters
    ----------
    rhs, lhs
        Running accumulators; only one of them is incremented per call.
    idev
        Combined with ``odd`` as ``2*idev+odd`` to select the buffer.
    ifun
        Index into ``self.lhs``, ``self.outp`` and ``self.inp``.
    odd
        Offset added to ``2*idev`` (default 0).

    Returns
    -------
    tuple
        ``(rhs, lhs)`` after the update.
    """
    buf = 2*idev + odd

    # NOTE(review): ``is False`` only matches the Python ``False`` singleton;
    # any other value (a numpy bool included) selects the ``lhs`` branch.
    # Confirm ``self.lhs`` holds plain bools.
    to_rhs = self.lhs[ifun] is False

    # The rhs branch compares against the first input entry, the lhs branch
    # against the last one.
    reference = self.inp[ifun][buf][0 if to_rhs else -1]

    # Form the difference once and use it for both vdot operands; the
    # original evaluated the identical expression twice.
    diff = (self.outp[ifun][buf][:self.slices, ...]
            - reference[:self.slices, ...])
    sq_norm = clarray.vdot(diff, diff).get()

    if to_rhs:
        rhs += sq_norm
    else:
        lhs += sq_norm
    return (rhs, lhs)
def _calcResidual(self, step_out, tmp_results, step_in, data):
    """Evaluate the cost value and two residual norms for the current step.

    Parameters
    ----------
    step_out
        Mapping read for key ``"x"``.
    tmp_results
        Mapping read for keys ``"DADA"``, ``"DAd"``, ``"gradx"``,
        ``"gradFx"`` and ``"Ax"``.
    step_in
        Not referenced in this function.
    data
        Subtracted from ``tmp_results["Ax"]`` to form the data term.

    Returns
    -------
    tuple
        ``(costs, f_new, grad_f)`` where ``costs`` is the sum of the data,
        L2 and regularisation terms.
    """
    grad_x = tmp_results["gradx"]

    # TODO: calculate on GPU
    objective = clarray.vdot(tmp_results["DADA"], tmp_results["DAd"])
    objective = objective + clarray.sum(
        self.lambd * clmath.log(1 + clarray.vdot(grad_x, grad_x))
    )
    f_new = np.linalg.norm(objective.get())
    grad_f = np.linalg.norm(tmp_results["gradFx"].get())

    # TODO: compute the data cost on the GPU or obtain it from the caller.
    # NOTE(review): unlike the other terms, no ``.get()`` is applied before
    # ``np.linalg.norm`` here -- confirm ``Ax`` and ``data`` are host arrays.
    residual = tmp_results["Ax"] - data
    datacost = 2 * np.linalg.norm(residual) ** 2

    # TODO: calculate on GPU
    x_host = step_out["x"].get()
    L2Cost = np.linalg.norm(x_host) / (2.0 * self.delta)

    log_term = clmath.log(1 + clarray.vdot(grad_x, grad_x))
    regcost = self.lambd * np.sum(np.abs(log_term.get()))

    costs = datacost + L2Cost + regcost
    return costs, f_new, grad_f
def _calcnormreverse(self, rhs, lhs, idev, ifun, odd=0):
    """Add the squared norm of (output - input) for one buffer to rhs or lhs.

    The difference between ``self.outp[ifun][2*idev+odd]`` and one entry of
    ``self.inp[ifun][2*idev+odd]`` is taken from index ``self.overlap``
    onwards along the leading axis.  The ``vdot`` of that difference with
    itself is then added to exactly one of the two accumulators.

    Parameters
    ----------
    rhs, lhs
        Running accumulators; only one of them is incremented per call.
    idev
        Combined with ``odd`` as ``2*idev+odd`` to select the buffer.
    ifun
        Index into ``self.lhs``, ``self.outp`` and ``self.inp``.
    odd
        Offset added to ``2*idev`` (default 0).

    Returns
    -------
    tuple
        ``(rhs, lhs)`` after the update.
    """
    buf = 2*idev + odd

    # NOTE(review): ``is False`` only matches the Python ``False`` singleton;
    # any other value (a numpy bool included) selects the ``lhs`` branch.
    # Confirm ``self.lhs`` holds plain bools.
    to_rhs = self.lhs[ifun] is False

    # The rhs branch compares against the first input entry, the lhs branch
    # against the last one.
    reference = self.inp[ifun][buf][0 if to_rhs else -1]

    # Form the difference once and use it for both vdot operands; the
    # original evaluated the identical expression twice.
    diff = (self.outp[ifun][buf][self.overlap:, ...]
            - reference[self.overlap:, ...])
    sq_norm = clarray.vdot(diff, diff).get()

    if to_rhs:
        rhs += sq_norm
    else:
        lhs += sq_norm
    return (rhs, lhs)
def test_dot(ctx_factory):
    """Check device ``dot`` and ``vdot`` against NumPy for all dtype pairs.

    The test is skipped when ``mako`` cannot be imported.  Double-precision
    dtypes are included only if ``has_double_support`` reports them for the
    first device of the context.
    """
    from pytest import importorskip
    importorskip("mako")

    ctx = ctx_factory()
    cmd_queue = cl.CommandQueue(ctx)

    candidates = [np.float32, np.complex64]
    if has_double_support(ctx.devices[0]):
        candidates += [np.float64, np.complex128]

    def rel_err(got, expected):
        # Relative deviation of the device result from the host reference.
        return abs(got - expected) / abs(expected)

    pairs = [(first, second) for first in candidates for second in candidates]
    for a_dtype, b_dtype in pairs:
        print(a_dtype, b_dtype)

        dev_a = general_clrand(cmd_queue, (200000,), a_dtype)
        host_a = dev_a.get()
        dev_b = general_clrand(cmd_queue, (200000,), b_dtype)
        host_b = dev_b.get()

        expected = np.dot(host_a, host_b)
        got = cl_array.dot(dev_a, dev_b).get()
        assert rel_err(got, expected) < 1e-4

        expected = np.vdot(host_a, host_b)
        got = cl_array.vdot(dev_a, dev_b).get()
        assert rel_err(got, expected) < 1e-4
def _calcResidual(self, step_out, tmp_results, step_in, data):
    """Evaluate the cost value and two residual norms for the current step.

    Parameters
    ----------
    step_out
        Mapping read for key ``"x"``.
    tmp_results
        Mapping read for keys ``"Ax"``, ``"gradx"`` and ``"gradFx"``.
    step_in
        Mapping read for key ``"xk"``.
    data
        Passed together with ``tmp_results["Ax"]`` to ``self.normkrnldiff``.

    Returns
    -------
    tuple
        ``(costs.get(), f_new, grad_f)``.  ``costs`` is built from the data
        and regularisation terms only; the ``1 / (2 * delta)`` weighted
        term enters ``f_new`` but not ``costs``.
    """
    data_term = self.normkrnldiff(tmp_results["Ax"], data)

    grad_x = tmp_results["gradx"]
    log_term = clmath.log(1 + clarray.vdot(grad_x, grad_x))
    regcost = self.lambd * np.sum(np.abs(log_term.get()))

    step_weight = 1 / (2 * self.delta)
    step_term = self.normkrnldiff(step_out["x"], step_in["xk"])
    f = data_term + step_weight * step_term + regcost
    f_new = np.linalg.norm(f.get())

    # The kernel call is kept ahead of the host transfer of ``gradFx``,
    # exactly as in the original statement order; its return value is unused.
    grad_fx = tmp_results["gradFx"]
    self.normkernl(grad_fx, grad_fx)
    grad_f = np.linalg.norm(tmp_results["gradFx"].get())

    datacost = 2 * data_term ** 2
    costs = datacost + regcost
    return costs.get(), f_new, grad_f
def test_dot(ctx_factory):
    """Compare ``cl_array.dot``/``cl_array.vdot`` with their NumPy versions.

    Every ordered pair of supported dtypes is exercised on two arrays of
    200000 elements produced by ``general_clrand``.  The test is skipped if
    ``mako`` is not importable.
    """
    from pytest import importorskip
    importorskip("mako")

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    double_ok = has_double_support(context.devices[0])
    dtypes = [np.float32, np.complex64]
    dtypes = dtypes + ([np.float64, np.complex128] if double_ok else [])

    shape = (200000, )
    tolerance = 1e-4

    for a_dtype in dtypes:
        for b_dtype in dtypes:
            print(a_dtype, b_dtype)

            a_gpu = general_clrand(queue, shape, a_dtype)
            a = a_gpu.get()
            b_gpu = general_clrand(queue, shape, b_dtype)
            b = b_gpu.get()

            # Plain product first, then the conjugating one.
            checks = ((np.dot, cl_array.dot), (np.vdot, cl_array.vdot))
            for host_op, device_op in checks:
                reference = host_op(a, b)
                result = device_op(a_gpu, b_gpu).get()
                assert abs(result - reference) / abs(reference) < tolerance
def _calcStepsize(self, x_shape, data_shape, iterations=50, tol=1e-3):
    """Estimate the constant ``L`` and derive the step size ``self.alpha``.

    Starting from a random complex array, the loop normalises ``x``,
    applies ``self._op.fwd`` followed by ``self._op.adj`` and recomputes
    ``L`` from ``vdot(x, x_old)``, ``self.lambd`` and ``self.delta``.
    Afterwards ``self.alpha`` is set to ``2 * (1 - self.beta) / L`` and the
    result is printed.

    Parameters
    ----------
    x_shape
        Shape of the random start array ``x``.
    data_shape
        Shape of the buffer ``x1`` that receives the forward result.
    iterations
        Maximum number of loop passes (default 50).
    tol
        Threshold on ``norm(x - x_old)`` for early termination; only
        checked once ``i > 10``.

    Notes
    -----
    With ``iterations=0`` the loop body never runs, ``L`` stays 0 and the
    division for ``self.alpha`` divides by zero.
    """
    # Random complex start array; real and imaginary parts drawn separately.
    x_temp = np.random.randn(*(x_shape)).astype(
        self._DTYPE_real
    ) + 1j * np.random.randn(*(x_shape)).astype(self._DTYPE_real)
    x = clarray.to_device(self._queue[0], x_temp)
    x_old = clarray.to_device(self._queue[0], x_temp)

    # Buffer for the forward result; its initial random content is
    # presumably overwritten by the first ``fwd`` call (passed as ``out``).
    data_temp = np.random.randn(*(data_shape)).astype(
        self._DTYPE_real
    ) + 1j * np.random.randn(*(data_shape)).astype(self._DTYPE_real)
    x1 = clarray.to_device(self._queue[0], data_temp)
    L = 0

    print("Start: Stepsize calculation")
    for i in range(iterations):
        # TODO: calculate on GPU
        x_norm = self._DTYPE_real(np.linalg.norm(x.get()))
        # Division creates a new array, so ``x`` no longer refers to the
        # object ``x_old`` was bound to in the previous pass.
        x = x / x_norm

        # TODO: find a stopping criteria
        # TODO: calculate on GPU
        if i > 10 and np.linalg.norm((x - x_old).get()) < tol:
            print(
                "Termination: Stepsize found after %i with tol: %f"
                % (i, np.linalg.norm((x - x_old).get()))
            )
            break
        # NOTE(review): this binds ``x_old`` to the same object as ``x``.
        # If ``adj`` writes its result into its first argument (as the
        # ``out=`` keyword of ``fwd`` suggests), ``x_old`` changes with it
        # and ``vdot(x, x_old)`` below becomes ``vdot(x, x)`` -- confirm
        # this is intended.
        x_old = x

        self._op.fwd(
            out=x1,
            inp=[x_old, self._coils, self.modelgrad],
            wait_for=x.events,
        ).wait()
        self._op.adj(
            x,
            [x1, self._coils, self.modelgrad],
            wait_for=x1.events,
        ).wait()

        # Norm forward operator, Norm Gradient,
        # L = np.maximum(
        #     L, np.abs(clarray.vdot(x, x_old).get()) + 8 * self.lambd + 1 / self.delta
        # )
        # ``L`` is overwritten on every pass, so only the value from the
        # last executed iteration is used after the loop.
        L = np.sqrt(
            np.abs(clarray.vdot(x, x_old).get()) + 8 * self.lambd + 1 / self.delta ** 2
        )
    L = self._DTYPE_real(L)
    self.alpha = 2 * (1 - self.beta) / L
    print(
        "Found Stepsize: \u03B1 %2.1e, \u03B2 %2.1e, L %2.1e\r"
        % (self.alpha, self.beta, L)
    )
def cg_solve(self, x, iters):
    """Run up to ``iters`` conjugate-gradient steps, storing every iterate.

    The right-hand side ``b`` is produced by ``self.operator_rhs`` from
    ``self.data``.  Each step applies ``self.operator_lhs`` plus
    ``self.reco_par["lambd"]`` times the search direction.  The relative
    squared residual of every step is appended to ``self.res``.

    Parameters
    ----------
    x
        Host array whose leading axis indexes the iterates; ``x[i]`` is read
        and ``x[i+1]`` is written in step ``i``, so that axis needs at least
        ``iters + 1`` entries.
    iters
        Maximum number of steps.

    Returns
    -------
    Host copy of ``x``.  On early convergence in step ``i`` only the
    leading ``i + 1`` entries are returned.
    """
    x = clarray.to_device(self.queue, np.require(x, requirements="C"))
    buf_shape = (self.NScan, 1, self.NSlice, self.dimY, self.dimX)
    b = clarray.empty(self.queue, buf_shape, DTYPE, "C")
    Ax = clarray.empty(self.queue, buf_shape, DTYPE, "C")
    data = clarray.to_device(self.queue, self.data)
    self.operator_rhs(b, data)

    res = b
    p = res

    # ``b`` is never rebound or written to below (assuming ``operator_lhs``
    # does not modify its second argument), so its squared norm is
    # transferred and computed once instead of on every iteration.
    b_norm_sq = np.linalg.norm(b.get())**2

    delta = np.linalg.norm(res.get())**2/b_norm_sq
    self.res.append(delta)
    print("Initial Residuum: ", delta)

    for i in range(iters):
        self.operator_lhs(Ax, p)
        # Rebinding creates a fresh array; the regularisation term is added
        # on top of the operator result.
        Ax = Ax + self.reco_par["lambd"]*p
        alpha = (clarray.vdot(res, res)/(clarray.vdot(p, Ax))).real.get()
        x[i+1] = (x[i] + alpha*p)
        res_new = res - alpha*Ax

        delta = np.linalg.norm(res_new.get())**2/b_norm_sq
        self.res.append(delta)
        if delta < self.reco_par["tol"]:
            print("Converged after %i iterations to %1.3e." % (i, delta))
            # NOTE(review): the slice stops at index ``i`` and therefore
            # leaves out ``x[i+1]``, the iterate written just above whose
            # residual met the tolerance -- confirm this is intended.
            return x.get()[:i+1, ...]

        # The original guarded this with ``np.mod(i, 1)``, which is always
        # zero, so the progress line is printed on every iteration.
        print("Residuum at iter %i : %1.3e" % (i, delta), end='\r')

        beta = (clarray.vdot(res_new, res_new) /
                clarray.vdot(res, res)).real.get()
        p = res_new+beta*p
        # The previous residual is not needed any more.
        res = res_new
    return x.get()
def test_dot(ctx_factory):
    """Check device ``dot`` and ``vdot`` against NumPy for all dtype pairs.

    Skipped when ``mako`` is not importable.  ``float64`` is added when the
    device supports doubles; ``complex128`` is added as well unless
    ``has_struct_arg_count_bug`` returns ``"apple"`` for the device.  If
    ``np.vdot`` raises ``NotImplementedError`` under PyPy, the ``vdot``
    comparison for that pair is skipped.
    """
    from pytest import importorskip
    importorskip("mako")

    context = ctx_factory()
    queue = cl.CommandQueue(context)
    dev = context.devices[0]

    dtypes = [np.float32, np.complex64]
    if has_double_support(dev):
        dtypes.append(np.float64)
        if has_struct_arg_count_bug(dev) != "apple":
            dtypes.append(np.complex128)

    for a_dtype in dtypes:
        for b_dtype in dtypes:
            print(a_dtype, b_dtype)

            a_gpu = general_clrand(queue, (200000,), a_dtype)
            host_a = a_gpu.get()
            b_gpu = general_clrand(queue, (200000,), b_dtype)
            host_b = b_gpu.get()

            dot_ref = np.dot(host_a, host_b)
            dot_dev = cl_array.dot(a_gpu, b_gpu).get()
            assert abs(dot_dev - dot_ref) / abs(dot_ref) < 1e-4

            try:
                vdot_ref = np.vdot(host_a, host_b)
            except NotImplementedError:
                import sys
                # Only tolerate the missing implementation on PyPy.
                if "__pypy__" not in sys.builtin_module_names:
                    raise
                print("PYPY: VDOT UNIMPLEMENTED")
                continue

            vdot_dev = cl_array.vdot(a_gpu, b_gpu).get()
            rel_err = abs(vdot_dev - vdot_ref) / abs(vdot_ref)
            assert rel_err < 1e-4, rel_err
def test_dot(ctx_factory):
    """Compare ``cl_array.dot``/``cl_array.vdot`` with their NumPy versions.

    Runs over every ordered pair of the selected dtypes.  The test is
    skipped if ``mako`` is not importable.  A ``NotImplementedError`` from
    ``np.vdot`` is accepted only on PyPy, where the ``vdot`` comparison for
    that pair is skipped; elsewhere it is re-raised.
    """
    import sys
    from pytest import importorskip
    importorskip("mako")

    context = ctx_factory()
    queue = cl.CommandQueue(context)
    device = context.devices[0]

    # Double-precision dtypes depend on device capabilities.
    extra = []
    if has_double_support(device):
        if has_struct_arg_count_bug(device) == "apple":
            extra = [np.float64]
        else:
            extra = [np.float64, np.complex128]
    dtypes = [np.float32, np.complex64] + extra

    is_pypy = '__pypy__' in sys.builtin_module_names
    tolerance = 1e-4

    dtype_pairs = [(first, second) for first in dtypes for second in dtypes]
    for a_dtype, b_dtype in dtype_pairs:
        print(a_dtype, b_dtype)

        a_gpu = general_clrand(queue, (200000,), a_dtype)
        a = a_gpu.get()
        b_gpu = general_clrand(queue, (200000,), b_dtype)
        b = b_gpu.get()

        expected_dot = np.dot(a, b)
        actual_dot = cl_array.dot(a_gpu, b_gpu).get()
        assert abs(actual_dot - expected_dot) / abs(expected_dot) < tolerance

        try:
            expected_vdot = np.vdot(a, b)
        except NotImplementedError:
            if not is_pypy:
                raise
            print("PYPY: VDOT UNIMPLEMENTED")
            continue

        actual_vdot = cl_array.vdot(a_gpu, b_gpu).get()
        rel_err = abs(actual_vdot - expected_vdot) / abs(expected_vdot)
        assert rel_err < tolerance, rel_err
def vdot(self, x, y):
    """Return ``pyopencl.array.vdot(x, y)`` as a host value.

    The reduction is issued on ``self.queue`` and the result is fetched
    with ``.get()`` before returning.
    """
    import pyopencl.array as cl_array

    device_result = cl_array.vdot(x, y, queue=self.queue)
    return device_result.get()