Example #1
    def _calcnormforward(self, rhs, lhs, idev, ifun, odd=0):
        # vdot of a vector with itself gives its squared l2-norm (the first
        # argument is conjugated); here it is taken over the first
        # `self.slices` slices.
        if self.lhs[ifun] is False:
            rhs += clarray.vdot(
                self.outp[ifun][2*idev+odd][:self.slices, ...]
                - self.inp[ifun][2*idev+odd][0][:self.slices, ...],
                self.outp[ifun][2*idev+odd][:self.slices, ...]
                - self.inp[ifun][2*idev+odd][0][:self.slices, ...],
            ).get()
        else:
            lhs += clarray.vdot(
                self.outp[ifun][2*idev+odd][:self.slices, ...]
                - self.inp[ifun][2*idev+odd][-1][:self.slices, ...],
                self.outp[ifun][2*idev+odd][:self.slices, ...]
                - self.inp[ifun][2*idev+odd][-1][:self.slices, ...],
            ).get()
        return (rhs, lhs)
Example #2
    def _calcResidual(self, step_out, tmp_results, step_in, data):
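        # clarray.vdot conjugates its first argument (like numpy.vdot), so
        # vdot(gradx, gradx) below is the squared l2-norm of the gradient
        # that enters the log-penalty term.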

        f_new = clarray.vdot(tmp_results["DADA"], tmp_results["DAd"]) + clarray.sum(
            self.lambd
            * clmath.log(1 + clarray.vdot(tmp_results["gradx"], tmp_results["gradx"]))
        )

        # TODO: calculate on GPU
        f_new = np.linalg.norm(f_new.get())

        grad_f = np.linalg.norm(tmp_results["gradFx"].get())

        # TODO: compute the data cost here or pass it in from outside
        # datacost = 0  # self._fval_init
        # TODO: calculate on GPU
        datacost = 2 * np.linalg.norm(tmp_results["Ax"] - data) ** 2
        # datacost = 2 * np.linalg.norm(data - b) ** 2
        # self._FT.FFT(b, clarray.to_device(
        #       self._queue[0], (self._step_val[:, None, ...] *
        #          self.par["C"]))).wait()
        # b = b.get()
        # datacost = 2 * np.linalg.norm(data - b) ** 2
        # TODO: calculate on GPU
        L2Cost = np.linalg.norm(step_out["x"].get()) / (2.0 * self.delta)
        regcost = self.lambd * np.sum(
            np.abs(
                clmath.log(
                    1 + clarray.vdot(tmp_results["gradx"], tmp_results["gradx"])
                ).get()
            )
        )
        costs = datacost + L2Cost + regcost
        return costs, f_new, grad_f
Example #3
    def _calcnormreverse(self, rhs, lhs, idev, ifun, odd=0):
        # Same squared-norm accumulation as the forward variant, but taken
        # over the slices from `self.overlap` onwards.
        if self.lhs[ifun] is False:
            rhs += clarray.vdot(
                self.outp[ifun][2*idev+odd][self.overlap:, ...]
                - self.inp[ifun][2*idev+odd][0][self.overlap:, ...],
                self.outp[ifun][2*idev+odd][self.overlap:, ...]
                - self.inp[ifun][2*idev+odd][0][self.overlap:, ...],
            ).get()
        else:
            lhs += clarray.vdot(
                self.outp[ifun][2*idev+odd][self.overlap:, ...]
                - self.inp[ifun][2*idev+odd][-1][self.overlap:, ...],
                self.outp[ifun][2*idev+odd][self.overlap:, ...]
                - self.inp[ifun][2*idev+odd][-1][self.overlap:, ...],
            ).get()
        return (rhs, lhs)
Example #4
def test_dot(ctx_factory):
    from pytest import importorskip
    importorskip("mako")

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    dtypes = [np.float32, np.complex64]
    if has_double_support(context.devices[0]):
        dtypes.extend([np.float64, np.complex128])

    for a_dtype in dtypes:
        for b_dtype in dtypes:
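            # Compare device-side dot and vdot against the numpy reference
            # for every dtype pairing; results must agree to 1e-4 relative
            # error.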
            print(a_dtype, b_dtype)
            a_gpu = general_clrand(queue, (200000,), a_dtype)
            a = a_gpu.get()
            b_gpu = general_clrand(queue, (200000,), b_dtype)
            b = b_gpu.get()

            dot_ab = np.dot(a, b)
            dot_ab_gpu = cl_array.dot(a_gpu, b_gpu).get()

            assert abs(dot_ab_gpu - dot_ab) / abs(dot_ab) < 1e-4

            vdot_ab = np.vdot(a, b)
            vdot_ab_gpu = cl_array.vdot(a_gpu, b_gpu).get()

            assert abs(vdot_ab_gpu - vdot_ab) / abs(vdot_ab) < 1e-4
Example #5
    def _calcResidual(self, step_out, tmp_results, step_in, data):

        temp_fwd_data = self.normkrnldiff(tmp_results["Ax"], data)

        regcost = self.lambd * np.sum(
            np.abs(
                clmath.log(
                    1 + clarray.vdot(tmp_results["gradx"], tmp_results["gradx"])
                ).get()
            )
        )

        f = (
            temp_fwd_data
            + 1 / (2 * self.delta) * self.normkrnldiff(step_out["x"], step_in["xk"])
            + regcost
        )

        f_new = np.linalg.norm(f.get())

        self.normkernl(tmp_results["gradFx"], tmp_results["gradFx"])
        grad_f = np.linalg.norm(tmp_results["gradFx"].get())

        datacost = 2 * temp_fwd_data ** 2
        # L2Cost =  np.linalg.norm(self.normkrnldiff(step_out["x"], step_in["xk"]).get()) / (2.0 * self.delta)
        # L2Cost = np.linalg.norm(step_out["x"].get()) / (2.0 * self.delta)

        costs = datacost + regcost
        return costs.get(), f_new, grad_f
Example #6
    def _calcStepsize(self, x_shape, data_shape, iterations=50, tol=1e-3):
        """Rescale the step size"""

        x_temp = np.random.randn(*(x_shape)).astype(
            self._DTYPE_real
        ) + 1j * np.random.randn(*(x_shape)).astype(self._DTYPE_real)
        x = clarray.to_device(self._queue[0], x_temp)
        x_old = clarray.to_device(self._queue[0], x_temp)
        data_temp = np.random.randn(*(data_shape)).astype(
            self._DTYPE_real
        ) + 1j * np.random.randn(*(data_shape)).astype(self._DTYPE_real)
        x1 = clarray.to_device(self._queue[0], data_temp)
        L = 0
        print("Start: Stepsize calculation")

        for i in range(iterations):
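            # Power iteration: normalize x, then apply the forward and adjoint
            # operators so that x <- A^H A x; vdot(x, x_old) after the loop
            # estimates the largest eigenvalue of A^H A.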

            # TODO: calculate on GPU
            x_norm = self._DTYPE_real(np.linalg.norm(x.get()))
            x = x / x_norm

            # TODO: find a better stopping criterion
            # TODO: calculate on GPU
            if i > 10 and np.linalg.norm((x - x_old).get()) < tol:
                print(
                    "Termination: step size found after %i iterations "
                    "(change: %f)"
                    % (i, np.linalg.norm((x - x_old).get()))
                )
                break

            x_old = x
            self._op.fwd(
                out=x1,
                inp=[x_old, self._coils, self.modelgrad],
                wait_for=x.events,
            ).wait()
            self._op.adj(
                x,
                [x1, self._coils, self.modelgrad],
                wait_for=x1.events,
            ).wait()

        # Norm of the forward operator, gradient term (8*lambd),
        # and 1/delta**2 term:
        # L = np.maximum(
        #     L, np.abs(clarray.vdot(x, x_old).get()) + 8 * self.lambd + 1 / self.delta
        # )
        L = np.sqrt(
            np.abs(clarray.vdot(x, x_old).get()) + 8 * self.lambd + 1 / self.delta ** 2
        )

        L = self._DTYPE_real(L)
        self.alpha = 2 * (1 - self.beta) / L
        print(
            "Found Stepsize: \u03B1  %2.1e, \u03B2  %2.1e, L %2.1e\r"
            % (self.alpha, self.beta, L)
        )
Example #7
    def cg_solve(self, x, iters):
        x = clarray.to_device(self.queue, np.require(x, requirements="C"))
        b = clarray.empty(self.queue,
                          (self.NScan, 1, self.NSlice, self.dimY, self.dimX),
                          DTYPE, "C")
        Ax = clarray.empty(self.queue,
                           (self.NScan, 1, self.NSlice, self.dimY, self.dimX),
                           DTYPE, "C")
        data = clarray.to_device(self.queue, self.data)

        self.operator_rhs(b, data)
        res = b
        p = res
        delta = np.linalg.norm(res.get())**2/np.linalg.norm(b.get())**2
        self.res.append(delta)
        print("Initial Residuum: ", delta)

        for i in range(iters):
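            # Standard CG update: operator_lhs applies the system matrix,
            # lambd * p adds the regularization term, and
            # alpha = <r, r> / <p, A p>, beta = <r_new, r_new> / <r, r>.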
            self.operator_lhs(Ax, p)
            Ax = Ax + self.reco_par["lambd"]*p
            alpha = (clarray.vdot(res, res)/(clarray.vdot(p, Ax))).real.get()
            x[i+1] = (x[i] + alpha*p)
            res_new = res - alpha*Ax
            delta = np.linalg.norm(res_new.get())**2/np.linalg.norm(b.get())**2
            self.res.append(delta)
            if delta < self.reco_par["tol"]:
                print("Converged after %i iterations to %1.3e." % (i, delta))
                return x.get()[:i+1, ...]
            if not np.mod(i, 1):
                print("Residuum at iter %i : %1.3e" % (i, delta), end='\r')

            beta = (clarray.vdot(res_new, res_new) /
                    clarray.vdot(res, res)).real.get()
            p = res_new+beta*p
            (res, res_new) = (res_new, res)
        return x.get()
Example #8
def test_dot(ctx_factory):
    from pytest import importorskip
    importorskip("mako")

    context = ctx_factory()
    queue = cl.CommandQueue(context)

    dev = context.devices[0]

    dtypes = [np.float32, np.complex64]
    if has_double_support(dev):
        if has_struct_arg_count_bug(dev) == "apple":
            dtypes.extend([np.float64])
        else:
            dtypes.extend([np.float64, np.complex128])

    for a_dtype in dtypes:
        for b_dtype in dtypes:
            print(a_dtype, b_dtype)
            a_gpu = general_clrand(queue, (200000,), a_dtype)
            a = a_gpu.get()
            b_gpu = general_clrand(queue, (200000,), b_dtype)
            b = b_gpu.get()

            dot_ab = np.dot(a, b)
            dot_ab_gpu = cl_array.dot(a_gpu, b_gpu).get()

            assert abs(dot_ab_gpu - dot_ab) / abs(dot_ab) < 1e-4

            try:
                vdot_ab = np.vdot(a, b)
            except NotImplementedError:
                import sys
                is_pypy = "__pypy__" in sys.builtin_module_names
                if is_pypy:
                    print("PYPY: VDOT UNIMPLEMENTED")
                    continue
                else:
                    raise

            vdot_ab_gpu = cl_array.vdot(a_gpu, b_gpu).get()

            rel_err = abs(vdot_ab_gpu - vdot_ab) / abs(vdot_ab)
            assert rel_err < 1e-4, rel_err
Example #9
    def vdot(self, x, y):
        # pyopencl.array.vdot mirrors numpy.vdot: the first argument is
        # conjugated, and .get() copies the device scalar back to the host.
        from pyopencl.array import vdot
        return vdot(x, y, queue=self.queue).get()