Exemple #1
0
    def __init__(self, par, mesh, _vars, params, observables):
        """Set up the solver state: collaborators, kernels, scratch arrays, tolerances.

        Stores the objects passed in, looks up the kernels by name through
        ``par.get_function``, seeds the ``_random_t`` counter (from
        ``cfg.random_seed`` when it is set, otherwise 1), allocates the
        temporary arrays and raises the configured stopping criteria to a
        floor that depends on ``cfg.dtype``.
        """
        self.par, self.mesh, self.vars = par, mesh, _vars
        self.params = params
        self.fixed_vortices = self.params.fixed_vortices
        self.observables = observables

        self.solveA = self.params.solveA

        # Kernels are fetched once here and reused by the iteration methods.
        get_kernel = self.par.get_function
        self.__order_parameter_phase_lock_krnl = get_kernel(
            'order_parameter_phase_lock')
        self.__iterate_order_parameter_jacobi_step_krnl = get_kernel(
            'iterate_order_parameter_jacobi_step')
        self.__iterate_vector_potential_jacobi_step_krnl = get_kernel(
            'iterate_vector_potential_jacobi_step')

        self.__xpy_r_krnl = get_kernel('xpy_r')
        self.__xmy_r_krnl = get_kernel('xmy_r')

        seed = cfg.random_seed
        self._random_t = np.uint32(1 if seed is None else seed)

        # Temporary arrays holding the right-hand sides
        self.vars._tmp_node_var = GArray(like=self.vars._psi)

        edge_shapes = [(cfg.Nxa, cfg.Nya), (cfg.Nxb, cfg.Nyb)]
        self.vars._tmp_edge_var = GArray(shape=edge_shapes, dtype=cfg.dtype)

        self.__gab_next = gpuarray.zeros(
            self.vars.vector_potential_h().size, dtype=cfg.dtype)
        self.__gpsi_next = gpuarray.empty_like(self.vars.order_parameter_h())
        self.__gr2_max = gpuarray.zeros(1, dtype=np.int32)

        # Stopping criteria may not go below 1e-6 for float32 and 1e-12 for
        # any other dtype; the result is stored back as cfg.dtype.
        floor = 1e-6 if cfg.dtype is np.float32 else 1e-12
        for name in ('stop_criterion_order_parameter',
                     'stop_criterion_vector_potential'):
            criterion = getattr(cfg, name)
            if criterion < floor:
                criterion = floor
            setattr(cfg, name, cfg.dtype(criterion))
Exemple #2
0
    def __init__(self, mesh, vars):
        """Initialise the parameter container from the global ``cfg`` values.

        Args:
            mesh: mesh object; stored and passed on to ``FixedVortices``.
            vars: variables object; stored and passed on to ``FixedVortices``.

        NOTE(review): several of the attributes assigned below
        (``linear_coefficient``, ``gl_parameter``, ``normal_conductivity``,
        ``external_field``, the Langevin coefficients, ...) are presumably
        property setters with side effects, so the assignment order should be
        preserved - confirm against the class definition.
        """
        self.mesh = mesh
        self.vars = vars
        self.fixed_vortices = FixedVortices(self.mesh, self.vars)

        # default; presumably overwritten by the gl_parameter assignment below
        self.solveA = False

        self.linear_coefficient = cfg.linear_coefficient  # epsilon
        self.gl_parameter = cfg.gl_parameter  # kappa
        self.normal_conductivity = cfg.normal_conductivity  # sigma

        # homogeneous external magnetic field; starts at zero before the
        # configured value is applied on the next line
        self._H = cfg.dtype(0.0)
        self.homogeneous_external_field_reset = cfg.homogeneous_external_field

        # x- and y- components of external vector potential for non-homogeneous external magnetic field
        self.ae, self.be = None, None

        # external and irregular vector potential
        # invariant to maintain: self._vpei = (self.ae, self.be) + (ai, bi)
        self._vpei = None

        # non-homogeneous external magnetic field
        self.external_field = cfg.external_field

        self.order_parameter_Langevin_coefficient = cfg.order_parameter_Langevin_coefficient

        self.vector_potential_Langevin_coefficient = cfg.vector_potential_Langevin_coefficient
Exemple #3
0
    def test_sum_v(self, a_in, nv, ne, block_size=256):
        """Upload ``a_in`` to the GPU and return ``self.gsum_v`` applied to it.

        Args:
            a_in: host data; converted with ``cfg.dtype`` before the upload.
            nv: forwarded unchanged to ``gsum_v``.
            ne: forwarded unchanged to ``gsum_v``; must equal 5.
            block_size: forwarded to ``gsum_v``; must lie in [0, 1024].

        Returns:
            Whatever ``self.gsum_v`` returns.

        Raises:
            AssertionError: if ``ne != 5`` or ``block_size`` is out of range.
        """
        # Compare by value. ``ne is 5`` tested object identity, which only
        # works by accident for CPython's cached small ints and fails for
        # e.g. ``np.int32(5)``.
        assert ne == 5
        assert 0 <= block_size <= 1024

        ga_in = gpuarray.to_gpu(cfg.dtype(a_in))
        return self.gsum_v(ga_in, nv, ne, block_size=block_size)
Exemple #4
0
    def gl_parameter(self, gl_parameter):
        """Set the GL parameter (kappa) and derive ``solveA`` from it.

        ``None``, NaN and +/-inf are all mapped to ``np.inf``. Any other value
        must be a positive real number. The value is stored in ``self._kappa``
        as ``cfg.dtype``; ``self.solveA`` becomes ``True`` exactly when the
        stored kappa is finite.

        Raises:
            AssertionError: if the value is not a real number or is not
                positive.
        """
        if gl_parameter is None or np.isnan(gl_parameter) or np.isinf(
                gl_parameter):
            gl_parameter = np.inf
        assert isinstance(gl_parameter,
                          (np.floating, float, np.integer, int)) and (
                              np.isposinf(gl_parameter) or gl_parameter > 0.0)
        self._kappa = cfg.dtype(gl_parameter)

        # ``np.bool`` was only an alias of the builtin and was removed in
        # NumPy 1.24; the builtin gives the same plain Python bool everywhere.
        self.solveA = bool(not np.isposinf(self._kappa))
Exemple #5
0
    def test_sum(self, a_in, N, block_size=256):
        """Upload ``a_in`` to the GPU and return ``self.gsum`` applied to it.

        Args:
            a_in: host data; converted with ``cfg.dtype`` before the upload.
            N: not used by this method; kept for interface compatibility.
            block_size: forwarded to ``gsum``; must lie in [0, 1024].

        Returns:
            Whatever ``self.gsum`` returns.

        Raises:
            AssertionError: if ``block_size`` is out of range.
        """
        assert 0 <= block_size <= 1024

        ga_in = gpuarray.to_gpu(cfg.dtype(a_in))
        try:
            return self.gsum(ga_in, block_size=block_size)
        finally:
            # Release the device buffer even when the reduction raises.
            ga_in.gpudata.free()
Exemple #6
0
    def _set_iterator_options(self,
                              iterator_type,
                              Nt=None,
                              dt=None,
                              T=None,
                              mandatory_definition=True):
        """Complete and store the (Nt, dt, T) triple for one iterator.

        When exactly two of ``Nt``, ``dt`` and ``T`` are given, the third is
        derived from ``T = dt * Nt``. When all three are given they must
        agree (checked with ``np.isclose``). The results are written to
        ``self.Nt/dt/T`` for ``'order_parameter'`` and to
        ``self.NtA/dtA/TA`` for ``'vector_potential'``; values that are still
        ``None`` are stored as ``None``.

        Args:
            iterator_type: ``'order_parameter'`` or ``'vector_potential'``.
            Nt: number of steps, or ``None``.
            dt: step size, or ``None``.
            T: total interval, or ``None``.
            mandatory_definition: when true, assert that ``dt`` and ``Nt``
                end up as non-negative numbers.
        """
        assert iterator_type in ['order_parameter', 'vector_potential']

        has_Nt = Nt is not None
        has_dt = dt is not None
        has_T = T is not None

        if has_Nt and has_dt and has_T:
            assert np.isclose(T, dt * Nt)
        elif has_Nt and has_T:
            dt = float(T) / Nt
        elif has_Nt and has_dt:
            T = float(dt) * Nt
        elif has_dt and has_T:
            Nt = int(np.round(T / dt))

        if mandatory_definition:
            real_types = (np.floating, float, np.integer, int)
            assert isinstance(dt, real_types) and dt >= 0.0
            assert isinstance(Nt, (np.integer, int)) and Nt >= 0

        # The vector-potential attributes carry an 'A' suffix.
        if iterator_type == 'order_parameter':
            suffix = ''
        elif iterator_type == 'vector_potential':
            suffix = 'A'
        else:
            return

        n_steps = None if Nt is None else np.int32(Nt)
        setattr(self, 'Nt' + suffix, n_steps)

        step = None if dt is None else cfg.dtype(dt)
        setattr(self, 'dt' + suffix, step)

        interval = None if T is None else cfg.dtype(T)
        setattr(self, 'T' + suffix, interval)
Exemple #7
0
    def _update_vector_potential(self, homogeneous_external_field, reset):
        """Apply a new homogeneous external field to the host vector potential.

        With ``reset`` true, both host components are zeroed and the full
        field is applied. Otherwise only the difference between the new and
        the previously stored field is added. The new field is stored in
        ``self._H`` as ``cfg.dtype`` in both cases.

        Args:
            homogeneous_external_field: new field value (real number).
            reset: whether to clear the vector potential first.
        """
        assert isinstance(homogeneous_external_field,
                          (np.floating, float, np.integer, int))

        vp = self.vars._vp

        if reset:
            self._H = cfg.dtype(homogeneous_external_field)

            # TODO: need a fill method in GArray
            a_host, b_host = vp.get_vec_h()
            a_host.fill(0.0)
            b_host.fill(0.0)

            vp.need_htod_sync()
            vp.sync()

            field_change = self._H
        else:
            previous_H = self._H
            self._H = cfg.dtype(homogeneous_external_field)
            field_change = -previous_H + self._H
            vp.sync()

        # TODO: implement GPU version of ab initialization
        # Possible set of gauges, A = [g*y*H, (1-g)*x*H, 0] with any g, 0 <= g <= 1
        gauge = 0.5
        _, y_on_a = self.mesh.xy_a_grid
        x_on_b, _ = self.mesh.xy_b_grid

        a_host, b_host = vp.get_vec_h()
        a_host -= gauge * (y_on_a - 0.5 * cfg.Ly) * field_change
        b_host += (1.0 - gauge) * (x_on_b - 0.5 * cfg.Lx) * field_change

        # Host arrays were modified in place; push them back to the device.
        vp.need_htod_sync()
        vp.sync()
Exemple #8
0
    def __init__(
            self,
            Nx=None,
            dx=None,
            Lx=None,  # geometry
            Ny=None,
            dy=None,
            Ly=None,
            Nt=None,
            dt=None,
            T=None,  # parameters for order parameter iterator
            NtA=None,
            dtA=None,
            TA=None,  # parameters for vector potential iterator
            material_tiling=None,
            order_parameter='random',
            random_seed=None,
            random_level=1.0,
            gl_parameter=np.inf,  # = lambda/xi, GL parameter; if None, np.nan, or np.inf then solveA = False
            normal_conductivity=1.0,  # normal-state conductivity
            linear_coefficient=1.0,  # linear coefficient in GL equation
            homogeneous_external_field=0.0,
            external_field=0.0,
            fixed_vortices=None,
            fixed_vortices_correction='cell centers',
            phase_lock_radius=None,
            device_id=0,
            dtype=np.float64,
            stop_criterion_order_parameter=1e-6,
            stop_criterion_vector_potential=1e-6,
            order_parameter_Langevin_coefficient=0.0,
            vector_potential_Langevin_coefficient=0.0,
            convergence_rtol=1e-6,  # relative tolerance for convergence
    ):
        """Fill the global ``cfg`` from the arguments and build the sub-objects.

        For each axis, at least two of (N, L, d) must be given; the third is
        derived from ``L = d * (N - 1)``. If all three are given they must be
        consistent (checked with ``np.isclose``).

        ``Nt``, ``dt``, ``T``, ``NtA``, ``dtA`` and ``TA`` are accepted but
        not used here: the corresponding ``cfg`` entries are set to ``None``.

        Raises:
            ValueError: if fewer than two of (N, L, d) are given for an axis.
            AssertionError: if ``dtype`` is not float32/float64, the geometry
                is inconsistent or too small (``N < 4``, ``L <= 0``), or a
                stopping criterion is not a positive real number.
        """

        def _resolve_axis(n, length, step, axis):
            # Derive the missing one of (n, length, step) for the given axis.
            if n is not None and length is not None and step is None:
                step = float(length) / (n - 1)
            elif n is not None and length is None and step is not None:
                length = float(step) * (n - 1)
            elif n is None and length is not None and step is not None:
                n = int(np.round(length / step) + 1)
            elif n is not None and length is not None and step is not None:
                assert np.isclose(length, step * (n - 1))
            else:
                # A bare string cannot be raised in Python 3 (it produces an
                # unrelated TypeError), so raise a proper exception.
                raise ValueError(
                    'Two out of three N%s, L%s, d%s must be defined' %
                    (axis, axis, axis))
            return n, length, step

        real_types = (np.floating, float, np.integer, int)

        self.dtypes = (np.float32, np.float64)

        cfg.device_id = device_id

        assert dtype in self.dtypes
        cfg.dtype = dtype
        cfg.dtype_complex = {
            np.float32: np.complex64,
            np.float64: np.complex128
        }[cfg.dtype]

        Nx, Lx, dx = _resolve_axis(Nx, Lx, dx, 'x')
        Ny, Ly, dy = _resolve_axis(Ny, Ly, dy, 'y')

        assert isinstance(Lx, real_types) and Lx > 0.0
        assert isinstance(Ly, real_types) and Ly > 0.0
        assert isinstance(Nx, (np.integer, int)) and Nx >= 4
        assert isinstance(Ny, (np.integer, int)) and Ny >= 4

        cfg.Nx, cfg.Ny = np.int32(Nx), np.int32(Ny)
        cfg.Lx, cfg.Ly = cfg.dtype(Lx), cfg.dtype(Ly)
        cfg.dx, cfg.dy = cfg.dtype(dx), cfg.dtype(dy)

        cfg.N = cfg.Nx * cfg.Ny

        # number of centers of horizontal edges excluding boundaries
        cfg.Nxa, cfg.Nya = cfg.Nx - 1, cfg.Ny

        # number of centers of vertical edges excluding boundaries
        cfg.Nxb, cfg.Nyb = cfg.Nx, cfg.Ny - 1

        cfg.Na, cfg.Nb = cfg.Nxa * cfg.Nya, cfg.Nxb * cfg.Nyb
        cfg.Nab = cfg.Na + cfg.Nb

        # number of cells
        cfg.Nxc, cfg.Nyc = cfg.Nx - 1, cfg.Ny - 1
        cfg.Nc = cfg.Nxc * cfg.Nyc

        # inverse spacings and their products
        cfg.idx, cfg.idy = 1.0 / cfg.dx, 1.0 / cfg.dy
        cfg.idx2, cfg.idy2, cfg.idxy = cfg.idx * cfg.idx, cfg.idy * cfg.idy, cfg.idx * cfg.idy
        cfg.j_dx, cfg.j_dy = 1.0j * cfg.dx, 1.0j * cfg.dy

        cfg.material_tiling = material_tiling
        cfg.order_parameter = order_parameter
        cfg.random_seed = random_seed
        cfg.random_level = random_level

        cfg.gl_parameter = gl_parameter
        cfg.linear_coefficient = linear_coefficient
        cfg.normal_conductivity = normal_conductivity

        cfg.homogeneous_external_field = homogeneous_external_field
        cfg.external_field = external_field

        cfg.fixed_vortices = fixed_vortices
        cfg.fixed_vortices_correction = fixed_vortices_correction
        cfg.phase_lock_radius = phase_lock_radius

        cfg.order_parameter_Langevin_coefficient = order_parameter_Langevin_coefficient
        cfg.vector_potential_Langevin_coefficient = vector_potential_Langevin_coefficient

        cfg.Nt, cfg.dt, cfg.T = None, None, None
        cfg.NtA, cfg.dtA, cfg.TA = None, None, None

        assert isinstance(
            stop_criterion_order_parameter,
            real_types) and stop_criterion_order_parameter > 0.0
        cfg.stop_criterion_order_parameter = cfg.dtype(
            stop_criterion_order_parameter)

        assert isinstance(
            stop_criterion_vector_potential,
            real_types) and stop_criterion_vector_potential > 0.0
        cfg.stop_criterion_vector_potential = cfg.dtype(
            stop_criterion_vector_potential)

        # relative tolerance for convergence
        cfg.convergence_rtol = convergence_rtol

        self.cfg = cfg

        # Sub-objects are built in dependency order: each one receives the
        # ones created before it.
        self.par = GLPar.Startup()

        self.par.red = GLPar.Reduction(self.par)

        self.mesh = GLMesh.Grid()

        self.vars = GLVars.Vars(self.par, self.mesh)

        self.params = GLVars.Params(self.mesh, self.vars)

        self.observables = GLObs.Observables(self.par, self.mesh, self.vars,
                                             self.params)

        self.solve = GLSolvers.Solvers(self.par, self.mesh, self.vars,
                                       self.params, self.observables)

        self.vortex_detector = GLObs.VortexDetector(self.vars, self.params,
                                                    self.solve)
Exemple #9
0
    def __iterate_vector_potential_gpu(self):
        """Advance the vector potential by one time step on the GPU.

        Runs up to 1024 Jacobi steps of the
        ``iterate_vector_potential_jacobi_step`` kernel, swapping the current
        and next buffers after each step, and stops early once the scaled
        residual read back from ``__gr2_max`` drops below 1. When an
        irregular vector potential is present it is temporarily summed with
        the regular one before the loop and restored afterwards.

        NOTE(review): the original summary spoke of a "dtA-iteration", but
        the kernel receives ``self.dt`` - confirm which step is intended.
        """

        # self.gabi += self.gab; no memory allocation
        # NOTE(review): the element count passed is cfg.N although the
        # arrays are vector potentials - confirm this is the intended size.
        if self.fixed_vortices._vpi is not None:
            self.__xpy_r_krnl(
                self.fixed_vortices.irregular_vector_potential_h(),
                self.vars.vector_potential_h(),
                np.uint32(cfg.N),
                block=(self.par.block_size, 1, 1),
                grid=(self.par.grid_size, 1, 1))
            gabi_gab = self.fixed_vortices.irregular_vector_potential_h(
            )  # just a pointer
        else:
            gabi_gab = self.vars.vector_potential_h()

        # similar to gab_rhs = gab.copy(), but does not allocate new array
        Utils.copy_dtod(self.vars._tmp_edge_var_h(),
                        self.vars.vector_potential_h())
        #self.vars._tmp_edge_var.need_dtoh_sync()

        # if self.ab_langevin_c > 1e-16:
        #     self.gab_rhs += self.ab_langevin_c*(curand(self.gab_rhs.shape, dtype=cfg.dtype) - 0.5)
        for j in range(1024):
            # reset the residual accumulator before every Jacobi step
            self.__gr2_max.fill(np.int32(0))

            self.__iterate_vector_potential_jacobi_step_krnl(
                self.dt,
                self.params.gl_parameter_squared_h(),
                self.params._rho,
                self.params.homogeneous_external_field,
                self.mesh.material_tiling_h(),
                self.vars.order_parameter_h(),
                gabi_gab,
                self.vars._tmp_edge_var_h(
                ),  # ab for right-hand side; does not change during Jacobi iterations
                self.vars.vector_potential_h(),  # ab^{j} in Jacobi method
                self.__gab_next,  # ab^{j+1} in Jacobi method
                self.params.vector_potential_Langevin_coefficient,
                np.uint32(j),
                self._random_t,
                cfg.stop_criterion_vector_potential,
                self.__gr2_max,
                grid=(self.par.grid_size, 1, 1),
                block=(self.par.block_size, 1, 1),
            )

            # swap pointers, does not change arrays
            self.vars._vp._gdata, self.__gab_next = self.__gab_next, self.vars._vp._gdata
            #self.vars.vector_potential_h(), self.__gab_next = self.__gab_next, self.vars.vector_potential_h()

            # r2_max_norm = residual/stop_criterion
            # presumably the kernel stores this ratio as an integer scaled
            # by 1e4, which the 1.0e-4 factor undoes - TODO confirm
            r2_max_norm = 1.0e-4 * cfg.dtype(self.__gr2_max.get()[0])

            # convergence criteria
            if r2_max_norm < 1.0:
                break

        # advance the counter passed to the kernel as its random-number argument
        self._random_t += np.uint32(1)

        # device copy is now newer than the host copy
        self.vars._vp.need_dtoh_sync()

        # self.gabi -= self.gab; no memory allocation
        if self.fixed_vortices._vpi is not None:
            self.__xmy_r_krnl(
                self.fixed_vortices.irregular_vector_potential_h(),
                self.vars.vector_potential_h(),
                np.uint32(cfg.N),
                block=(self.par.block_size, 1, 1),
                grid=(self.par.grid_size, 1, 1))
Exemple #10
0
    def __iterate_order_parameter_gpu(self, gab_gabi):
        """Advance the order parameter by one ``dt`` step on the GPU.

        Runs up to 1024 Jacobi steps of the
        ``iterate_order_parameter_jacobi_step`` kernel, swapping the current
        and next buffers after each step, and stops early once the scaled
        residual read back from ``__gr2_max`` drops below 1. Afterwards the
        phase-lock kernel is applied if phase-lock nodes are defined.

        Args:
            gab_gabi: vector-potential argument forwarded to the kernel
                (presumably a device array of the regular, or regular plus
                irregular, vector potential - confirm against callers).
        """

        # similar to gpsi_rhs = gpsi.copy(), but does not allocate new array
        Utils.copy_dtod(self.vars._tmp_node_var_h(),
                        self.vars.order_parameter_h())
        #self.vars._tmp_node_var.need_dtoh_sync()

        for j in range(1024):
            # reset the residual accumulator before every Jacobi step
            self.__gr2_max.fill(np.int32(0))

            # TODO: prepare all cuda calls
            self.__iterate_order_parameter_jacobi_step_krnl(
                self.dt,
                self.params.linear_coefficient_scalar_h(),
                self.params.linear_coefficient_h(),
                self.mesh.material_tiling_h(),
                gab_gabi,
                self.vars._tmp_node_var_h(
                ),  # psi for right-hand side; does not change during Jacobi iterations
                self.vars.order_parameter_h(),  # psi^{j} in Jacobi method
                self.__gpsi_next,  # psi^{j+1} in Jacobi method
                self.params.order_parameter_Langevin_coefficient,
                np.uint32(j),
                self._random_t,
                cfg.stop_criterion_order_parameter,
                self.__gr2_max,
                grid=(self.par.grid_size, 1, 1),
                block=(self.par.block_size, 1, 1),
            )

            # swap pointers, does not change arrays
            # TODO: this is hard-wired for now since Python doesn't allow
            # assignment to a function call. Sync status is not updated here.

            self.vars._psi._gdata, self.__gpsi_next = self.__gpsi_next, self.vars._psi._gdata
            #self.vars.order_parameter_h(), self.__gpsi_next = self.__gpsi_next, self.vars.order_parameter_h()

            # residual = max{|b-M*psi|}
            # r2_max_norm = residual/stop_criterion
            # presumably the kernel stores this ratio as an integer scaled
            # by 1e4, which the 1.0e-4 factor undoes - TODO confirm
            r2_max_norm = 1.0e-4 * cfg.dtype(self.__gr2_max.get()[0])

            # convergence criteria
            if r2_max_norm < 1.0:
                break

        # advance the counter passed to the kernel as its random-number argument
        self._random_t += np.uint32(1)

        if self.fixed_vortices._phase_lock_ns is not None:
            # NOTE(review): a block size of 2 is unusually small - confirm
            # it is intentional.
            block_size = 2
            grid_size = Utils.intceil(self.fixed_vortices._phase_lock_ns.size,
                                      block_size)

            self.__order_parameter_phase_lock_krnl(
                self.vars.order_parameter_h(),
                np.int32(self.fixed_vortices._phase_lock_ns.size),
                self.fixed_vortices._phase_lock_ns_h(),
                grid=(grid_size, 1, 1),
                block=(block_size, 1, 1),
            )

        # device copy is now newer than the host copy
        self.vars._psi.need_dtoh_sync()
Exemple #11
0
    def __free_energy_minimization(self, n_iter=1000):
        """Minimize the free energy over order parameter and vector potential.

        Runs at most ``n_iter`` iterations. Each iteration reads the two
        Jacobians, updates the betas (from the second iteration on), updates
        both search directions, computes the step lengths and applies them to
        the variables. The free energy after every iteration is appended to
        ``self.cg_energies``; the loop stops early when the relative change
        between two consecutive energies falls below the convergence
        tolerance.

        Args:
            n_iter: maximum number of iterations.
        """
        # TODO: check material tiling
        # TODO: add external vector potential
        # TODO: add phase lock gridpoints

        # TODO: Ideally there should be one entry point for both minimizations

        self.vars._psi.sync()
        self.vars._vp.sync()

        self.cg_energies = []  # TMP

        #beta_psi = 0.0  # First iteration is steepest descent, so make beta = 0.0
        #beta_A = 0.0  # First iteration is steepest descent, so make beta = 0.0
        # NOTE(review): self._beta_psi / self._beta_A are read at i == 0
        # without being set in this method, and the search directions are not
        # zeroed here - presumably handled elsewhere; confirm.

        # gpu arrays:
        # (g)dir     : search direction
        # (g)jac     : gradient
        # (g)jac_prev: gradient from previous iteration

        #cuda.start_profiler()

        self.vars._alloc_free_temporary_gpu_storage('alloc')

        for i in range(n_iter):

            # 1. Compute jacobians
            # (plain attribute reads; presumably properties that evaluate
            # the gradients - confirm)
            self.__gjac_psi = self._free_energy_jacobian_psi
            self.__gjac_A = self._free_energy_jacobian_A

            # 2. Compute betas
            # use Polak–Ribière formula with resetting
            # TODO: consider other formulas, see e.g. https://en.wikipedia.org/wiki/Nonlinear_conjugate_gradient_method
            if i > 0:
                self.__compute_beta_psi(self.__gjac_psi, self.__gjac_psi_prev)
                self.__compute_beta_A(self.__gjac_A, self.__gjac_A_prev)

            # 3. Update search directions
            self.__axmy_c_krnl(self.__gdir_psi,
                               self.__gjac_psi,
                               self.__gdir_psi,
                               self._beta_psi,
                               np.uint32(cfg.N),
                               block=(self.par.block_size, 1, 1),
                               grid=(self.par.grid_size, 1, 1))

            self.__axmy_r_krnl(self.__gdir_A,
                               self.__gjac_A,
                               self.__gdir_A,
                               self._beta_A,
                               np.uint32(cfg.Nab),
                               block=(self.par.block_size, 1, 1),
                               grid=(self.par.grid_size_A, 1, 1))

            # 4. Compute alphas
            self._free_energy_conjgrad_coef(self.__gdir_psi, self.__gdir_A)
            alpha_psi, alpha_A = self._cg_alpha_min()
            #print('iter: ', i, 'c: ', self.__c, 'alpha, beta: ', alpha_psi, alpha_A, beta_psi, beta_A, flush=True)

            # 5. Update variables
            self.__axpy_c_krnl(self.__gdir_psi,
                               self.vars.order_parameter_h(),
                               self.vars.order_parameter_h(),
                               cfg.dtype(alpha_psi),
                               np.uint32(cfg.N),
                               block=(self.par.block_size, 1, 1),
                               grid=(self.par.grid_size, 1, 1))

            self.__axpy_r_krnl(self.__gdir_A,
                               self.vars.vector_potential_h(),
                               self.vars.vector_potential_h(),
                               cfg.dtype(alpha_A),
                               np.uint32(cfg.Nab),
                               block=(self.par.block_size, 1, 1),
                               grid=(self.par.grid_size_A, 1, 1))

            # 6. Save previous step
            Utils.copy_dtod(self.__gjac_psi_prev, self.__gjac_psi)
            Utils.copy_dtod(self.__gjac_A_prev, self.__gjac_A)

            E0 = self.observables.free_energy  # TMP
            self.cg_energies.append(E0)  # TMP
            # if i%10 == 0:
            #     print('%3.d: E = %10.10f' % (i, E0)) # TMP

            # stop when the relative energy change is below the tolerance
            if (i > 0
                    and np.abs(self.cg_energies[i] / self.cg_energies[i - 1] -
                               1.0) < self.__convergence_rtol):
                #print('CG converged in %d iterations with residual %g ' % ( i, np.abs(self.cg_energies[i]/self.cg_energies[i-1] - 1.0)))
                break

        #cuda.stop_profiler()

        # device copies are now newer than the host copies
        self.vars._psi.need_dtoh_sync()
        self.vars._vp.need_dtoh_sync()

        self.vars._alloc_free_temporary_gpu_storage('free')
Exemple #12
0
    def gl_parameter_squared_h(self):
        """Return the squared GL parameter as ``cfg.dtype``.

        When ``self.solveA`` is false, -1.0 is returned instead.
        """
        value = self.gl_parameter**2 if self.solveA else -1.0
        return cfg.dtype(value)
Exemple #13
0
    def linear_coefficient_scalar_h(self):
        """Return the linear coefficient when it is a single value.

        If ``self._epsilon`` holds more than one element (or none),
        ``cfg.dtype(0.0)`` is returned instead of ``self._epsilon.get_h()``.
        """
        if self._epsilon.size != 1:
            return cfg.dtype(0.0)

        return self._epsilon.get_h()
Exemple #14
0
 def vector_potential_Langevin_coefficient(
         self, vector_potential_Langevin_coefficient):
     """Store the coefficient, cast to ``cfg.dtype``, in ``self._ab_langevin_c``.

     The value must be a real number (Python or NumPy int/float).
     """
     real_types = (np.floating, float, np.integer, int)
     assert isinstance(vector_potential_Langevin_coefficient, real_types)
     coefficient = cfg.dtype(vector_potential_Langevin_coefficient)
     self._ab_langevin_c = coefficient
Exemple #15
0
 def order_parameter_Langevin_coefficient(
         self, order_parameter_Langevin_coefficient):
     """Store the coefficient, cast to ``cfg.dtype``, in ``self._psi_langevin_c``.

     The value must be a real number (Python or NumPy int/float).
     """
     real_types = (np.floating, float, np.integer, int)
     assert isinstance(order_parameter_Langevin_coefficient, real_types)
     coefficient = cfg.dtype(order_parameter_Langevin_coefficient)
     self._psi_langevin_c = coefficient
Exemple #16
0
 def homogeneous_external_field(self, homogeneous_external_field):
     """Store the field value, cast to ``cfg.dtype``, in ``self._H``."""
     field = cfg.dtype(homogeneous_external_field)
     self._H = field
Exemple #17
0
    def __free_energy_minimization_psi(self, n_iter=1000):
        """Minimize the free energy with respect to the order parameter only.

        Requires ``self.params.solveA`` to be false. Runs at most ``n_iter``
        iterations. Each iteration reads the Jacobian, updates beta (from the
        second iteration on), updates the search direction, computes the step
        length and applies it to the order parameter. The free energy after
        every iteration is appended to ``self.cg_energies``; the loop stops
        early when the relative change between two consecutive energies falls
        below the convergence tolerance.

        Args:
            n_iter: maximum number of iterations.
        """
        # NOTE: Tests show that
        #       - CG minimization is much faster than TD for 1-2 vortices (at least current implementation)
        #       - for ~30 vortices CG demonstrates similar "performance" as TD

        # NOTE: works with material tiling
        # TODO: add external vector potential
        # TODO: add phase lock gridpoints

        assert not self.params.solveA

        self.vars._psi.sync()
        self.vars._vp.sync()

        self.cg_energies = []  # TMP

        #beta = 0.0  # First iteration is steepest descent, so make beta = 0.0
        # NOTE(review): self._beta_psi is read at i == 0 without being set in
        # this method - presumably initialized elsewhere; confirm.

        # gpu arrays:
        # (g)dir     : search direction
        # (g)jac     : gradient
        # (g)jac_prev: gradient from previous iteration

        self.vars._alloc_free_temporary_gpu_storage('alloc')

        # start from a zero search direction
        self.__gdir_psi.fill(0.0)
        for i in range(n_iter):

            # 1. Compute jacobians
            # (plain attribute read; presumably a property that evaluates
            # the gradient - confirm)
            self.__gjac_psi = self._free_energy_jacobian_psi

            # 2. Compute beta
            # Polak–Ribière formula
            # TODO: consider other formulas, see e.g. https://en.wikipedia.org/wiki/Nonlinear_conjugate_gradient_method
            if i > 0:
                # d*(d-dp)/(dp*dp)
                # = [d.re, d.im]*([d.re-dp.re, d.im-dp.im])/([dp.re, dp.im]*[dp.re, dp.im])
                # = (d.re*(d.re-dp.re) + d.im*(d.im-dp.im))/(dp.re*dp.re + dp.im*dp.im)
                # = (j.re*(j.re-jp.re) + j.im*(j.im-jp.im))/(jp.re*jp.re + jp.im*jp.im)
                # where j = -d

                self.__compute_beta_psi(self.__gjac_psi, self.__gjac_psi_prev)

            # 3. Update search direction
            self.__axmy_c_krnl(self.__gdir_psi,
                               self.__gjac_psi,
                               self.__gdir_psi,
                               self._beta_psi,
                               np.uint32(cfg.N),
                               block=(self.par.block_size, 1, 1),
                               grid=(self.par.grid_size, 1, 1))

            # 4. Compute alpha
            self._free_energy_conjgrad_coef_psi(self.__gdir_psi)
            alpha0 = self._cg_alpha_psi_min()
            #print('iter: ', i, 'F: ', c0, c1, c2, c3, c4, 'alpha: ', alpha0, 'beta: ', beta, flush=True)

            # 5. Update variables
            self.__axpy_c_krnl(self.__gdir_psi,
                               self.vars.order_parameter_h(),
                               self.vars.order_parameter_h(),
                               cfg.dtype(alpha0),
                               np.uint32(cfg.N),
                               block=(self.par.block_size, 1, 1),
                               grid=(self.par.grid_size, 1, 1))

            # 6. Save the gradient for the next beta computation
            Utils.copy_dtod(self.__gjac_psi_prev, self.__gjac_psi)

            E0 = self.observables.free_energy  # TMP
            self.cg_energies.append(E0)  # TMP
            # if i%10 == 0:
            #     print('%3.d: E = %10.10f' % (i, E0)) # TMP

            # stop when the relative energy change is below the tolerance
            if (i > 0
                    and np.abs(self.cg_energies[i] / self.cg_energies[i - 1] -
                               1.0) < self.__convergence_rtol):
                #print('CG converged in %d iterations with residual %g ' % ( i, np.abs(self.cg_energies[i]/self.cg_energies[i-1] - 1.0)))
                break

        # device copy is now newer than the host copy
        self.vars._psi.need_dtoh_sync()

        self.vars._alloc_free_temporary_gpu_storage('free')
Exemple #18
0
 def normal_conductivity(self, normal_conductivity):
     """Store the conductivity and its reciprocal as ``cfg.dtype`` values.

     ``self._sigma`` receives the conductivity and ``self._rho`` receives
     ``1.0 / normal_conductivity``. The value must be a positive real number.
     """
     real_types = (np.floating, float, np.integer, int)
     assert isinstance(normal_conductivity,
                       real_types) and normal_conductivity > 0.0
     self._sigma = cfg.dtype(normal_conductivity)
     self._rho = cfg.dtype(1.0 / normal_conductivity)