def __init__(self, par, mesh, _vars, params, observables):
    """Bind collaborators, look up GPU kernels and allocate work buffers.

    Args:
        par: object exposing ``get_function(name)``; used to fetch the
            kernels by name.
        mesh: stored as ``self.mesh``.
        _vars: stored as ``self.vars``; two temporary arrays
            (``_tmp_node_var`` and ``_tmp_edge_var``) are attached to it.
        params: stored as ``self.params``; its ``fixed_vortices`` and
            ``solveA`` attributes are copied onto this object.
        observables: stored as ``self.observables``.

    Side effects:
        ``cfg.stop_criterion_order_parameter`` and
        ``cfg.stop_criterion_vector_potential`` are raised to a
        precision-dependent floor and re-cast to ``cfg.dtype``.
    """
    self.par, self.mesh, self.vars = par, mesh, _vars
    self.params, self.observables = params, observables
    self.fixed_vortices = self.params.fixed_vortices
    self.solveA = self.params.solveA

    # Kernels are looked up by name once and kept as private attributes.
    fetch_kernel = self.par.get_function
    self.__order_parameter_phase_lock_krnl = fetch_kernel(
        'order_parameter_phase_lock')
    self.__iterate_order_parameter_jacobi_step_krnl = fetch_kernel(
        'iterate_order_parameter_jacobi_step')
    self.__iterate_vector_potential_jacobi_step_krnl = fetch_kernel(
        'iterate_vector_potential_jacobi_step')
    self.__xpy_r_krnl = fetch_kernel('xpy_r')
    self.__xmy_r_krnl = fetch_kernel('xmy_r')

    # Counter handed to the Jacobi kernels; starts at the configured seed
    # when one is given, otherwise at 1.
    initial_t = 1 if cfg.random_seed is None else cfg.random_seed
    self._random_t = np.uint32(initial_t)

    # Right-hand-side storage: one node-shaped array (like psi) and one
    # edge-shaped array built from the two edge-grid shapes.
    self.vars._tmp_node_var = GArray(like=self.vars._psi)
    edge_shapes = [(cfg.Nxa, cfg.Nya), (cfg.Nxb, cfg.Nyb)]
    self.vars._tmp_edge_var = GArray(shape=edge_shapes, dtype=cfg.dtype)

    # "Next iterate" buffers for the Jacobi sweeps and the one-element
    # int32 cell the kernels report their residual measure into.
    n_vp = self.vars.vector_potential_h().size
    self.__gab_next = gpuarray.zeros(n_vp, dtype=cfg.dtype)
    self.__gpsi_next = gpuarray.empty_like(self.vars.order_parameter_h())
    self.__gr2_max = gpuarray.zeros(1, dtype=np.int32)

    # Stopping criteria cannot be tighter than the working precision
    # allows: 1e-6 for float32, 1e-12 otherwise.
    floor = 1e-6 if cfg.dtype is np.float32 else 1e-12
    for criterion in ('stop_criterion_order_parameter',
                      'stop_criterion_vector_potential'):
        value = getattr(cfg, criterion)
        if value < floor:
            value = floor
        setattr(cfg, criterion, cfg.dtype(value))
def __init__(self, mesh, vars):
    """Initialise the parameter container from the global ``cfg`` values.

    Args:
        mesh: stored as ``self.mesh``.
        vars: stored as ``self.vars``.

    The statement order below is kept as-is on purpose.
    NOTE(review): several assignments appear to go through property
    setters (e.g. a ``gl_parameter`` setter elsewhere in this file writes
    ``self.solveA``), so reordering could change the resulting state --
    confirm before restructuring.
    """
    self.mesh = mesh
    self.vars = vars
    self.fixed_vortices = FixedVortices(self.mesh, self.vars)
    # Default; presumably overwritten when gl_parameter is assigned below.
    self.solveA = False
    self.linear_coefficient = cfg.linear_coefficient  # epsilon
    self.gl_parameter = cfg.gl_parameter  # kappa
    self.normal_conductivity = cfg.normal_conductivity  # sigma
    # homogeneous external magnetic field
    self._H = cfg.dtype(0.0)
    self.homogeneous_external_field_reset = cfg.homogeneous_external_field
    # x- and y- components of external vector potential for non-homogeneous external magnetic field
    self.ae, self.be = None, None
    # external and irregular vector potential
    # it should be kept self._vpei = (self.ae, self.be) + (ai, bi)
    self._vpei = None
    # non-homogeneous external magnetic field
    self.external_field = cfg.external_field
    self.order_parameter_Langevin_coefficient = cfg.order_parameter_Langevin_coefficient
    self.vector_potential_Langevin_coefficient = cfg.vector_potential_Langevin_coefficient
def test_sum_v(self, a_in, nv, ne, block_size=256):
    """Run ``self.gsum_v`` on a host array and return its result.

    Args:
        a_in: host data; converted with ``cfg.dtype`` and uploaded to the GPU.
        nv: forwarded to ``gsum_v``.
        ne: forwarded to ``gsum_v``; only the value 5 is accepted.
        block_size: forwarded to ``gsum_v``; must lie in ``[0, 1024]``.

    Returns:
        Whatever ``self.gsum_v`` returns.

    Raises:
        AssertionError: if ``ne != 5`` or ``block_size`` is out of range.
    """
    # Compare by value: an identity test against an int literal only
    # works by accident for small CPython ints and rejects NumPy integers.
    assert ne == 5
    assert 0 <= block_size <= 1024
    ga_in = gpuarray.to_gpu(cfg.dtype(a_in))
    a_out = self.gsum_v(ga_in, nv, ne, block_size=block_size)
    return a_out
def gl_parameter(self, gl_parameter):
    """Set the GL parameter (kappa) and derive the ``solveA`` flag.

    ``None``, NaN and infinite values are all mapped to ``+inf``. With an
    infinite kappa ``solveA`` becomes ``False``; any finite positive
    value makes it ``True``.

    Args:
        gl_parameter: real scalar > 0, or ``None``/NaN/inf.

    Raises:
        AssertionError: if the (normalised) value is not a real scalar or
            is neither ``+inf`` nor strictly positive.
    """
    if gl_parameter is None or np.isnan(gl_parameter) or np.isinf(
            gl_parameter):
        gl_parameter = np.inf
    assert isinstance(gl_parameter,
                      (np.floating, float, np.integer, int)) and (
                          np.isposinf(gl_parameter) or gl_parameter > 0.0)
    self._kappa = cfg.dtype(gl_parameter)
    # Builtin bool instead of the np.bool alias, which was deprecated in
    # NumPy 1.20 and removed in 1.24.
    self.solveA = bool(not np.isposinf(self._kappa))
def test_sum(self, a_in, N, block_size=256):
    """Run ``self.gsum`` on a host array and return its result.

    Args:
        a_in: host data; converted with ``cfg.dtype`` and uploaded to the GPU.
        N: unused; kept for interface compatibility.
        block_size: forwarded to ``gsum``; must lie in ``[0, 1024]``.

    Returns:
        Whatever ``self.gsum`` returns.

    Raises:
        AssertionError: if ``block_size`` is out of range.
    """
    assert 0 <= block_size <= 1024
    ga_in = gpuarray.to_gpu(cfg.dtype(a_in))
    try:
        return self.gsum(ga_in, block_size=block_size)
    finally:
        # Release the device buffer even when the reduction raises.
        ga_in.gpudata.free()
def _set_iterator_options(self,
                          iterator_type,
                          Nt=None,
                          dt=None,
                          T=None,
                          mandatory_definition=True):
    """Store step count, step size and total time for one iterator.

    When exactly two of ``Nt``, ``dt`` and ``T`` are given, the third is
    derived from ``T = dt * Nt``. When all three are given they are
    checked for consistency. With fewer than two, nothing is derived.

    Args:
        iterator_type: ``'order_parameter'`` (writes ``self.Nt``,
            ``self.dt``, ``self.T``) or ``'vector_potential'`` (writes
            ``self.NtA``, ``self.dtA``, ``self.TA``).
        Nt: number of steps, or ``None``.
        dt: step size, or ``None``.
        T: total time, or ``None``.
        mandatory_definition: when true, ``dt`` and ``Nt`` must end up
            defined, numeric and non-negative.

    Raises:
        AssertionError: on an unknown ``iterator_type``, on inconsistent
            values, or when mandatory values are missing or invalid.
    """
    assert iterator_type in ['order_parameter', 'vector_potential']

    # (A disabled variant of this code used to fall back to the stored
    # self.Nt/self.dt/self.T or self.NtA/self.dtA/self.TA for omitted
    # arguments; omitted arguments are now simply left as None.)
    have_Nt = Nt is not None
    have_dt = dt is not None
    have_T = T is not None

    if have_Nt and have_T:
        if have_dt:
            # Fully specified: only verify T == dt * Nt.
            assert np.isclose(T, dt * Nt)
        else:
            dt = float(T) / Nt
    elif have_Nt and have_dt:
        T = float(dt) * Nt
    elif have_T and have_dt:
        Nt = int(np.round(T / dt))

    if mandatory_definition:
        assert isinstance(
            dt, (np.floating, float, np.integer, int)) and dt >= 0.0
        assert isinstance(Nt, (np.integer, int)) and Nt >= 0

    # The vector-potential attributes carry an "A" suffix.
    if iterator_type == 'order_parameter':
        suffix = ''
    elif iterator_type == 'vector_potential':
        suffix = 'A'
    else:
        return

    setattr(self, 'Nt' + suffix, None if Nt is None else np.int32(Nt))
    setattr(self, 'dt' + suffix, None if dt is None else cfg.dtype(dt))
    setattr(self, 'T' + suffix, None if T is None else cfg.dtype(T))
def _update_vector_potential(self, homogeneous_external_field, reset):
    """Apply a homogeneous external field to the host vector potential.

    With ``reset`` true, both host components are zeroed and the full
    field is applied. Otherwise only the change relative to the
    previously stored ``self._H`` is added on top of the current values.

    Args:
        homogeneous_external_field: new field value (real scalar).
        reset: whether to discard the current vector potential first.

    Raises:
        AssertionError: if the field is not a real scalar.
    """
    assert isinstance(homogeneous_external_field,
                      (np.floating, float, np.integer, int))

    vp = self.vars._vp

    if reset:
        self._H = cfg.dtype(homogeneous_external_field)

        # TODO: need a fill method in GArray
        host_a, host_b = vp.get_vec_h()
        host_a.fill(0.0)
        host_b.fill(0.0)
        vp.need_htod_sync()
        vp.sync()

        field_change = self._H
    else:
        previous_H = self._H
        self._H = cfg.dtype(homogeneous_external_field)
        field_change = -previous_H + self._H

        vp.sync()

    # TODO: implement GPU version of ab initialization
    # The field change is split between the two components with weight
    # `gauge` (any value in [0, 1] is possible): a receives
    # -gauge*(y - Ly/2)*dH and b receives +(1 - gauge)*(x - Lx/2)*dH.
    gauge = 0.5
    _, y_on_a = self.mesh.xy_a_grid
    x_on_b, _ = self.mesh.xy_b_grid

    host_a, host_b = vp.get_vec_h()
    # In-place updates: the arrays returned by get_vec_h() are modified
    # directly and then pushed to the device.
    host_a -= gauge * (y_on_a - 0.5 * cfg.Ly) * field_change
    host_b += (1.0 - gauge) * (x_on_b - 0.5 * cfg.Lx) * field_change

    vp.need_htod_sync()
    vp.sync()
def __init__(
        self,
        Nx=None, dx=None, Lx=None,  # geometry, x direction
        Ny=None, dy=None, Ly=None,  # geometry, y direction
        Nt=None, dt=None, T=None,  # parameters for order parameter iterator
        NtA=None, dtA=None, TA=None,  # parameters for vector potential iterator
        material_tiling=None,
        order_parameter='random',
        random_seed=None,
        random_level=1.0,
        gl_parameter=np.inf,  # = lambda/xi, GL parameter; if None, np.nan, or np.inf then solveA = False
        normal_conductivity=1.0,  # normal-state conductivity
        linear_coefficient=1.0,  # linear coefficient in GL equation
        homogeneous_external_field=0.0,
        external_field=0.0,
        fixed_vortices=None,
        fixed_vortices_correction='cell centers',
        phase_lock_radius=None,
        device_id=0,
        dtype=np.float64,
        stop_criterion_order_parameter=1e-6,
        stop_criterion_vector_potential=1e-6,
        order_parameter_Langevin_coefficient=0.0,
        vector_potential_Langevin_coefficient=0.0,
        convergence_rtol=1e-6,  # relative tolerance for convergence
):
    """Populate the global ``cfg`` and build the solver components.

    Args:
        Nx, dx, Lx: number of grid nodes, spacing and length along x. At
            least two must be given; they are related by
            ``Lx = dx * (Nx - 1)``.
        Ny, dy, Ly: same for the y direction.
        Nt, dt, T, NtA, dtA, TA: accepted, but not read by this
            constructor; the corresponding ``cfg`` entries are
            initialised to ``None``.
        material_tiling, order_parameter, random_seed, random_level,
        homogeneous_external_field, external_field, fixed_vortices,
        fixed_vortices_correction, phase_lock_radius,
        order_parameter_Langevin_coefficient,
        vector_potential_Langevin_coefficient: copied verbatim into the
            ``cfg`` attribute of the same name.
        gl_parameter: GL parameter (lambda/xi); copied into ``cfg``.
        normal_conductivity: normal-state conductivity; copied into ``cfg``.
        linear_coefficient: linear coefficient in the GL equation; copied
            into ``cfg``.
        device_id: stored as ``cfg.device_id``.
        dtype: ``np.float32`` or ``np.float64``; also selects the matching
            complex type.
        stop_criterion_order_parameter, stop_criterion_vector_potential:
            strictly positive real scalars, stored cast to ``dtype``.
        convergence_rtol: relative tolerance for convergence.

    Raises:
        ValueError: if fewer than two of (N, L, d) are given for an axis.
        AssertionError: if any value fails validation.
    """

    def _resolve_axis(n_nodes, length, spacing, undefined_message):
        """Complete the (node count, length, spacing) triple of one axis."""
        if n_nodes is not None and length is not None and spacing is None:
            spacing = float(length) / (n_nodes - 1)
        elif n_nodes is not None and length is None and spacing is not None:
            length = float(spacing) * (n_nodes - 1)
        elif n_nodes is None and length is not None and spacing is not None:
            n_nodes = int(np.round(length / spacing) + 1)
        elif (n_nodes is not None and length is not None
              and spacing is not None):
            assert np.isclose(length, spacing * (n_nodes - 1))
        else:
            # A bare string cannot be raised in Python 3; use a real
            # exception carrying the same message.
            raise ValueError(undefined_message)
        return n_nodes, length, spacing

    self.dtypes = (np.float32, np.float64)

    cfg.device_id = device_id

    assert dtype in self.dtypes
    cfg.dtype = dtype
    cfg.dtype_complex = {
        np.float32: np.complex64,
        np.float64: np.complex128
    }[cfg.dtype]

    Nx, Lx, dx = _resolve_axis(
        Nx, Lx, dx, 'Two out of three Nx, Lx, dx must be defined')
    Ny, Ly, dy = _resolve_axis(
        Ny, Ly, dy, 'Two out of three Ny, Ly, and dy must be defined')

    assert isinstance(Lx, (np.floating, float, np.integer, int)) and Lx > 0.0
    assert isinstance(Ly, (np.floating, float, np.integer, int)) and Ly > 0.0

    assert isinstance(Nx, (np.integer, int)) and Nx >= 4
    assert isinstance(Ny, (np.integer, int)) and Ny >= 4

    cfg.Nx, cfg.Ny = np.int32(Nx), np.int32(Ny)
    cfg.Lx, cfg.Ly = cfg.dtype(Lx), cfg.dtype(Ly)
    cfg.dx, cfg.dy = cfg.dtype(dx), cfg.dtype(dy)

    cfg.N = cfg.Nx * cfg.Ny

    # number of centers of horizontal edges excluding boundaries
    cfg.Nxa, cfg.Nya = cfg.Nx - 1, cfg.Ny
    # number of centers of vertical edges excluding boundaries
    cfg.Nxb, cfg.Nyb = cfg.Nx, cfg.Ny - 1
    cfg.Na, cfg.Nb = cfg.Nxa * cfg.Nya, cfg.Nxb * cfg.Nyb
    cfg.Nab = cfg.Na + cfg.Nb

    # number of cells
    cfg.Nxc, cfg.Nyc = cfg.Nx - 1, cfg.Ny - 1
    cfg.Nc = cfg.Nxc * cfg.Nyc

    # Inverse spacings and their products.
    cfg.idx, cfg.idy = 1.0 / cfg.dx, 1.0 / cfg.dy
    cfg.idx2, cfg.idy2, cfg.idxy = (cfg.idx * cfg.idx, cfg.idy * cfg.idy,
                                    cfg.idx * cfg.idy)
    cfg.j_dx, cfg.j_dy = 1.0j * cfg.dx, 1.0j * cfg.dy

    cfg.material_tiling = material_tiling

    cfg.order_parameter = order_parameter
    cfg.random_seed = random_seed
    cfg.random_level = random_level

    cfg.gl_parameter = gl_parameter
    cfg.linear_coefficient = linear_coefficient
    cfg.normal_conductivity = normal_conductivity

    cfg.homogeneous_external_field = homogeneous_external_field
    cfg.external_field = external_field

    cfg.fixed_vortices = fixed_vortices
    cfg.fixed_vortices_correction = fixed_vortices_correction
    cfg.phase_lock_radius = phase_lock_radius

    cfg.order_parameter_Langevin_coefficient = order_parameter_Langevin_coefficient
    cfg.vector_potential_Langevin_coefficient = vector_potential_Langevin_coefficient

    # Iterator options start undefined.
    cfg.Nt, cfg.dt, cfg.T = None, None, None
    cfg.NtA, cfg.dtA, cfg.TA = None, None, None

    assert isinstance(
        stop_criterion_order_parameter,
        (np.floating, float, np.integer,
         int)) and stop_criterion_order_parameter > 0.0
    cfg.stop_criterion_order_parameter = cfg.dtype(
        stop_criterion_order_parameter)

    assert isinstance(
        stop_criterion_vector_potential,
        (np.floating, float, np.integer,
         int)) and stop_criterion_vector_potential > 0.0
    cfg.stop_criterion_vector_potential = cfg.dtype(
        stop_criterion_vector_potential)

    # relative tolerance for convergence
    cfg.convergence_rtol = convergence_rtol

    self.cfg = cfg

    # Components are built in dependency order: each one receives the
    # objects constructed before it.
    self.par = GLPar.Startup()
    self.par.red = GLPar.Reduction(self.par)
    self.mesh = GLMesh.Grid()
    self.vars = GLVars.Vars(self.par, self.mesh)
    self.params = GLVars.Params(self.mesh, self.vars)
    self.observables = GLObs.Observables(self.par, self.mesh, self.vars,
                                         self.params)
    self.solve = GLSolvers.Solvers(self.par, self.mesh, self.vars,
                                   self.params, self.observables)
    self.vortex_detector = GLObs.VortexDetector(self.vars, self.params,
                                                self.solve)
def __iterate_vector_potential_gpu(self):
    """Advance the vector potential by one step with Jacobi sweeps on the GPU.

    Up to 1024 Jacobi sweeps are launched. After each sweep the current
    and "next" device buffers are swapped, and the loop stops as soon as
    the residual measure reported by the kernel drops below threshold.
    """
    launch_cfg = dict(block=(self.par.block_size, 1, 1),
                      grid=(self.par.grid_size, 1, 1))
    has_irregular_vp = self.fixed_vortices._vpi is not None

    if has_irregular_vp:
        # Per the original author's note this is "gabi += gab" done in
        # place, without allocating.
        # NOTE(review): the element count passed here is cfg.N with
        # par.grid_size, whereas other launches over vector-potential
        # arrays in this file use cfg.Nab with par.grid_size_A -- confirm
        # this covers the whole array.
        self.__xpy_r_krnl(
            self.fixed_vortices.irregular_vector_potential_h(),
            self.vars.vector_potential_h(),
            np.uint32(cfg.N),
            **launch_cfg)
        total_vp = self.fixed_vortices.irregular_vector_potential_h(
        )  # just a pointer
    else:
        total_vp = self.vars.vector_potential_h()

    # Snapshot of the vector potential used as the right-hand side; it is
    # copied into an existing temporary, so nothing is allocated.
    Utils.copy_dtod(self.vars._tmp_edge_var_h(),
                    self.vars.vector_potential_h())

    max_sweeps = 1024
    for sweep in range(max_sweeps):
        self.__gr2_max.fill(np.int32(0))

        # NOTE(review): the time step passed is self.dt although the
        # vector-potential iterator has its own dtA -- confirm intended.
        self.__iterate_vector_potential_jacobi_step_krnl(
            self.dt,
            self.params.gl_parameter_squared_h(),
            self.params._rho,
            self.params.homogeneous_external_field,
            self.mesh.material_tiling_h(),
            self.vars.order_parameter_h(),
            total_vp,
            self.vars._tmp_edge_var_h(),  # right-hand side; fixed across sweeps
            self.vars.vector_potential_h(),  # ab^{j}
            self.__gab_next,  # ab^{j+1}
            self.params.vector_potential_Langevin_coefficient,
            np.uint32(sweep),
            self._random_t,
            cfg.stop_criterion_vector_potential,
            self.__gr2_max,
            **launch_cfg)

        # Exchange the buffer references only; no data is copied.
        self.vars._vp._gdata, self.__gab_next = (self.__gab_next,
                                                 self.vars._vp._gdata)

        # The kernel leaves an int32 measure in __gr2_max; per the
        # original note, 1e-4 times that value is residual/stop_criterion,
        # so a value below one means converged.
        if 1.0e-4 * cfg.dtype(self.__gr2_max.get()[0]) < 1.0:
            break

    self._random_t += np.uint32(1)
    self.vars._vp.need_dtoh_sync()

    if has_irregular_vp:
        # Per the original author's note this is "gabi -= gab" in place,
        # undoing the accumulation done before the sweeps.
        self.__xmy_r_krnl(
            self.fixed_vortices.irregular_vector_potential_h(),
            self.vars.vector_potential_h(),
            np.uint32(cfg.N),
            **launch_cfg)
def __iterate_order_parameter_gpu(self, gab_gabi):
    """Advance the order parameter by one step with Jacobi sweeps on the GPU.

    Args:
        gab_gabi: device array forwarded to the Jacobi kernel; presumably
            the vector potential including any irregular part -- confirm
            against the caller.

    Up to 1024 sweeps are launched; after each one the current and "next"
    psi buffers are swapped. Afterwards, if phase-lock nodes are defined,
    the phase-lock kernel is applied to psi.
    """
    launch_cfg = dict(block=(self.par.block_size, 1, 1),
                      grid=(self.par.grid_size, 1, 1))

    # Snapshot of psi used as the right-hand side; copied into an existing
    # temporary, so nothing is allocated.
    Utils.copy_dtod(self.vars._tmp_node_var_h(),
                    self.vars.order_parameter_h())

    max_sweeps = 1024
    for sweep in range(max_sweeps):
        self.__gr2_max.fill(np.int32(0))

        # TODO: prepare all cuda calls
        self.__iterate_order_parameter_jacobi_step_krnl(
            self.dt,
            self.params.linear_coefficient_scalar_h(),
            self.params.linear_coefficient_h(),
            self.mesh.material_tiling_h(),
            gab_gabi,
            self.vars._tmp_node_var_h(),  # right-hand side; fixed across sweeps
            self.vars.order_parameter_h(),  # psi^{j}
            self.__gpsi_next,  # psi^{j+1}
            self.params.order_parameter_Langevin_coefficient,
            np.uint32(sweep),
            self._random_t,
            cfg.stop_criterion_order_parameter,
            self.__gr2_max,
            **launch_cfg)

        # Exchange the buffer references only; no data is copied.
        # TODO: this writes _gdata directly because the accessor call
        # cannot be assigned to; the sync status is not updated here.
        self.vars._psi._gdata, self.__gpsi_next = (self.__gpsi_next,
                                                   self.vars._psi._gdata)

        # Per the original note, 1e-4 times the int32 value left in
        # __gr2_max is residual/stop_criterion (residual = max|b - M*psi|),
        # so a value below one means converged.
        if 1.0e-4 * cfg.dtype(self.__gr2_max.get()[0]) < 1.0:
            break

    self._random_t += np.uint32(1)

    locked_nodes = self.fixed_vortices._phase_lock_ns
    if locked_nodes is not None:
        lock_block = 2
        lock_grid = Utils.intceil(locked_nodes.size, lock_block)
        self.__order_parameter_phase_lock_krnl(
            self.vars.order_parameter_h(),
            np.int32(locked_nodes.size),
            self.fixed_vortices._phase_lock_ns_h(),
            grid=(lock_grid, 1, 1),
            block=(lock_block, 1, 1),
        )

    self.vars._psi.need_dtoh_sync()
def __free_energy_minimization(self, n_iter=1000):
    """Minimize the free energy over order parameter and vector potential.

    Runs at most ``n_iter`` conjugate-gradient style iterations on the
    GPU. The free energy after every iteration is appended to
    ``self.cg_energies``; the loop stops early once the relative change
    between two consecutive energies falls below
    ``self.__convergence_rtol``.

    Args:
        n_iter: maximum number of iterations.
    """
    # TODO: check material tiling
    # TODO: add external vector potential
    # TODO: add phase lock gridpoints
    # TODO: Ideally there should be one entry for both minimization
    self.vars._psi.sync()
    self.vars._vp.sync()
    self.cg_energies = []  # TMP
    #beta_psi = 0.0 # First iteration is steepest descent, so make beta = 0.0
    #beta_A = 0.0 # First iteration is steepest descent, so make beta = 0.0
    # gpu arrays:
    # (g)dir : search direction
    # (g)jac : gradient
    # (g)jac_prev: gradient from previous iteration
    #cuda.start_profiler()
    self.vars._alloc_free_temporary_gpu_storage('alloc')
    # NOTE(review): unlike the psi-only minimizer in this file, the search
    # directions are not zero-filled here before their first use in step 3
    # -- confirm they are initialised elsewhere.
    for i in range(n_iter):
        # 1. Compute jacobians
        # Read as plain attributes (no call); presumably properties that
        # evaluate the gradients on access -- confirm.
        self.__gjac_psi = self._free_energy_jacobian_psi
        self.__gjac_A = self._free_energy_jacobian_A
        # 2. Compute betas
        # use Polak–Ribière formula with resetting
        # TODO: consider other formulas, see e.g. https://en.wikipedia.org/wiki/Nonlinear_conjugate_gradient_method
        # NOTE(review): the betas are only recomputed for i > 0, yet
        # self._beta_psi / self._beta_A are read in step 3 on the first
        # iteration too -- they must already hold suitable values.
        if i > 0:
            self.__compute_beta_psi(self.__gjac_psi, self.__gjac_psi_prev)
            self.__compute_beta_A(self.__gjac_A, self.__gjac_A_prev)
        # 3. Update search directions
        # psi-sized arrays use cfg.N elements with par.grid_size; A-sized
        # arrays use cfg.Nab elements with par.grid_size_A.
        self.__axmy_c_krnl(self.__gdir_psi,
                           self.__gjac_psi,
                           self.__gdir_psi,
                           self._beta_psi,
                           np.uint32(cfg.N),
                           block=(self.par.block_size, 1, 1),
                           grid=(self.par.grid_size, 1, 1))
        self.__axmy_r_krnl(self.__gdir_A,
                           self.__gjac_A,
                           self.__gdir_A,
                           self._beta_A,
                           np.uint32(cfg.Nab),
                           block=(self.par.block_size, 1, 1),
                           grid=(self.par.grid_size_A, 1, 1))
        # 4. Compute alphas
        self._free_energy_conjgrad_coef(self.__gdir_psi, self.__gdir_A)
        alpha_psi, alpha_A = self._cg_alpha_min()
        #print('iter: ', i, 'c: ', self.__c, 'alpha, beta: ', alpha_psi, alpha_A, beta_psi, beta_A, flush=True)
        # 5. Update variables
        # The device arrays are passed as both input and output, i.e. they
        # are updated in place.
        self.__axpy_c_krnl(self.__gdir_psi,
                           self.vars.order_parameter_h(),
                           self.vars.order_parameter_h(),
                           cfg.dtype(alpha_psi),
                           np.uint32(cfg.N),
                           block=(self.par.block_size, 1, 1),
                           grid=(self.par.grid_size, 1, 1))
        self.__axpy_r_krnl(self.__gdir_A,
                           self.vars.vector_potential_h(),
                           self.vars.vector_potential_h(),
                           cfg.dtype(alpha_A),
                           np.uint32(cfg.Nab),
                           block=(self.par.block_size, 1, 1),
                           grid=(self.par.grid_size_A, 1, 1))
        # 6. Save previous step
        Utils.copy_dtod(self.__gjac_psi_prev, self.__gjac_psi)
        Utils.copy_dtod(self.__gjac_A_prev, self.__gjac_A)
        E0 = self.observables.free_energy  # TMP
        self.cg_energies.append(E0)  # TMP
        # if i%10 == 0:
        #     print('%3.d: E = %10.10f' % (i, E0)) # TMP
        # Converged when |E_i / E_{i-1} - 1| < rtol. Note that the test
        # divides by the previous energy.
        if (i > 0 and
                np.abs(self.cg_energies[i] / self.cg_energies[i - 1] - 1.0)
                < self.__convergence_rtol):
            #print('CG converged in %d iterations with residual %g ' % ( i, np.abs(self.cg_energies[i]/self.cg_energies[i-1] - 1.0)))
            break
    #cuda.stop_profiler()
    # The device copies changed; flag both for device-to-host sync.
    self.vars._psi.need_dtoh_sync()
    self.vars._vp.need_dtoh_sync()
    self.vars._alloc_free_temporary_gpu_storage('free')
def gl_parameter_squared_h(self):
    """Return the squared GL parameter as ``cfg.dtype``, or -1 as a sentinel.

    When ``self.solveA`` is false, ``-1.0`` is returned and
    ``self.gl_parameter`` is not read.
    """
    kappa_squared = self.gl_parameter**2 if self.solveA else -1.0
    return cfg.dtype(kappa_squared)
def linear_coefficient_scalar_h(self):
    """Return the linear coefficient when it is a single value.

    If ``self._epsilon`` holds exactly one element, the result of its
    ``get_h()`` is returned; otherwise ``cfg.dtype(0.0)`` is returned.
    """
    if self._epsilon.size != 1:
        return cfg.dtype(0.0)
    return self._epsilon.get_h()
def vector_potential_Langevin_coefficient(
        self, vector_potential_Langevin_coefficient):
    """Store the vector-potential Langevin coefficient as ``cfg.dtype``.

    Raises:
        AssertionError: if the value is not a real scalar.
    """
    real_scalar_types = (np.floating, float, np.integer, int)
    assert isinstance(vector_potential_Langevin_coefficient,
                      real_scalar_types)
    self._ab_langevin_c = cfg.dtype(vector_potential_Langevin_coefficient)
def order_parameter_Langevin_coefficient(
        self, order_parameter_Langevin_coefficient):
    """Store the order-parameter Langevin coefficient as ``cfg.dtype``.

    Raises:
        AssertionError: if the value is not a real scalar.
    """
    real_scalar_types = (np.floating, float, np.integer, int)
    assert isinstance(order_parameter_Langevin_coefficient,
                      real_scalar_types)
    self._psi_langevin_c = cfg.dtype(order_parameter_Langevin_coefficient)
def homogeneous_external_field(self, homogeneous_external_field):
    """Store the homogeneous external field in ``self._H`` as ``cfg.dtype``."""
    field_value = cfg.dtype(homogeneous_external_field)
    self._H = field_value
def __free_energy_minimization_psi(self, n_iter=1000):
    """Minimize the free energy with respect to the order parameter only.

    Runs at most ``n_iter`` conjugate-gradient style iterations on the
    GPU. The free energy after every iteration is appended to
    ``self.cg_energies``; the loop stops early once the relative change
    between two consecutive energies falls below
    ``self.__convergence_rtol``.

    Args:
        n_iter: maximum number of iterations.

    Raises:
        AssertionError: if ``self.params.solveA`` is true.
    """
    # NOTE: Tests show that
    # - CG minimization is much faster than TD for 1-2 vortices (at least current implementation)
    # - for ~30 vortices CG demonstrates similar "performance" as TD
    # NOTE: works with material tiling
    # TODO: add external vector potential
    # TODO: add phase lock gridpoints
    assert not self.params.solveA
    self.vars._psi.sync()
    self.vars._vp.sync()
    self.cg_energies = []  # TMP
    #beta = 0.0 # First iteration is steepest descent, so make beta = 0.0
    # gpu arrays:
    # (g)dir : search direction
    # (g)jac : gradient
    # (g)jac_prev: gradient from previous iteration
    self.vars._alloc_free_temporary_gpu_storage('alloc')
    # Start from a zero search direction.
    self.__gdir_psi.fill(0.0)
    for i in range(n_iter):
        # 1. Compute jacobians
        # Read as a plain attribute (no call); presumably a property that
        # evaluates the gradient on access -- confirm.
        self.__gjac_psi = self._free_energy_jacobian_psi
        # 2. Compute beta
        # Polak–Ribière formula
        # TODO: consider other formulas, see e.g. https://en.wikipedia.org/wiki/Nonlinear_conjugate_gradient_method
        # NOTE(review): beta is only recomputed for i > 0, yet
        # self._beta_psi is read in step 3 on the first iteration too --
        # it must already hold a suitable value.
        if i > 0:
            # d*(d-dp)/(dp*dp)
            # = [d.re, d.im]*([d.re-dp.re, d.im-dp.im])/([dp.re, dp.im]*[dp.re, dp.im])
            # = (d.re*(d.re-dp.re) + d.im*(d.im-dp.im))/(dp.re*dp.re + dp.im*dp.im)
            # = (j.re*(j.re-jp.re) + j.im*(j.im-jp.im))/(jp.re*jp.re + jp.im*jp.im)
            # where j = -d
            self.__compute_beta_psi(self.__gjac_psi, self.__gjac_psi_prev)
        # 3. Update search direction
        self.__axmy_c_krnl(self.__gdir_psi,
                           self.__gjac_psi,
                           self.__gdir_psi,
                           self._beta_psi,
                           np.uint32(cfg.N),
                           block=(self.par.block_size, 1, 1),
                           grid=(self.par.grid_size, 1, 1))
        # 4. Compute alpha
        self._free_energy_conjgrad_coef_psi(self.__gdir_psi)
        alpha0 = self._cg_alpha_psi_min()
        #print('iter: ', i, 'F: ', c0, c1, c2, c3, c4, 'alpha: ', alpha0, 'beta: ', beta, flush=True)
        # 5. Update variables
        # The order-parameter device array is passed as both input and
        # output, i.e. it is updated in place.
        self.__axpy_c_krnl(self.__gdir_psi,
                           self.vars.order_parameter_h(),
                           self.vars.order_parameter_h(),
                           cfg.dtype(alpha0),
                           np.uint32(cfg.N),
                           block=(self.par.block_size, 1, 1),
                           grid=(self.par.grid_size, 1, 1))
        # 6. Save
        Utils.copy_dtod(self.__gjac_psi_prev, self.__gjac_psi)
        E0 = self.observables.free_energy  # TMP
        self.cg_energies.append(E0)  # TMP
        # if i%10 == 0:
        #     print('%3.d: E = %10.10f' % (i, E0)) # TMP
        # Converged when |E_i / E_{i-1} - 1| < rtol. Note that the test
        # divides by the previous energy.
        if (i > 0 and
                np.abs(self.cg_energies[i] / self.cg_energies[i - 1] - 1.0)
                < self.__convergence_rtol):
            #print('CG converged in %d iterations with residual %g ' % ( i, np.abs(self.cg_energies[i]/self.cg_energies[i-1] - 1.0)))
            break
    # The device copy of psi changed; flag it for device-to-host sync.
    self.vars._psi.need_dtoh_sync()
    self.vars._alloc_free_temporary_gpu_storage('free')
def normal_conductivity(self, normal_conductivity):
    """Store the normal conductivity and its reciprocal as ``cfg.dtype``.

    ``self._sigma`` receives the conductivity and ``self._rho`` receives
    ``1 / conductivity``.

    Raises:
        AssertionError: if the value is not a strictly positive real scalar.
    """
    is_real_scalar = isinstance(normal_conductivity,
                                (np.floating, float, np.integer, int))
    assert is_real_scalar and normal_conductivity > 0.0
    self._sigma = cfg.dtype(normal_conductivity)
    self._rho = cfg.dtype(1.0 / normal_conductivity)