def __init__(self, ctx=None, queue=None):
    """Set up the OpenCL context and queue, build the kernels, create the RNG.

    The kernel source ``cooling_along_x_advance.cl`` is read from the
    directory containing this file and compiled twice: once for the program
    whose scalar arguments are declared as ``numpy.float32`` and once for
    the program whose scalar arguments are declared as ``numpy.float64``.

    Args:
        ctx: OpenCL context to use. When None, one is created with
            ``cl.create_some_context()``.
        queue: OpenCL command queue to use. When None, a queue with
            profiling enabled is created on ``self.ctx``.

    Raises:
        OSError: if the kernel source file cannot be opened.
        Exception: any error raised by ``Program.build()`` is re-raised
            after the build log has been printed.
    """
    self.ctx = ctx
    self.queue = queue
    # Use identity tests: "== None" would go through the object's own
    # __eq__ rather than simply checking whether an argument was supplied.
    if self.ctx is None:
        self.ctx = cl.create_some_context()
    if self.queue is None:
        self.queue = cl.CommandQueue(
            self.ctx,
            properties=cl.command_queue_properties.PROFILING_ENABLE)

    absolutePathToKernels = os.path.dirname(os.path.realpath(__file__))
    kernel_path = os.path.join(absolutePathToKernels,
                               'cooling_along_x_advance.cl')
    # Context manager so the file handle is closed deterministically.
    with open(kernel_path, 'r') as kernel_file:
        src = kernel_file.read()

    def build_or_report(program):
        # Build the program; on failure print the build log and re-raise.
        try:
            program.build()
        except Exception:
            print("Error:")
            print(
                program.get_build_info(
                    self.ctx.devices[0],
                    cl.program_build_info.LOG))
            raise

    # NOTE(review): the PyOpenCL documentation warns that looking a kernel
    # up as a Program attribute may return a new Kernel object each time;
    # confirm that the dtypes set below persist for the kernel objects that
    # are actually launched.

    # Single-precision variant.
    self.CoolingAlongXAdvF = cl.Program(self.ctx, src)
    build_or_report(self.CoolingAlongXAdvF)
    self.CoolingAlongXAdvF.advance_ptcls_cooling_along_x.set_scalar_arg_dtypes(
        [
            None, None, None, None, None, None, None, None, None,
            numpy.float32, numpy.float32, numpy.float32, numpy.float32,
            numpy.float32,
            numpy.int32,
        ])

    # Double-precision variant (same source, float64 scalar arguments).
    self.CoolingAlongXAdvD = cl.Program(self.ctx, src)
    build_or_report(self.CoolingAlongXAdvD)
    self.CoolingAlongXAdvD.advance_ptcls_cooling_along_x.set_scalar_arg_dtypes(
        [
            None, None, None, None, None, None, None, None, None,
            numpy.float64, numpy.float64, numpy.float64, numpy.float64,
            numpy.float64,
            numpy.int32,
        ])

    self.generator = cl_random.RanluxGenerator(
        self.queue,
        num_work_items=128,
        luxury=1,
        seed=None,
        no_warmup=False,
        use_legacy_init=False,
        max_work_items=None)
def __init__(self, ctx=None, queue=None):
    """Set up the OpenCL context, command queue and random number generator.

    Args:
        ctx: OpenCL context to use. When None, one is created with
            ``cl.create_some_context()``.
        queue: OpenCL command queue to use. When None, a queue with
            profiling enabled is created on ``self.ctx``.
    """
    self.diffusionConstant = 0.0
    self.ctx = ctx
    self.queue = queue
    # Use identity tests: "== None" would go through the object's own
    # __eq__ rather than simply checking whether an argument was supplied.
    if self.ctx is None:
        self.ctx = cl.create_some_context()
    if self.queue is None:
        self.queue = cl.CommandQueue(
            self.ctx,
            properties=cl.command_queue_properties.PROFILING_ENABLE)
    # Shorthand for the memory-flag namespace.
    self.mf = cl.mem_flags
    self.generator = cl_random.RanluxGenerator(
        self.queue,
        num_work_items=128,
        luxury=1,
        seed=None,
        no_warmup=False,
        use_legacy_init=False,
        max_work_items=None)
def make_ranlux_generator(cl_ctx):
    """Return a ``clrandom.RanluxGenerator`` bound to a new queue on *cl_ctx*.

    A fresh ``cl.CommandQueue`` is created on the given context and handed
    to the generator; all other generator settings are left at their
    defaults.
    """
    return clrandom.RanluxGenerator(cl.CommandQueue(cl_ctx))
def __init__(self, macrospin_object):
    """Compile the rendered kernel and allocate the device-side state.

    The kernel text produced by ``macrospin_object.render_kernel()`` is
    written to ``templates/rendered-kernel.cl`` next to this file, compiled
    for a newly created OpenCL context, and the kernels ``evolve``,
    ``reduce_m`` and ``update_m_of_t`` are stored on the instance with
    their scalar argument types declared.

    Args:
        macrospin_object: object providing ``render_kernel()`` and the
            attributes read below (``temperature``, ``N``, ``pixels``,
            ``time_points``, ``time_traces``, ``initial_theta``,
            ``initial_phi``, ``first_vals_np``, ``second_vals_np``).

    Raises:
        OSError: if the rendered kernel file cannot be written.
    """
    super(Simulation2D, self).__init__()
    self.mo = macrospin_object

    kernel_text = self.mo.render_kernel()
    self.dirname = os.path.dirname(__file__)
    # os.path.join keeps the path relative when dirname is '' (plain string
    # concatenation would produce '/templates/...' at the filesystem root).
    rendered_path = os.path.join(self.dirname, 'templates',
                                 'rendered-kernel.cl')
    with open(rendered_path, 'w') as f:
        f.write(kernel_text)

    self.ctx = cl.create_some_context()
    self.queue = cl.CommandQueue(self.ctx)
    self.prg = cl.Program(self.ctx, kernel_text).build()

    self.evolve = self.prg.evolve
    # The evolve kernel takes one more buffer argument when temperature > 0.
    if self.mo.temperature > 0:
        self.evolve.set_scalar_arg_dtypes(
            [None, None, None, None, None, np.float32])
    else:
        print("No temp")
        self.evolve.set_scalar_arg_dtypes(
            [None, None, None, None, np.float32])

    self.reduce_m = self.prg.reduce_m
    self.reduce_m.set_scalar_arg_dtypes([None, None, None, np.int32])
    # self.normalize_m = self.prg.normalize_m

    self.current_timepoint = 0
    self.update_m_of_t = self.prg.update_m_of_t
    self.update_m_of_t.set_scalar_arg_dtypes(
        [None, None, None, np.int32, np.int32, np.int32])

    # Define random number generator
    self.ran_gen = ran.RanluxGenerator(self.queue, luxury=0)

    # Declare the GPU bound arrays
    self.theta = cl.array.zeros(self.queue, self.mo.N, np.float32)
    self.phi = cl.array.zeros(self.queue, self.mo.N, np.float32)
    # m_of_t is only allocated (and the attribute only exists) when time
    # traces are requested.
    if self.mo.time_traces:
        self.m_of_t = cl.array.zeros(self.queue,
                                     self.mo.pixels * self.mo.time_points,
                                     cl.array.vec.float4)
    self.dW = cl.array.zeros(self.queue, self.mo.N, cl.array.vec.float4)
    self.phase_diagram = cl.array.zeros(self.queue, self.mo.pixels,
                                        np.float32)

    # Fill out the magnetization initial conditions and push to card
    self.initial_theta = np.zeros(self.mo.N, dtype=np.float32)
    self.initial_phi = np.zeros(self.mo.N, dtype=np.float32)
    self.initial_theta[:] = self.mo.initial_theta
    self.initial_phi[:] = self.mo.initial_phi
    cl.enqueue_copy(self.queue, self.theta.data, self.initial_theta)
    cl.enqueue_copy(self.queue, self.phi.data, self.initial_phi)

    # Phase diagram values
    self.first_vals = cl.Buffer(
        self.ctx,
        cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
        hostbuf=self.mo.first_vals_np)
    self.second_vals = cl.Buffer(
        self.ctx,
        cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
        hostbuf=self.mo.second_vals_np)
def initialize_buffers(self):
    """Compile the OpenCL kernel and create the OpenCL/OpenGL buffers.

    Renders the kernel template with the module-level simulation
    parameters, builds the program, allocates the device arrays and host
    staging arrays, and wraps the two OpenGL vertex buffers in OpenCL
    interop objects.
    """
    # Read the kernel template from disk.
    with open("src/macrospin_gpu/templates/costm-amp-dur-gl.cl"
              ) as template_file:
        template = Template(template_file.read())

    # Render the OpenCL kernel source from the template.
    lambda_op_sq = lambda_op**2
    lambda_ip_sq = lambda_ip**2
    template_params = dict(
        alpha=damping,
        dt=dt,
        sqrtDt=sqrtDt,
        nuSqrtDt=nu * sqrtDt,
        nu=nu,
        nu2=nu * nu,
        nxx=Nxx,
        nyy=Nyy,
        nzz=Nzz,
        hx=hExtX / Ms,
        hy=hExtY / Ms,
        hz=hExtZ / Ms,
        stt_op=stt_op_pre,
        stt_ip=stt_ip_pre,
        lambda2_plus1_op=lambda_op_sq + 1.0,
        lambda2_plus1_ip=lambda_ip_sq + 1.0,
        lambda2_minus1_op=lambda_op_sq - 1.0,
        lambda2_minus1_ip=lambda_ip_sq - 1.0,
        rise_time=65.0e-12,
        fall_time=100.0e-12,
        pause_before=pause_before,
        pause_after=pause_after,
    )
    rendered_source = template.render(**template_params)

    # Keep a copy of the rendered source in the working directory.
    with open('rendered-kernel.cl', 'w') as out_file:
        out_file.write(rendered_source)

    # Build the program and declare the scalar argument types of its kernels.
    self.ctx, self.queue = clinit()
    self.prg = cl.Program(self.ctx, rendered_source).build()

    self.evolve = self.prg.evolve
    self.evolve.set_scalar_arg_dtypes([None, None, None, None, np.float32])

    self.reduce_m = self.prg.reduce_m
    self.reduce_m.set_scalar_arg_dtypes([None, None, np.int32])

    self.normalize_m = self.prg.normalize_m

    self.update_m_of_t = self.prg.update_m_of_t
    self.update_m_of_t.set_scalar_arg_dtypes(
        [None, None, None, np.int32, np.int32, np.int32])

    # Call finish() on the PyOpenCL queue before continuing.
    self.queue.finish()

    # Random number generator bound to the queue.
    self.rg = ran.RanluxGenerator(self.queue)

    # Data dimensions
    self.realizations = 1  # Averages over different realizations of the noise process
    self.current_steps = 256
    self.duration_steps = 8
    self.N = self.current_steps * self.duration_steps * self.realizations
    self.time_points = 64  # How many points to store as a function of time

    # Current state
    self.current_iter = 0
    self.current_time = 0.0
    self.current_timepoint = 0

    # One phase-diagram entry per (current, duration) pair; each entry has
    # time_points rows in the host staging arrays below.
    n_pixels = self.current_steps * self.duration_steps
    n_vertices = n_pixels * self.time_points

    # Device-side arrays.
    float4 = cl.array.vec.float4
    self.m = cl.array.zeros(self.queue, self.N, float4)
    self.dW = cl.array.zeros(self.queue, self.N, float4)
    self.phase_diagram = cl.array.zeros(self.queue, n_pixels, np.float32)

    # Host arrays holding the phase diagram parameters and the data that
    # backs the OpenGL buffers (contents are left uninitialised here).
    self.durations_np = np.linspace(
        min_duration, max_duration, self.duration_steps).astype(np.float32)
    self.currents_np = np.linspace(
        min_current, max_current, self.current_steps).astype(np.float32)
    self.m_of_t_np = np.empty((n_vertices, 4), dtype=np.float32)
    self.colors_np = np.empty((n_vertices, 4), dtype=np.float32)

    # Read-only device buffers initialised from the host parameter arrays.
    read_only_copy = cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR
    self.durations = cl.Buffer(self.ctx, read_only_copy,
                               hostbuf=self.durations_np)
    self.currents = cl.Buffer(self.ctx, read_only_copy,
                              hostbuf=self.currents_np)

    # Upload the initial magnetization state (1, 0, 0, 0) for every element.
    m_start = np.zeros(self.N, dtype=float4)
    m_start[:] = (1, 0, 0, 0)
    cl.enqueue_copy(self.queue, self.m.data, m_start)

    # White particles
    self.colors_np[...] = 1.0
    self.colbuf = vbo.VBO(data=self.colors_np,
                          usage=gl.GL_DYNAMIC_DRAW,
                          target=gl.GL_ARRAY_BUFFER)
    self.colbuf.bind()

    # For the glMultiDraw command we need an array of offsets and draw
    # lengths: one start offset and one length per phase-diagram entry.
    self.start_indices = np.arange(0, n_vertices, self.time_points,
                                   dtype=np.int32)
    self.draw_lengths = np.full(n_pixels, self.time_points, dtype=np.int32)

    # Create the OpenGL VBO backed by m_of_t_np and bind it.
    self.glbuf = vbo.VBO(data=self.m_of_t_np,
                         usage=gl.GL_DYNAMIC_DRAW,
                         target=gl.GL_ARRAY_BUFFER)
    self.glbuf.bind()

    # Interop objects giving OpenCL access to the two GL VBOs.
    read_write = cl.mem_flags.READ_WRITE
    self.glclbuf = cl.GLBuffer(self.ctx, read_write,
                               int(self.glbuf.buffers[0]))
    self.colclbuf = cl.GLBuffer(self.ctx, read_write,
                                int(self.colbuf.buffers[0]))
def __init__(self, ctx=None, queue=None):
    """Set the cooling-laser parameters, build the kernels, create the RNG.

    The kernel source ``cooling_laser_advance.cl`` is read from the
    directory containing this file and compiled; the scalar argument types
    of its four kernels are then declared.

    Args:
        ctx: OpenCL context to use. When None, one is created with
            ``cl.create_some_context()``.
        queue: OpenCL command queue to use. When None, a queue with
            profiling enabled is created on ``self.ctx``.

    Raises:
        OSError: if the kernel source file cannot be opened.
        Exception: any error raised by ``Program.build()`` is re-raised
            after the build log has been printed.
    """
    self._PlanckConstantReduced = 1.0545717e-34
    # wavelength of cooling laser
    lam = 313.0e-9
    # wave vector
    self.k0 = numpy.array([0, 0, 2.0 * numpy.pi / lam],
                          dtype=numpy.float32)
    self.x0 = numpy.array([0, 0, 0], dtype=numpy.float32)
    # 1/e radius of cooling laser
    self.sigma = 1.0e-3
    # line width (unsaturated)
    self.gamma = 2.0 * numpy.pi * 19.0e6
    # Detuning at zero velocity
    self.delta0 = -0.5 * self.gamma
    # Saturation parameter
    self.S = 0.1

    self.ctx = ctx
    self.queue = queue
    # Use identity tests: "== None" would go through the object's own
    # __eq__ rather than simply checking whether an argument was supplied.
    if self.ctx is None:
        self.ctx = cl.create_some_context()
    if self.queue is None:
        self.queue = cl.CommandQueue(
            self.ctx,
            properties=cl.command_queue_properties.PROFILING_ENABLE)

    absolutePathToKernels = os.path.dirname(os.path.realpath(__file__))
    kernel_path = os.path.join(absolutePathToKernels,
                               'cooling_laser_advance.cl')
    # Context manager so the file handle is closed deterministically.
    with open(kernel_path, 'r') as kernel_file:
        src = kernel_file.read()

    self.program = cl.Program(self.ctx, src)
    try:
        self.program.build()
    except Exception:
        # Show the compiler output before propagating the failure.
        print("Error:")
        print(
            self.program.get_build_info(self.ctx.devices[0],
                                        cl.program_build_info.LOG))
        raise

    # NOTE(review): the PyOpenCL documentation warns that looking a kernel
    # up as a Program attribute may return a new Kernel object each time;
    # confirm that the dtypes set below persist for the kernel objects that
    # are actually launched.
    self.program.compute_mean_scattered_photons_homogeneous_beam.set_scalar_arg_dtypes(
        [
            None, None, None, None, None, None,
            numpy.float32, numpy.float32, numpy.float32, numpy.float32,
            numpy.float32, numpy.float32, numpy.float32,
            numpy.int32,
            None,
        ])
    self.program.compute_mean_scattered_photons_gaussian_beam.set_scalar_arg_dtypes(
        [
            None, None, None, None, None, None,
            numpy.float32, numpy.float32, numpy.float32, numpy.float32,
            numpy.float32, numpy.float32, numpy.float32, numpy.float32,
            numpy.float32, numpy.float32, numpy.float32,
            numpy.int32,
            None,
        ])
    self.program.countEmissions.set_scalar_arg_dtypes(
        [None, None, numpy.int32, None, numpy.int32])
    self.program.computeKicks.set_scalar_arg_dtypes([
        None, None,
        numpy.int32,
        None,
        numpy.float32, numpy.float32, numpy.float32, numpy.float32,
        numpy.float32,
        None, None, None,
        numpy.int32,
    ])

    self.generator = cl_random.RanluxGenerator(
        self.queue,
        num_work_items=128,
        luxury=1,
        seed=None,
        no_warmup=False,
        use_legacy_init=False,
        max_work_items=None)