import time
import itertools

import numpy as np
from uncertainties import ufloat
import pycuda.driver as cuda

from chroma import event, generator, gpu, tools


def pdf(gpu_detector, npdfs=10, nevents=100, nreps=16, ndaq=1,
        nthreads_per_block=64, max_blocks=1024):
    """
    Returns the average number of 100 MeV events that can be
    histogrammed per second.

    Args:
        - gpu_detector, chroma.gpu.GPUDetector
            The GPU detector to benchmark, as returned by the GPUDetector
            constructor.
        - npdfs, int
            The number of PDF generations to average.
        - nevents, int
            The number of 100 MeV events to generate for each PDF.
        - nreps, int
            The number of times to propagate each event and add it to the
            PDF.
        - ndaq, int
            The number of times to run the DAQ simulation on each
            propagated event and add it to the PDF.
    """
    rng_states = gpu.get_rng_states(nthreads_per_block*max_blocks)

    gpu_daq = gpu.GPUDaq(gpu_detector)
    gpu_pdf = gpu.GPUPDF()
    gpu_pdf.setup_pdf(gpu_detector.nchannels, 100, (-0.5, 999.5),
                      10, (-0.5, 9.5))

    run_times = []
    for i in tools.progress(range(npdfs)):
        t0 = time.time()
        gpu_pdf.clear_pdf()

        vertex_gen = generator.vertex.constant_particle_gun(
            'e-', (0, 0, 0), (1, 0, 0), 100)
        vertex_iter = itertools.islice(vertex_gen, nevents)

        # g4generator is a module-level G4ParallelGenerator instance;
        # see the driver sketch below.
        for ev in g4generator.generate_events(vertex_iter):
            gpu_photons = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps)
            gpu_photons.propagate(gpu_detector, rng_states,
                                  nthreads_per_block, max_blocks)
            for gpu_photon_slice in gpu_photons.iterate_copies():
                for idaq in range(ndaq):  # xrange -> range for Python 3
                    gpu_daq.begin_acquire()
                    gpu_daq.acquire(gpu_photon_slice, rng_states,
                                    nthreads_per_block, max_blocks)
                    gpu_channels = gpu_daq.end_acquire()
                    gpu_pdf.add_hits_to_pdf(gpu_channels, nthreads_per_block)

        hitcount, pdf = gpu_pdf.get_pdfs()

        elapsed = time.time() - t0
        if i > 0:
            # The first kernel call incurs some driver overhead, so skip it.
            run_times.append(elapsed)

    # Current versions of the uncertainties package take ufloat(nominal,
    # std_dev) as two arguments rather than a tuple.
    return nevents*nreps*ndaq/ufloat(np.mean(run_times), np.std(run_times))
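# A minimal driver sketch for pdf(), not part of the original module: the
# demo geometry and the names detector, g4generator, context, and
# gpu_detector are illustrative assumptions. g4generator must be defined at
# module scope because pdf() refers to it as a global.
if __name__ == '__main__':
    from chroma import demo
    from chroma.loader import create_geometry_from_obj

    # Build (or load a cached) BVH for the demo geometry before uploading.
    detector = create_geometry_from_obj(demo.detector())
    g4generator = generator.photon.G4ParallelGenerator(
        4, detector.detector_material)

    context = gpu.create_cuda_context()
    gpu_detector = gpu.GPUDetector(detector)
    print('%s 100 MeV events histogrammed/s' % pdf(gpu_detector))
    context.pop()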
def __init__(self, detector, seed=None, cuda_device=None,
             particle_tracking=False, photon_tracking=False,
             geant4_processes=4, nthreads_per_block=64, max_blocks=1024):
    self.detector = detector

    self.nthreads_per_block = nthreads_per_block
    self.max_blocks = max_blocks
    self.photon_tracking = photon_tracking

    if seed is None:
        self.seed = pick_seed()
    else:
        self.seed = seed

    # We have three generators to seed: numpy.random, GEANT4, and CURAND.
    # The latter two are done below.
    np.random.seed(self.seed)

    if geant4_processes > 0:
        self.photon_generator = generator.photon.G4ParallelGenerator(
            geant4_processes, detector.detector_material,
            base_seed=self.seed, tracking=particle_tracking)
    else:
        self.photon_generator = None

    self.context = gpu.create_cuda_context(cuda_device)

    if hasattr(detector, 'num_channels'):
        self.gpu_geometry = gpu.GPUDetector(detector)
        self.gpu_daq = gpu.GPUDaq(self.gpu_geometry)
        self.gpu_pdf = gpu.GPUPDF()
        self.gpu_pdf_kernel = gpu.GPUKernelPDF()
    else:
        self.gpu_geometry = gpu.GPUGeometry(detector)

    self.rng_states = gpu.get_rng_states(
        self.nthreads_per_block * self.max_blocks, seed=self.seed)

    self.pdf_config = None
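# Usage sketch for the constructor above. Assumptions not present in the
# original listing: this __init__ belongs to chroma.sim.Simulation, and
# chroma's demo geometry (with a pre-built BVH) stands in for a real
# detector.
from chroma.sim import Simulation
from chroma import demo
from chroma.loader import create_geometry_from_obj

sim = Simulation(create_geometry_from_obj(demo.detector()),
                 seed=12345, geant4_processes=4)
# ... run sim.simulate(...) on some events here ...
sim.context.pop()  # release the CUDA context created in __init__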
def testGPUPDF(self):
    '''Create a hit count and (q,t) PDF for 10 MeV events in MicroLBNE'''
    # self.detector and self.vertex_gen come from the test fixture's
    # setUp(); see the sketch after this method.
    g4generator = G4ParallelGenerator(1, water)

    context = gpu.create_cuda_context()

    gpu_geometry = gpu.GPUDetector(self.detector)

    nthreads_per_block, max_blocks = 64, 1024

    rng_states = gpu.get_rng_states(nthreads_per_block * max_blocks)

    gpu_daq = gpu.GPUDaq(gpu_geometry)
    gpu_pdf = gpu.GPUPDF()
    gpu_pdf.setup_pdf(self.detector.num_channels(), 100, (-0.5, 999.5),
                      10, (-0.5, 9.5))

    gpu_pdf.clear_pdf()

    for ev in g4generator.generate_events(
            itertools.islice(self.vertex_gen, 10)):
        gpu_photons = gpu.GPUPhotons(ev.photons_beg)
        gpu_photons.propagate(gpu_geometry, rng_states,
                              nthreads_per_block, max_blocks)
        gpu_daq.begin_acquire()
        gpu_daq.acquire(gpu_photons, rng_states, nthreads_per_block,
                        max_blocks)
        gpu_channels = gpu_daq.end_acquire()
        gpu_pdf.add_hits_to_pdf(gpu_channels)

    hitcount, pdf = gpu_pdf.get_pdfs()

    self.assertTrue((hitcount > 0).any())
    self.assertTrue((pdf > 0).any())

    # Consistency check: each channel's total hit count should equal the
    # sum of its PDF bins.
    for i, nhits in enumerate(hitcount):
        self.assertEqual(nhits, pdf[i].sum())

    context.pop()
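# Illustrative fixture for testGPUPDF above; the actual setUp in the test
# module may differ. The test assumes module-level imports of
# G4ParallelGenerator and water, plus self.detector and self.vertex_gen
# provided by the fixture. The demo geometry and 10 MeV gun energy here
# are stand-ins chosen to match the docstring.
import unittest
import itertools

from chroma import gpu
from chroma.demo.optics import water
from chroma.generator.photon import G4ParallelGenerator
from chroma.generator.vertex import constant_particle_gun


class TestPDF(unittest.TestCase):
    def setUp(self):
        from chroma import demo
        from chroma.loader import create_geometry_from_obj
        self.detector = create_geometry_from_obj(demo.detector())
        self.vertex_gen = constant_particle_gun('e-', (0, 0, 0),
                                                (1, 0, 0), 10)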
def pdf_eval(gpu_detector, npdfs=10, nevents=25, nreps=16, ndaq=128,
             nthreads_per_block=64, max_blocks=1024):
    """
    Returns the average number of 100 MeV events that can be evaluated
    against a PDF per second.

    Args:
        - gpu_detector, chroma.gpu.GPUDetector
            The GPU detector to benchmark, as returned by the GPUDetector
            constructor.
        - npdfs, int
            The number of PDF evaluations to average.
        - nevents, int
            The number of 100 MeV events to generate for each PDF.
        - nreps, int
            The number of times to propagate each event and add it to the
            PDF.
        - ndaq, int
            The number of times to run the DAQ simulation on each
            propagated event and add it to the PDF.
    """
    rng_states = gpu.get_rng_states(nthreads_per_block * max_blocks)

    # Make the "data" event against which the PDFs will be evaluated.
    data_ev = next(
        g4generator.generate_events(
            itertools.islice(
                generator.vertex.constant_particle_gun(
                    'e-', (0, 0, 0), (1, 0, 0), 100), 1)))
    gpu_photons = gpu.GPUPhotons(data_ev.photons_beg)
    gpu_photons.propagate(gpu_detector, rng_states, nthreads_per_block,
                          max_blocks)
    gpu_daq = gpu.GPUDaq(gpu_detector)
    gpu_daq.begin_acquire()
    gpu_daq.acquire(gpu_photons, rng_states, nthreads_per_block,
                    max_blocks).get()
    data_ev_channels = gpu_daq.end_acquire()

    # Set up the PDF evaluation.
    gpu_daq = gpu.GPUDaq(gpu_detector, ndaq=ndaq)
    gpu_pdf = gpu.GPUPDF()
    gpu_pdf.setup_pdf_eval(data_ev_channels.hit, data_ev_channels.t,
                           data_ev_channels.q, 0.05, (-0.5, 999.5),
                           1.0, (-0.5, 20), min_bin_content=20,
                           time_only=True)

    run_times = []
    for i in tools.progress(list(range(npdfs))):
        t0 = time.time()
        gpu_pdf.clear_pdf_eval()

        vertex_gen = generator.vertex.constant_particle_gun(
            'e-', (0, 0, 0), (1, 0, 0), 100)
        vertex_iter = itertools.islice(vertex_gen, nevents)

        for ev in g4generator.generate_events(vertex_iter):
            gpu_photons = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps)
            gpu_photons.propagate(gpu_detector, rng_states,
                                  nthreads_per_block, max_blocks)
            for gpu_photon_slice in gpu_photons.iterate_copies():
                gpu_daq.begin_acquire()
                # Only photons flagged SURFACE_DETECT enter the DAQ.
                gpu_photon_slice = gpu_photon_slice.select(
                    event.SURFACE_DETECT)
                gpu_daq.acquire(gpu_photon_slice, rng_states,
                                nthreads_per_block, max_blocks)
                gpu_channels = gpu_daq.end_acquire()
                gpu_pdf.accumulate_pdf_eval(gpu_channels, nthreads_per_block)

        cuda.Context.get_current().synchronize()
        elapsed = time.time() - t0

        if i > 0:
            # The first kernel call incurs some driver overhead, so skip it.
            run_times.append(elapsed)

    # ufloat takes (nominal_value, std_dev) as two arguments, as above.
    return nevents * nreps * ndaq / ufloat(np.mean(run_times),
                                           np.std(run_times))
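# With the same module-level setup as in the pdf() driver sketch above
# (g4generator and gpu_detector are the illustrative names introduced
# there), pdf_eval() is benchmarked the same way:
print('%s 100 MeV events evaluated/s' % pdf_eval(gpu_detector))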