def create_pdf(self, iterable, tbins, trange, qbins, qrange, nreps=1):
    """Accumulate a PDF over the supplied events.

    Returns a tuple: a 1D array of per-channel hit counts and a 3D
    array of (channel, time, charge) pdfs.
    """
    head, iterable = itertoolset.peek(iterable)
    if isinstance(head, event.Event):
        iterable = self.photon_generator.generate_events(iterable)

    # Only rebuild the GPU histograms when the requested binning differs
    # from the cached configuration; otherwise just zero them.
    requested_config = (tbins, trange, qbins, qrange)
    if requested_config == self.pdf_config:
        self.gpu_pdf.clear_pdf()
    else:
        self.pdf_config = requested_config
        self.gpu_pdf.setup_pdf(self.detector.num_channels(),
                               tbins, trange, qbins, qrange)

    if nreps > 1:
        iterable = itertoolset.repeating_iterator(iterable, nreps)

    for ev in iterable:
        photons_gpu = gpu.GPUPhotons(ev.photons_beg)
        photons_gpu.propagate(self.gpu_geometry, self.rng_states,
                              nthreads_per_block=self.nthreads_per_block,
                              max_blocks=self.max_blocks)
        self.gpu_daq.begin_acquire()
        self.gpu_daq.acquire(photons_gpu, self.rng_states,
                             nthreads_per_block=self.nthreads_per_block,
                             max_blocks=self.max_blocks)
        channels = self.gpu_daq.end_acquire()
        self.gpu_pdf.add_hits_to_pdf(channels)

    return self.gpu_pdf.get_pdfs()
def propagate(gpu_detector, number=10, nphotons=500000, nthreads_per_block=64, max_blocks=1024):
    "Returns the average number of photons propagated on the GPU per second."
    rng_states = gpu.get_rng_states(nthreads_per_block * max_blocks)
    timings = []
    for trial in tools.progress(list(range(number))):
        # Fresh isotropic photon bundle for every trial, direction-sorted
        # for coherent GPU memory access.
        positions = np.zeros((nphotons, 3))
        directions = sample.uniform_sphere(nphotons)
        directions = directions[tools.argsort_direction(directions)]
        polarizations = normalize(
            np.cross(sample.uniform_sphere(nphotons), directions))
        wavelengths = np.random.uniform(400, 800, size=nphotons)
        bundle = event.Photons(positions, directions, polarizations, wavelengths)
        gpu_photons = gpu.GPUPhotons(bundle)

        start = time.time()
        gpu_photons.propagate(gpu_detector, rng_states,
                              nthreads_per_block, max_blocks)
        cuda.Context.get_current().synchronize()
        elapsed = time.time() - start
        if trial > 0:  # first kernel call incurs some driver overhead
            timings.append(elapsed)
    return nphotons / ufloat((np.mean(timings), np.std(timings)))
def pdf(gpu_detector, npdfs=10, nevents=100, nreps=16, ndaq=1, nthreads_per_block=64, max_blocks=1024):
    """
    Returns the average number of 100 MeV events per second that can be
    histogrammed per second.

    Args:
        - gpu_instance, chroma.gpu.GPU
            The GPU instance passed to the GPUDetector constructor.
        - npdfs, int
            The number of pdf generations to average.
        - nevents, int
            The number of 100 MeV events to generate for each PDF.
        - nreps, int
            The number of times to propagate each event and add to PDF
        - ndaq, int
            The number of times to run the DAQ simulation on the
            propagated event and add it to the PDF.
    """
    rng_states = gpu.get_rng_states(nthreads_per_block*max_blocks)
    gpu_daq = gpu.GPUDaq(gpu_detector)
    gpu_pdf = gpu.GPUPDF()
    gpu_pdf.setup_pdf(gpu_detector.nchannels, 100, (-0.5, 999.5), 10, (-0.5, 9.5))
    run_times = []
    for i in tools.progress(range(npdfs)):
        t0 = time.time()
        gpu_pdf.clear_pdf()
        # NOTE: g4generator is assumed to be defined at module level;
        # it is not a parameter of this function.
        vertex_gen = generator.vertex.constant_particle_gun('e-', (0, 0, 0), (1, 0, 0), 100)
        vertex_iter = itertools.islice(vertex_gen, nevents)
        for ev in g4generator.generate_events(vertex_iter):
            gpu_photons = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps)
            gpu_photons.propagate(gpu_detector, rng_states,
                                  nthreads_per_block, max_blocks)
            for gpu_photon_slice in gpu_photons.iterate_copies():
                # BUG FIX: was `xrange`, which does not exist in Python 3
                # (the rest of this file already uses range()).
                for idaq in range(ndaq):
                    gpu_daq.begin_acquire()
                    gpu_daq.acquire(gpu_photon_slice, rng_states,
                                    nthreads_per_block, max_blocks)
                    gpu_channels = gpu_daq.end_acquire()
                    gpu_pdf.add_hits_to_pdf(gpu_channels, nthreads_per_block)
        hitcount, pdf = gpu_pdf.get_pdfs()
        elapsed = time.time() - t0
        if i > 0:  # first kernel call incurs some driver overhead
            run_times.append(elapsed)
    return nevents*nreps*ndaq/ufloat((np.mean(run_times), np.std(run_times)))
def simulate(self, iterable, keep_photons_beg=False, keep_photons_end=False, keep_hits=True, run_daq=False, max_steps=100):
    """Propagate photons for each input and yield the finished events.

    Accepts a single Photons object, or an iterable of Event, Photons
    or Vertex objects. Vertex and Event inputs are first run through
    the photon generator.
    """
    # Normalize the input into an iterable of Event objects.
    if isinstance(iterable, event.Photons):
        head, source = iterable, [iterable]
    else:
        head, source = itertoolset.peek(iterable)
    if isinstance(head, event.Event):
        source = self.photon_generator.generate_events(source)
    elif isinstance(head, event.Photons):
        source = (event.Event(photons_beg=p) for p in source)
    elif isinstance(head, event.Vertex):
        source = self.photon_generator.generate_events(
            event.Event(vertices=[v]) for v in source)

    for ev in source:
        photons_gpu = gpu.GPUPhotons(ev.photons_beg)
        photons_gpu.propagate(self.gpu_geometry, self.rng_states,
                              nthreads_per_block=self.nthreads_per_block,
                              max_blocks=self.max_blocks,
                              max_steps=max_steps)
        ev.nphotons = len(ev.photons_beg.pos)
        if not keep_photons_beg:
            ev.photons_beg = None
        if keep_photons_end:
            ev.photons_end = photons_gpu.get()
        if keep_hits and hasattr(self.detector, 'num_channels'):
            ev.hits = photons_gpu.get_hits(self.gpu_geometry)
        # Skip running DAQ if we don't have one.
        # Disabled by default because incredibly special-case.
        if run_daq and hasattr(self, 'gpu_daq'):
            self.gpu_daq.begin_acquire()
            self.gpu_daq.acquire(photons_gpu, self.rng_states,
                                 nthreads_per_block=self.nthreads_per_block,
                                 max_blocks=self.max_blocks)
            ev.channels = self.gpu_daq.end_acquire().get()
        yield ev
def propagate_photon(photon_type, numPhotons, nr_steps, geometry, nthreads_per_block, max_blocks, rng_states):
    """Propagate photons one step at a time through the geometry,
    recording every photon's position after each step.

    Returns (final photons, (nr_steps, numPhotons, 3) position track).
    """
    gpu_photons = gpu.GPUPhotons(photon_type)
    gpu_geometry = gpu.GPUGeometry(geometry)
    photon_track = np.zeros((nr_steps, numPhotons, 3))
    for step in range(nr_steps):
        # max_steps=1 forces exactly one propagation step per kernel call.
        gpu_photons.propagate(gpu_geometry, rng_states,
                              nthreads_per_block=nthreads_per_block,
                              max_blocks=max_blocks, max_steps=1)
        photons = gpu_photons.get()
        # Snapshot all three coordinates at once.
        photon_track[step, :, :] = photons.pos[:, 0:3]
    return photons, photon_track
def setup_kernel(self, event_channels, bandwidth_iterable, trange, qrange, nreps=1, ndaq=1, time_only=True, scale_factor=1.0):
    '''Call this before calling eval_pdf_kernel(). Stores the event
    information and derives a kernel bandwidth from the events in
    bandwidth_iterable.'''
    self.gpu_pdf_kernel.setup_moments(len(event_channels.hit),
                                      trange, qrange,
                                      time_only=time_only)
    # Simulate the bandwidth events and accumulate their moments.
    head, bandwidth_iterable = itertoolset.peek(bandwidth_iterable)
    if isinstance(head, event.Event):
        bandwidth_iterable = \
            self.photon_generator.generate_events(bandwidth_iterable)
    for ev in bandwidth_iterable:
        photons_gpu = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps)
        photons_gpu.propagate(self.gpu_geometry, self.rng_states,
                              nthreads_per_block=self.nthreads_per_block,
                              max_blocks=self.max_blocks)
        for photon_slice in photons_gpu.iterate_copies():
            for _ in range(ndaq):
                self.gpu_daq.begin_acquire()
                self.gpu_daq.acquire(photon_slice, self.rng_states,
                                     nthreads_per_block=self.nthreads_per_block,
                                     max_blocks=self.max_blocks)
                channels = self.gpu_daq.end_acquire()
                self.gpu_pdf_kernel.accumulate_moments(channels)
    self.gpu_pdf_kernel.compute_bandwidth(event_channels.hit,
                                          event_channels.t,
                                          event_channels.q,
                                          scale_factor=scale_factor)
def load_photons(number=100, nphotons=500000):
    """Returns the average number of photons moved to the GPU device
    memory per second."""
    # Build one photon bundle up front and reuse it for every timed copy.
    positions = np.zeros((nphotons, 3))
    directions = sample.uniform_sphere(nphotons)
    polarizations = normalize(
        np.cross(sample.uniform_sphere(nphotons), directions))
    wavelengths = np.random.uniform(400, 800, size=nphotons)
    photons = event.Photons(positions, directions, polarizations, wavelengths)

    timings = []
    for trial in tools.progress(list(range(number))):
        start = time.time()
        gpu_photons = gpu.GPUPhotons(photons)
        cuda.Context.get_current().synchronize()
        elapsed = time.time() - start
        if trial > 0:  # first kernel call incurs some driver overhead
            timings.append(elapsed)
    return nphotons / ufloat((np.mean(timings), np.std(timings)))
def testGPUPDF(self):
    '''Build a hit-count vector and a (q,t) PDF for 10 MeV events in
    MicroLBNE, and check its internal consistency.'''
    g4generator = G4ParallelGenerator(1, water)
    context = gpu.create_cuda_context()
    gpu_geometry = gpu.GPUDetector(self.detector)
    nthreads_per_block, max_blocks = 64, 1024
    rng_states = gpu.get_rng_states(nthreads_per_block * max_blocks)

    gpu_daq = gpu.GPUDaq(gpu_geometry)
    gpu_pdf = gpu.GPUPDF()
    gpu_pdf.setup_pdf(self.detector.num_channels(),
                      100, (-0.5, 999.5), 10, (-0.5, 9.5))
    gpu_pdf.clear_pdf()

    # Simulate ten events and histogram their hits.
    vertices = itertools.islice(self.vertex_gen, 10)
    for ev in g4generator.generate_events(vertices):
        photons = gpu.GPUPhotons(ev.photons_beg)
        photons.propagate(gpu_geometry, rng_states,
                          nthreads_per_block, max_blocks)
        gpu_daq.begin_acquire()
        gpu_daq.acquire(photons, rng_states, nthreads_per_block, max_blocks)
        channels = gpu_daq.end_acquire()
        gpu_pdf.add_hits_to_pdf(channels)

    hitcount, pdf = gpu_pdf.get_pdfs()
    self.assertTrue((hitcount > 0).any())
    self.assertTrue((pdf > 0).any())

    # Consistency check: each channel's PDF must sum to its hit count.
    for channel, nhits in enumerate(hitcount):
        self.assertEqual(nhits, pdf[channel].sum())
    context.pop()
def eval_kernel(self, event_channels, kernel_iterable, trange, qrange, nreps=1, ndaq=1, naverage=1, time_only=True):
    """Evaluate the kernel density estimate.

    Returns tuple: 1D array of channel hit counts, 1D array of PDF
    probability densities.

    NOTE(review): naverage, trange, qrange and time_only are accepted
    for interface compatibility but never referenced in this body.
    """
    self.gpu_pdf_kernel.setup_kernel(event_channels.hit,
                                     event_channels.t,
                                     event_channels.q)
    head, kernel_iterable = itertoolset.peek(kernel_iterable)
    if isinstance(head, event.Event):
        kernel_iterable = \
            self.photon_generator.generate_events(kernel_iterable)
    # Evaluate likelihood using this bandwidth.
    for ev in kernel_iterable:
        photons_gpu = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps)
        photons_gpu.propagate(self.gpu_geometry, self.rng_states,
                              nthreads_per_block=self.nthreads_per_block,
                              max_blocks=self.max_blocks)
        for photon_slice in photons_gpu.iterate_copies():
            for _ in range(ndaq):
                self.gpu_daq.begin_acquire()
                self.gpu_daq.acquire(photon_slice, self.rng_states,
                                     nthreads_per_block=self.nthreads_per_block,
                                     max_blocks=self.max_blocks)
                channels = self.gpu_daq.end_acquire()
                self.gpu_pdf_kernel.accumulate_kernel(channels)
    return self.gpu_pdf_kernel.get_kernel_eval()
def _simulate_batch(self, batch_events, keep_photons_beg=False, keep_photons_end=False, keep_hits=True, run_daq=False, max_steps=100, verbose=False):
    '''Assumes batch_events is a list of Event objects with photons_beg having evidx set to the index in the array. Yields the fully formed events. Do not call directly.'''
    t_start = timer()
    #Idea: allocate memory on gpu and copy photons into it, instead of concatenating on CPU?
    # Concatenate all events' photons into one bundle; batch_bounds[k] is the
    # index of event k's first photon in the concatenated array (the slice for
    # event k is [batch_bounds[k], batch_bounds[k+1])).
    batch_photons = event.Photons.join(
        [ev.photons_beg for ev in batch_events])
    batch_bounds = np.cumsum(
        np.concatenate([[0], [len(ev.photons_beg) for ev in batch_events]]))
    #This copy to gpu has a _lot_ of overhead, want 100k photons at least, hence batches
    #Assume triangles, and weights are unimportant to copy to GPU
    t_copy_start = timer()
    gpu_photons = gpu.GPUPhotons(batch_photons, copy_triangles=False,
                                 copy_weights=False)
    t_copy_end = timer()
    if verbose:
        print('GPU copy took %0.2f s' % (t_copy_end - t_copy_start))
    t_prop_start = timer()
    # With photon_tracking enabled, propagate() returns per-step tracking data
    # unpacked below as (step_photon_ids, step_photons); otherwise the return
    # value is unused.
    tracking = gpu_photons.propagate(
        self.gpu_geometry, self.rng_states,
        nthreads_per_block=self.nthreads_per_block,
        max_blocks=self.max_blocks, max_steps=max_steps,
        track=self.photon_tracking)
    t_prop_end = timer()
    if verbose:
        print('GPU propagate took %0.2f s' % (t_prop_end - t_prop_start))
    t_end = timer()
    if verbose:
        print('Batch took %0.2f s' % (t_end - t_start))
    if keep_photons_end:
        batch_photons_end = gpu_photons.get()
    if hasattr(self.detector, 'num_channels') and keep_hits:
        batch_hits = gpu_photons.get_hits(self.gpu_geometry)
    # Slice the batched GPU results back into per-event pieces.
    for i, (batch_ev, (start_photon, end_photon)) in enumerate(
            zip(batch_events, zip(batch_bounds[:-1], batch_bounds[1:]))):
        if not keep_photons_beg:
            batch_ev.photons_beg = None
        if self.photon_tracking:
            step_photon_ids, step_photons = tracking
            nphotons = end_photon - start_photon
            photon_tracks = [[] for i in range(nphotons)]
            # NOTE(review): the loop target below rebinds step_photons,
            # shadowing the sequence unpacked from tracking; harmless here
            # because the pair is re-unpacked on every event iteration.
            for step_ids, step_photons in zip(step_photon_ids, step_photons):
                mask = np.logical_and(step_ids >= start_photon,
                                      step_ids < end_photon)
                if np.count_nonzero(mask) == 0:
                    # NOTE(review): stops at the first step containing none of
                    # this event's photons -- assumes a photon id never
                    # reappears in a later step once absent; confirm against
                    # propagate()'s tracking output.
                    break
                photon_ids = step_ids[mask] - start_photon
                photons = step_photons[mask]
                #Indexing Photons with a scalar changes the internal array shapes...
                # any() is used purely to drive the generator for its append
                # side effects (it always consumes the whole generator since
                # list.append returns None, which is falsy).
                any(photon_tracks[id].append(photons[i])
                    for i, id in enumerate(photon_ids))
            batch_ev.photon_tracks = [
                event.Photons.join(photons, concatenate=False)
                if len(photons) > 0 else event.Photons()
                for photons in photon_tracks
            ]
        if keep_photons_end:
            batch_ev.photons_end = batch_photons_end[
                start_photon:end_photon]
        if hasattr(self.detector, 'num_channels') and keep_hits:
            #Thought: this is kind of expensive computationally, but keep_hits is for diagnostics
            # Keep only this event's hits (evidx == i), then drop channels
            # that ended up empty.
            batch_ev.hits = {
                chan: batch_hits[chan][batch_hits[chan].evidx == i]
                for chan in batch_hits
            }
            batch_ev.hits = {
                chan: batch_ev.hits[chan]
                for chan in batch_ev.hits if len(batch_ev.hits[chan]) > 0
            }
        if hasattr(self, 'gpu_daq') and run_daq:
            #Must run DAQ per event, or design a much more complicated daq algorithm
            self.gpu_daq.begin_acquire()
            self.gpu_daq.acquire(
                gpu_photons, self.rng_states,
                start_photon=start_photon,
                nphotons=(end_photon - start_photon),
                nthreads_per_block=self.nthreads_per_block,
                max_blocks=self.max_blocks)
            gpu_channels = self.gpu_daq.end_acquire()
            batch_ev.channels = gpu_channels.get()
        yield batch_ev
def eval_pdf(self, event_channels, iterable, min_twidth, trange, min_qwidth, qrange, min_bin_content=100, nreps=1, ndaq=1, nscatter=1, time_only=True):
    """Returns tuple: 1D array of channel hit counts, 1D array of PDF
    probability densities.

    The PDF is evaluated at the hit times/charges in event_channels
    using Monte Carlo events from iterable (Event or Photons objects).
    Each event is propagated twice: once with scattering suppressed
    (scatter_first=-1) and once forced to scatter on the first step
    (scatter_first=1); each scattered copy is acquired with weight
    1/nscatter.
    """
    # The DAQ simulates ndaq_per_rep samplings per acquire; ndaq is
    # rounded down to whole reps. NOTE(review): ndaq < 64 (including
    # the default ndaq=1) gives ndaq_reps == 0, so no DAQ runs and
    # nothing is accumulated -- callers apparently must pass ndaq in
    # multiples of 64; verify against call sites.
    ndaq_per_rep = 64
    ndaq_reps = ndaq // ndaq_per_rep
    gpu_daq = gpu.GPUDaq(self.gpu_geometry, ndaq=ndaq_per_rep)

    # BUG FIX: previously passed the literal True for time_only,
    # silently ignoring the caller's time_only argument.
    self.gpu_pdf.setup_pdf_eval(event_channels.hit,
                                event_channels.t,
                                event_channels.q,
                                min_twidth, trange,
                                min_qwidth, qrange,
                                min_bin_content=min_bin_content,
                                time_only=time_only)

    first_element, iterable = itertoolset.peek(iterable)
    if isinstance(first_element, event.Event):
        iterable = self.photon_generator.generate_events(iterable)
    elif isinstance(first_element, event.Photons):
        iterable = (event.Event(photons_beg=x) for x in iterable)

    for ev in iterable:
        gpu_photons_no_scatter = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps)
        gpu_photons_scatter = gpu.GPUPhotons(ev.photons_beg,
                                             ncopies=nreps * nscatter)
        gpu_photons_no_scatter.propagate(
            self.gpu_geometry, self.rng_states,
            nthreads_per_block=self.nthreads_per_block,
            max_blocks=self.max_blocks,
            use_weights=True, scatter_first=-1, max_steps=10)
        gpu_photons_scatter.propagate(
            self.gpu_geometry, self.rng_states,
            nthreads_per_block=self.nthreads_per_block,
            max_blocks=self.max_blocks,
            use_weights=True, scatter_first=1, max_steps=5)
        nphotons = gpu_photons_no_scatter.true_nphotons  # same for scatter
        for i in range(gpu_photons_no_scatter.ncopies):
            start_photon = i * nphotons
            gpu_photon_no_scatter_slice = gpu_photons_no_scatter.select(
                event.SURFACE_DETECT,
                start_photon=start_photon, nphotons=nphotons)
            gpu_photon_scatter_slices = [
                gpu_photons_scatter.select(
                    event.SURFACE_DETECT,
                    start_photon=(nscatter * i + j) * nphotons,
                    nphotons=nphotons)
                for j in range(nscatter)
            ]
            if len(gpu_photon_no_scatter_slice) == 0:
                continue  # no detected photons in this copy
            for j in range(ndaq_reps):
                gpu_daq.begin_acquire()
                gpu_daq.acquire(gpu_photon_no_scatter_slice,
                                self.rng_states,
                                nthreads_per_block=self.nthreads_per_block,
                                max_blocks=self.max_blocks)
                # Scattered contributions are averaged via weight.
                for scatter_slice in gpu_photon_scatter_slices:
                    gpu_daq.acquire(scatter_slice, self.rng_states,
                                    nthreads_per_block=self.nthreads_per_block,
                                    max_blocks=self.max_blocks,
                                    weight=1.0 / nscatter)
                gpu_channels = gpu_daq.end_acquire()
                self.gpu_pdf.accumulate_pdf_eval(
                    gpu_channels, nthreads_per_block=ndaq_per_rep)
    return self.gpu_pdf.get_pdf_eval()
def pdf_eval(gpu_detector, npdfs=10, nevents=25, nreps=16, ndaq=128, nthreads_per_block=64, max_blocks=1024):
    """
    Returns the average number of 100 MeV events that can be
    histogrammed per second.

    Args:
        - gpu_instance, chroma.gpu.GPU
            The GPU instance passed to the GPUDetector constructor.
        - npdfs, int
            The number of pdf generations to average.
        - nevents, int
            The number of 100 MeV events to generate for each PDF.
        - nreps, int
            The number of times to propagate each event and add to PDF
        - ndaq, int
            The number of times to run the DAQ simulation on the
            propagated event and add it to the PDF.
    """
    rng_states = gpu.get_rng_states(nthreads_per_block * max_blocks)

    # Build a single reference ("data") event to evaluate PDFs against.
    gun = generator.vertex.constant_particle_gun('e-', (0, 0, 0), (1, 0, 0), 100)
    data_ev = next(g4generator.generate_events(itertools.islice(gun, 1)))
    gpu_photons = gpu.GPUPhotons(data_ev.photons_beg)
    gpu_photons.propagate(gpu_detector, rng_states,
                          nthreads_per_block, max_blocks)
    gpu_daq = gpu.GPUDaq(gpu_detector)
    gpu_daq.begin_acquire()
    gpu_daq.acquire(gpu_photons, rng_states,
                    nthreads_per_block, max_blocks).get()
    data_ev_channels = gpu_daq.end_acquire()

    # Set up PDF evaluation on a fresh DAQ with ndaq samplings.
    gpu_daq = gpu.GPUDaq(gpu_detector, ndaq=ndaq)
    gpu_pdf = gpu.GPUPDF()
    gpu_pdf.setup_pdf_eval(data_ev_channels.hit,
                           data_ev_channels.t,
                           data_ev_channels.q,
                           0.05, (-0.5, 999.5), 1.0, (-0.5, 20),
                           min_bin_content=20, time_only=True)

    timings = []
    for trial in tools.progress(list(range(npdfs))):
        start = time.time()
        gpu_pdf.clear_pdf_eval()
        vertex_iter = itertools.islice(
            generator.vertex.constant_particle_gun('e-', (0, 0, 0),
                                                   (1, 0, 0), 100),
            nevents)
        for ev in g4generator.generate_events(vertex_iter):
            gpu_photons = gpu.GPUPhotons(ev.photons_beg, ncopies=nreps)
            gpu_photons.propagate(gpu_detector, rng_states,
                                  nthreads_per_block, max_blocks)
            for photon_slice in gpu_photons.iterate_copies():
                gpu_daq.begin_acquire()
                detected = photon_slice.select(event.SURFACE_DETECT)
                gpu_daq.acquire(detected, rng_states,
                                nthreads_per_block, max_blocks)
                gpu_channels = gpu_daq.end_acquire()
                gpu_pdf.accumulate_pdf_eval(gpu_channels, nthreads_per_block)
        cuda.Context.get_current().synchronize()
        elapsed = time.time() - start
        if trial > 0:  # first kernel call incurs some driver overhead
            timings.append(elapsed)
    return nevents * nreps * ndaq / ufloat(
        (np.mean(timings), np.std(timings)))