def test_find_peaks(hits, min_channels, min_area):
    """Check that strax.find_peaks output is sane and respects its
    min_channels / min_area / gap_threshold requirements."""
    hits['area'] = 1
    gap_threshold = 10
    peaks = strax.find_peaks(hits,
                             adc_to_pe=np.ones(1),
                             right_extension=0, left_extension=0,
                             gap_threshold=gap_threshold,
                             min_channels=min_channels,
                             min_area=min_area)

    # Basic sanity: every peak has a positive length and at least one hit
    assert np.all(peaks['length'] > 0)
    assert np.all(peaks['n_hits'] > 0)

    # Check if requirements satisfied
    if min_area != 0:
        assert np.all(peaks['area'] >= min_area)
    if min_channels != 1:
        assert np.all(peaks['n_hits'] >= min_channels)
    assert np.all(peaks['max_gap'] < gap_threshold)

    # Without requirements, all hits must occur in a peak
    if min_area == 0 and min_channels == 1:
        assert np.sum(peaks['n_hits']) == len(hits)
        assert np.all(strax.fully_contained_in(hits, peaks) > -1)

    # Since no extensions were used, peaks must be at least
    # gap_threshold apart
    starts = peaks['time']
    ends = peaks['time'] + peaks['length'] * peaks['dt']
    assert np.all(ends[:-1] + gap_threshold <= starts[1:])

    assert np.all(starts == np.sort(starts)), "Not sorted"
    assert np.all(peaks['time'] < strax.endtime(peaks)), "Non+ peak length"
def test_time_selection(d, second_time, second_dt):
    """
    Test that both 'touching' and 'fully_contained' time selections give
    the same results as 'strax.fully_contained_in' and
    'strax.touching_windows' respectively.

    :param d: test-data from get_dummy_data
    :param second_time: start time of the selection container
    :param second_dt: the offset w.r.t. the first
    :return: None
    """
    # Build a single-interval container matching the selection window
    container = np.zeros(1, dtype=strax.time_fields)
    container['time'] = second_time
    container['endtime'] = second_time + second_dt
    time_range = (second_time, second_time + second_dt)

    # Fully contained in
    selected_data = strax.apply_selection(d,
                                          time_range=time_range,
                                          time_selection='fully_contained')
    contained = strax.fully_contained_in(d, container)
    selected_data_fc = d[contained != -1]
    assert np.all(selected_data == selected_data_fc)

    # Touching windows
    selected_data = strax.apply_selection(d,
                                          time_range=time_range,
                                          time_selection='touching')
    windows = strax.touching_windows(d, container, window=0)
    assert np.diff(windows[0]) == len(selected_data)
    if len(windows) and len(selected_data):
        assert np.all(selected_data == d[windows[0][0]:windows[0][1]])
def calc_delta_time(ext_timings_nv_delta_time, pulses, hitlets_nv,
                    nv_pmt_start, nv_pmt_stop):
    """
    Fill per-hitlet delta_time and pulse index, channel by channel.

    numpy access with fancy index returns copy, not view.
    This for-loop is required to substitute in one by one.
    """
    hitlet_index = np.arange(len(hitlets_nv))
    pulse_index = np.arange(len(pulses))
    for ch in range(nv_pmt_start, nv_pmt_stop):
        # Indices of hitlets and pulses belonging to this channel
        mask_hitlets_in_channel = hitlets_nv['channel'] == ch
        hitlet_in_channel_index = hitlet_index[mask_hitlets_in_channel]

        mask_pulse_in_channel = pulses['channel'] == ch
        pulse_in_channel_index = pulse_index[mask_pulse_in_channel]

        hitlets_in_channel = hitlets_nv[hitlet_in_channel_index]
        pulses_in_channel = pulses[pulse_in_channel_index]

        # For each hitlet, find the pulse (if any) that fully contains it
        hit_in_pulse_index = strax.fully_contained_in(hitlets_in_channel,
                                                      pulses_in_channel)
        for h_i, p_i in zip(hitlet_in_channel_index, hit_in_pulse_index):
            if p_i == -1:
                # Hitlet not contained in any pulse of this channel
                continue
            res = ext_timings_nv_delta_time[h_i]
            res['delta_time'] = (hitlets_nv[h_i]['time']
                                 + hitlets_nv[h_i]['time_amplitude']
                                 - pulses_in_channel[p_i]['time'])
            res['pulse_i'] = pulse_in_channel_index[p_i]
def test_fully_contained_in(things, containers):
    """Cross-check strax.fully_contained_in against the brute-force
    _is_contained helper, looking for false negatives and positives."""
    result = strax.fully_contained_in(things, containers)

    assert len(result) == len(things)
    if len(result):
        assert result.max() < len(containers)

    for i, thing in enumerate(things):
        if result[i] == -1:
            # Check for false negative
            for c in containers:
                assert not _is_contained(thing, c)
        else:
            # Check for false positives
            assert _is_contained(thing, containers[result[i]])
def add_lone_hits(peaks, lone_hits, to_pe):
    """
    Function which adds information from lone hits to peaks if lone hit
    is inside a peak (e.g. after merging.). Modifies peak area and data
    inplace.

    :param peaks: Numpy array of peaks
    :param lone_hits: Numpy array of lone_hits
    :param to_pe: Gain values to convert lone hit area into PE.
    """
    fully_contained_index = strax.fully_contained_in(lone_hits, peaks)

    for fc_i, lh_i in zip(fully_contained_index, lone_hits):
        if fc_i == -1:
            # Lone hit is not inside any peak
            continue
        p = peaks[fc_i]
        lh_area = lh_i['area'] * to_pe[lh_i['channel']]
        p['area'] += lh_area

        # Add lone hit as delta pulse to waveform:
        # The hit is fully contained, so its time is >= the peak time and
        # the sample offset is (hit time - peak time) // dt.
        # BUGFIX: the previous (p['time'] - lh_i['time']) was non-positive
        # and silently indexed the waveform from the end.
        index = (lh_i['time'] - p['time']) // p['dt']
        p['data'][index] += lh_area
def software_he_veto(records, to_pe, chunk_end,
                     area_threshold=int(1e5),
                     veto_length=int(3e6),
                     veto_res=int(1e3),
                     pass_veto_fraction=0.01,
                     pass_veto_extend=3,
                     max_veto_value=None):
    """Veto veto_length (time in ns) after peaks larger than
    area_threshold (in PE).

    Further large peaks inside the veto regions are still passed:
    We sum the waveform inside the veto region (with time resolution
    veto_res in ns) and pass regions within pass_veto_extend samples
    of samples with amplitude above pass_veto_fraction times the maximum.

    :returns: (preserved records, vetoed records, veto intervals).

    :param records: PMT records
    :param to_pe: ADC to PE conversion factors for the channels in records.
    :param chunk_end: Endtime of chunk to set as maximum ceiling
        for the veto period
    :param area_threshold: Minimum peak area to trigger the veto.
        Note we use a much rougher clustering than in later processing.
    :param veto_length: Time in ns to veto after the peak
    :param veto_res: Resolution of the sum waveform inside the veto region.
        Do not make too large without increasing integer type in some
        strax dtypes...
    :param pass_veto_fraction: fraction of maximum sum waveform amplitude
        to trigger veto passing of further peaks
    :param pass_veto_extend: samples to extend (left and right)
        the pass veto regions.
    :param max_veto_value: if not None, pass peaks that exceed this area
        no matter what.
    """
    veto_res = int(veto_res)
    if veto_res > np.iinfo(np.int16).max:
        raise ValueError("Veto resolution does not fit 16-bit int")
    # FIX: np.int was deprecated in numpy 1.20 and removed in 1.24;
    # use the explicit np.int64 instead.
    veto_length = np.ceil(veto_length / veto_res).astype(np.int64) * veto_res
    veto_n = int(veto_length / veto_res) + 1

    # 1. Find large peaks in the data.
    # This will actually return big agglomerations of peaks and their tails
    peaks = strax.find_peaks(
        records, to_pe,
        gap_threshold=1,
        left_extension=0,
        right_extension=0,
        min_channels=100,
        min_area=area_threshold,
        result_dtype=strax.peak_dtype(n_channels=len(to_pe),
                                      n_sum_wv_samples=veto_n))

    # 2a. Set 'candidate regions' at these peaks. These should:
    #  - Have a fixed maximum length
    #    (else we can't use the strax hitfinder on them)
    #  - Never extend beyond the current chunk
    #  - Do not overlap
    veto_start = peaks['time']
    veto_end = np.clip(peaks['time'] + veto_length, None, chunk_end)
    veto_end[:-1] = np.clip(veto_end[:-1], None, veto_start[1:])

    # 2b. Convert these into strax record-like objects
    # Note the waveform is float32 though (it's a summed waveform)
    regions = np.zeros(
        len(veto_start),
        dtype=strax.interval_dtype + [
            ("data", (np.float32, veto_n)),
            ("baseline", np.float32),
            ("baseline_rms", np.float32),
            ("reduction_level", np.int64),
            ("record_i", np.int64),
            ("pulse_length", np.int64),
        ])
    regions['time'] = veto_start
    # NOTE(review): dividing by veto_n (number of samples) rather than
    # veto_res (sample width) looks suspicious but is kept as-is — confirm
    # intended against the upstream implementation.
    regions['length'] = (veto_end - veto_start) // veto_n
    regions['pulse_length'] = veto_n
    regions['dt'] = veto_res

    if not len(regions):
        # No veto anywhere in this data
        return records, records[:0], np.zeros(0, strax.hit_dtype)

    # 3. Find pass_veto regions with big peaks inside the veto regions.
    # For this we compute a rough sum waveform (at low resolution,
    # without looping over the pulse data)
    rough_sum(regions, records, to_pe, veto_n, veto_res)
    if max_veto_value is not None:
        pass_veto = strax.find_hits(regions, min_amplitude=max_veto_value)
    else:
        regions['data'] /= np.max(regions['data'], axis=1)[:, np.newaxis]
        pass_veto = strax.find_hits(regions,
                                    min_amplitude=pass_veto_fraction)

    # 4. Extend these by a few samples and inverse to find veto regions
    regions['data'] = 1
    regions = strax.cut_outside_hits(
        regions,
        pass_veto,
        left_extension=pass_veto_extend,
        right_extension=pass_veto_extend)
    regions['data'] = 1 - regions['data']
    veto = strax.find_hits(regions, min_amplitude=1)
    # Do not remove very tiny regions
    veto = veto[veto['length'] > 2 * pass_veto_extend]

    # 5. Apply the veto and return results
    veto_mask = strax.fully_contained_in(records, veto) == -1
    return tuple(list(mask_and_not(records, veto_mask)) + [veto])
def compute(self, records, start, end):
    """Cluster hits into peaklets and lone hits for one chunk."""
    r = records

    hits = strax.find_hits(r, min_amplitude=self.hit_thresholds)

    # Remove hits in zero-gain channels
    # they should not affect the clustering!
    hits = hits[self.to_pe[hits['channel']] != 0]
    hits = strax.sort_by_time(hits)

    # Use peaklet gap threshold for initial clustering
    # based on gaps between hits
    peaklets = strax.find_peaks(
        hits, self.to_pe,
        gap_threshold=self.config['peaklet_gap_threshold'],
        left_extension=self.config['peak_left_extension'],
        right_extension=self.config['peak_right_extension'],
        min_channels=self.config['peak_min_pmts'],
        result_dtype=self.dtype_for('peaklets'),
        max_duration=self.config['peaklet_max_duration'],
    )

    # Make sure peaklets don't extend out of the chunk boundary
    # This should be very rare in normal data due to the ADC pretrigger
    # window.
    self.clip_peaklet_times(peaklets, start, end)

    # Get hits outside peaklets, and store them separately.
    # fully_contained is OK provided gap_threshold > extension,
    # which is asserted inside strax.find_peaks.
    is_lone_hit = strax.fully_contained_in(hits, peaklets) == -1
    lone_hits = hits[is_lone_hit]
    strax.integrate_lone_hits(
        lone_hits, records, peaklets,
        save_outside_hits=(self.config['peak_left_extension'],
                           self.config['peak_right_extension']),
        n_channels=len(self.to_pe))

    # Compute basic peak properties -- needed before natural breaks
    hits = hits[~is_lone_hit]
    # Define regions outside of peaks such that
    # _find_hit_integration_bounds is not extended beyond a peak.
    outside_peaks = self.create_outside_peaks_region(peaklets, start, end)
    strax.find_hit_integration_bounds(
        hits, outside_peaks, records,
        save_outside_hits=(self.config['peak_left_extension'],
                           self.config['peak_right_extension']),
        n_channels=len(self.to_pe),
        allow_bounds_beyond_records=True,
    )

    # Transform hits to hitlets for naming conventions. A hit refers
    # to the central part above threshold, a hitlet to the entire signal
    # including the left and right extension.
    # (We are not going to use the actual hitlet data_type here.)
    hitlets = hits
    del hits

    hitlet_time_shift = (hitlets['left']
                         - hitlets['left_integration']) * hitlets['dt']
    hitlets['time'] = hitlets['time'] - hitlet_time_shift
    hitlets['length'] = (hitlets['right_integration']
                         - hitlets['left_integration'])
    hitlets = strax.sort_by_time(hitlets)
    rlinks = strax.record_links(records)

    strax.sum_waveform(peaklets, hitlets, r, rlinks, self.to_pe)
    strax.compute_widths(peaklets)

    # Split peaks using low-split natural breaks;
    # see https://github.com/XENONnT/straxen/pull/45
    # and https://github.com/AxFoundation/strax/pull/225
    peaklets = strax.split_peaks(
        peaklets, hitlets, r, rlinks, self.to_pe,
        algorithm='natural_breaks',
        threshold=self.natural_breaks_threshold,
        split_low=True,
        filter_wing_width=self.config['peak_split_filter_wing_width'],
        min_area=self.config['peak_split_min_area'],
        do_iterations=self.config['peak_split_iterations'])

    # Saturation correction using non-saturated channels
    # similar method used in pax
    # see https://github.com/XENON1T/pax/pull/712
    # Cases when records is not writeable for unclear reason
    # only see this when loading 1T test data
    # more details on
    # https://numpy.org/doc/stable/reference/generated/numpy.ndarray.flags.html
    if not r['data'].flags.writeable:
        r = r.copy()

    if self.config['saturation_correction_on']:
        peak_list = peak_saturation_correction(
            r, rlinks, peaklets, hitlets, self.to_pe,
            reference_length=self.config['saturation_reference_length'],
            min_reference_length=self.config[
                'saturation_min_reference_length'])

        # Compute the width again for corrected peaks
        strax.compute_widths(peaklets, select_peaks_indices=peak_list)

    # Compute tight coincidence level.
    # Making this a separate plugin would
    # (a) doing hitfinding yet again (or storing hits)
    # (b) increase strax memory usage / max_messages,
    #     possibly due to its currently primitive scheduling.
    hit_max_times = np.sort(
        hitlets['time']
        + hitlets['dt'] * hit_max_sample(records, hitlets)
        + hitlet_time_shift  # add time shift again to get correct maximum
    )
    peaklet_max_times = (
        peaklets['time']
        + np.argmax(peaklets['data'], axis=1) * peaklets['dt'])
    tight_coincidence_channel = get_tight_coin(
        hit_max_times, hitlets['channel'],
        peaklet_max_times,
        self.config['tight_coincidence_window_left'],
        self.config['tight_coincidence_window_right'],
        self.channel_range)

    peaklets['tight_coincidence'] = tight_coincidence_channel

    if self.config['diagnose_sorting'] and len(r):
        assert np.diff(r['time']).min(initial=1) >= 0, "Records not sorted"
        assert np.diff(
            hitlets['time']).min(initial=1) >= 0, "Hits/Hitlets not sorted"
        assert np.all(peaklets['time'][1:] >= strax.endtime(peaklets)[:-1]
                      ), "Peaks not disjoint"

    # Update nhits of peaklets:
    counts = strax.touching_windows(hitlets, peaklets)
    counts = np.diff(counts, axis=1).flatten()
    peaklets['n_hits'] = counts

    return dict(peaklets=peaklets, lone_hits=lone_hits)
def compute(self, records, start, end):
    """Cluster hits into peaklets and lone hits for one chunk
    (legacy variant using straxen.hit_min_amplitude)."""
    r = records

    hits = strax.find_hits(r, min_amplitude=straxen.hit_min_amplitude(
        self.config['hit_min_amplitude']))

    # Remove hits in zero-gain channels
    # they should not affect the clustering!
    hits = hits[self.to_pe[hits['channel']] != 0]
    hits = strax.sort_by_time(hits)

    # Use peaklet gap threshold for initial clustering
    # based on gaps between hits
    peaklets = strax.find_peaks(
        hits, self.to_pe,
        gap_threshold=self.config['peaklet_gap_threshold'],
        left_extension=self.config['peak_left_extension'],
        right_extension=self.config['peak_right_extension'],
        min_channels=self.config['peak_min_pmts'],
        result_dtype=self.dtype_for('peaklets'))

    # Make sure peaklets don't extend out of the chunk boundary
    # This should be very rare in normal data due to the ADC pretrigger
    # window.
    self.clip_peaklet_times(peaklets, start, end)

    # Get hits outside peaklets, and store them separately.
    # fully_contained is OK provided gap_threshold > extension,
    # which is asserted inside strax.find_peaks.
    lone_hits = hits[strax.fully_contained_in(hits, peaklets) == -1]
    strax.integrate_lone_hits(
        lone_hits, records, peaklets,
        save_outside_hits=(self.config['peak_left_extension'],
                           self.config['peak_right_extension']),
        n_channels=len(self.to_pe))

    # Compute basic peak properties -- needed before natural breaks
    strax.sum_waveform(peaklets, r, self.to_pe)
    strax.compute_widths(peaklets)

    # Split peaks using low-split natural breaks;
    # see https://github.com/XENONnT/straxen/pull/45
    # and https://github.com/AxFoundation/strax/pull/225
    peaklets = strax.split_peaks(
        peaklets, r, self.to_pe,
        algorithm='natural_breaks',
        threshold=self.natural_breaks_threshold,
        split_low=True,
        filter_wing_width=self.config['peak_split_filter_wing_width'],
        min_area=self.config['peak_split_min_area'],
        do_iterations=self.config['peak_split_iterations'])

    # Saturation correction using non-saturated channels
    # similar method used in pax
    # see https://github.com/XENON1T/pax/pull/712
    if self.config['saturation_correction_on']:
        peak_saturation_correction(
            r, peaklets, self.to_pe,
            reference_length=self.config['saturation_reference_length'],
            min_reference_length=self.config[
                'saturation_min_reference_length'])

    # Compute tight coincidence level.
    # Making this a separate plugin would
    # (a) doing hitfinding yet again (or storing hits)
    # (b) increase strax memory usage / max_messages,
    #     possibly due to its currently primitive scheduling.
    hit_max_times = np.sort(
        hits['time'] + hits['dt'] * hit_max_sample(records, hits))
    peaklet_max_times = (
        peaklets['time']
        + np.argmax(peaklets['data'], axis=1) * peaklets['dt'])
    peaklets['tight_coincidence'] = get_tight_coin(
        hit_max_times,
        peaklet_max_times,
        self.config['tight_coincidence_window_left'],
        self.config['tight_coincidence_window_right'])

    if self.config['diagnose_sorting'] and len(r):
        assert np.diff(r['time']).min(initial=1) >= 0, "Records not sorted"
        assert np.diff(hits['time']).min(initial=1) >= 0, "Hits not sorted"
        assert np.all(peaklets['time'][1:] >= strax.endtime(peaklets)[:-1]
                      ), "Peaks not disjoint"

    # Update nhits of peaklets:
    counts = strax.touching_windows(hits, peaklets)
    counts = np.diff(counts, axis=1).flatten()
    counts += 1
    peaklets['n_hits'] = counts

    return dict(peaklets=peaklets, lone_hits=lone_hits)
def software_he_veto(records, to_pe,
                     area_threshold=int(1e5),
                     veto_length=int(3e6),
                     veto_res=int(1e3),
                     pass_veto_fraction=0.01,
                     pass_veto_extend=3):
    """Veto veto_length (time in ns) after peaks larger than
    area_threshold (in PE).

    Further large peaks inside the veto regions are still passed:
    We sum the waveform inside the veto region (with time resolution
    veto_res in ns) and pass regions within pass_veto_extend samples
    of samples with amplitude above pass_veto_fraction times the maximum.

    :returns: (preserved records, vetoed records, veto intervals).

    :param records: PMT records
    :param to_pe: ADC to PE conversion factors for the channels in records.
    :param area_threshold: Minimum peak area to trigger the veto.
        Note we use a much rougher clustering than in later processing.
    :param veto_length: Time in ns to veto after the peak
    :param veto_res: Resolution of the sum waveform inside the veto region.
        Do not make too large without increasing integer type in some
        strax dtypes...
    :param pass_veto_fraction: fraction of maximum sum waveform amplitude
        to trigger veto passing of further peaks
    :param pass_veto_extend: samples to extend (left and right)
        the pass veto regions.
    """
    veto_res = int(veto_res)
    if veto_res > np.iinfo(np.int16).max:
        raise ValueError("Veto resolution does not fit 16-bit int")
    # FIX: np.int was deprecated in numpy 1.20 and removed in 1.24;
    # use the explicit np.int64 instead.
    veto_length = np.ceil(veto_length / veto_res).astype(np.int64) * veto_res
    veto_n = int(veto_length / veto_res) + 1

    # 1. Find large peaks in the data.
    # This will actually return big agglomerations of peaks and their tails
    peaks = strax.find_peaks(
        records, to_pe,
        gap_threshold=1,
        left_extension=0,
        right_extension=0,
        min_channels=100,
        min_area=area_threshold,
        result_dtype=strax.peak_dtype(n_channels=len(to_pe),
                                      n_sum_wv_samples=veto_n))

    # 2. Find initial veto regions around these peaks
    # (with a generous right extension)
    veto_start, veto_end = strax.find_peak_groups(
        peaks,
        gap_threshold=veto_length + 2 * veto_res,
        right_extension=veto_length,
        left_extension=veto_res)
    veto_end = veto_end.clip(0, strax.endtime(records[-1]))
    veto_length = veto_end - veto_start
    # dtype is like record (since we want to use hitfinding etc)
    # but with float32 waveform
    regions = np.zeros(
        len(veto_start),
        dtype=strax.interval_dtype + [
            ("data", (np.float32, veto_n)),
            ("baseline", np.float32),
            ("reduction_level", np.int64),
            ("record_i", np.int64),
            ("pulse_length", np.int64),
        ])
    regions['time'] = veto_start
    regions['length'] = veto_length
    regions['pulse_length'] = veto_length
    regions['dt'] = veto_res

    if not len(regions):
        # No veto anywhere in this data
        return records, records[:0], np.zeros(0, strax.hit_dtype)

    # 3. Find pass_veto regions with big peaks inside the veto regions.
    # For this we compute a rough sum waveform (at low resolution,
    # without looping over the pulse data)
    rough_sum(regions, records, to_pe, veto_n, veto_res)
    regions['data'] /= np.max(regions['data'], axis=1)[:, np.newaxis]

    pass_veto = strax.find_hits(regions, threshold=pass_veto_fraction)

    # 4. Extend these by a few samples and inverse to find veto regions
    regions['data'] = 1
    regions = strax.cut_outside_hits(
        regions,
        pass_veto,
        left_extension=pass_veto_extend,
        right_extension=pass_veto_extend)
    regions['data'] = 1 - regions['data']
    veto = strax.find_hits(regions, threshold=0.5)
    # Do not remove very tiny regions
    veto = veto[veto['length'] > 2 * pass_veto_extend]

    # 5. Apply the veto and return results
    veto_mask = strax.fully_contained_in(records, veto) == -1
    return tuple(list(_mask_and_not(records, veto_mask)) + [veto])