def median_baseline(records):
    """
    Compute the baseline of every pulse according to the pulse's median.

    Record fragments are grouped into pulses with ``strax.record_links``
    and ``_correct_baseline`` is applied to each group of fragments.

    :param records: Records. A fragment with ``record_i == 0`` marks the
        start of a pulse.
    :returns: The records after ``_correct_baseline`` has been applied.
    :raises AssertionError: If a single pulse is followed through 5000 or
        more subsequent fragments (most likely a cycle in the links).
    """
    # Every pulse starts with a fragment which has record_i == 0.
    pulse_starts = np.where(records['record_i'] == 0)[0]

    if len(pulse_starts) == 1:
        # This case is simple: all fragments are corrected in one go.
        return _correct_baseline(records)

    # Now the more complicated case in which we have multiple pulses.
    # First we have to group our record fragments into their pulses,
    # hence get the record links and collect the indices per pulse.
    _, nextr = strax.record_links(records)
    max_fragments = 5000

    pulse_i = []
    for i in pulse_starts:
        inds = [i]
        ind = nextr[i]
        # The counter is reset for every pulse. A counter shared between
        # pulses would fail on chunks which simply contain many fragments
        # although no single pulse is anywhere near the limit.
        n_followed = 0
        # Always look for the next index as long as there is one.
        while ind != -1:
            inds.append(ind)
            ind = nextr[ind]
            n_followed += 1
            if n_followed >= max_fragments:
                # Raised explicitly (instead of a bare assert) so that
                # the protection against endless loops survives "-O".
                raise AssertionError(
                    'Stuck in while-loop pulse is longer than 5000 '
                    'fragments?!?')
        pulse_i.append(inds)

    for pi in pulse_i:
        records[pi] = _correct_baseline(records[pi])
    return records
def compute(self, raw_records_coin_nv):
    """
    Turn coincident raw records into processed records.

    The raw records are converted to records, sorted by time, baselined
    (with ``flip=True``), integrated and cut to the regions around the
    hits found above ``hit_min_amplitude_nv``. Finally the result is
    passed through ``clean_up_empty_records`` with ``only_last=True``.

    :param raw_records_coin_nv: Raw records to be processed.
    :returns: The processed records.
    """
    records = strax.raw_to_records(raw_records_coin_nv)
    del raw_records_coin_nv
    records = strax.sort_by_time(records)

    # Do not trust in DAQ + strax.baseline to leave the
    # out-of-bounds samples to zero.
    strax.zero_out_of_bounds(records)
    strax.baseline(
        records,
        baseline_samples=self.config['baseline_samples_nv'],
        flip=True,
    )
    strax.integrate(records)
    strax.zero_out_of_bounds(records)

    hits = strax.find_hits(
        records,
        min_amplitude=self.config['hit_min_amplitude_nv'],
    )

    # Keep only the samples around the hits (plus the configured margins).
    left_margin, right_margin = self.config['save_outside_hits_nv']
    records = strax.cut_outside_hits(
        records,
        hits,
        left_extension=left_margin,
        right_extension=right_margin,
    )
    strax.zero_out_of_bounds(records)

    record_links = strax.record_links(records)
    return clean_up_empty_records(records, record_links, only_last=True)
def _yield_pulse_indices(records):
    """
    Generator which groups the indices of record fragments by pulse.

    Note:
        Only finds the fragments of a pulse if its first fragment
        (record_i == 0) is within the given records.

    :param records: Records to be grouped.
    :yields: List of indices of the fragments which make up one pulse.
    :raises ValueError: If 5000 or more fragments were collected for a
        single pulse.
    """
    max_tries = 5000

    # Links to the subsequent fragment, -1 marks the end of a pulse.
    _, next_ri = strax.record_links(records)
    first_fragments = np.where(records['record_i'] == 0)[0]

    for current in first_fragments:
        inds = []
        while current != -1:
            inds.append(current)
            current = next_ri[current]
            # One index is stored per step, hence the number of stored
            # indices is the number of steps taken for this pulse.
            if len(inds) >= max_tries:
                raise ValueError(
                    'Tried more than 5000 times to find subsequent record.'
                    ' Am I stuck in a loop?')
        yield inds
def split_peaks(peaks, records, to_pe, algorithm='local_minimum',
                data_type='peaks', **kwargs):
    """Return peaks split according to algorithm, with waveforms summed
    and widths computed.

    :param peaks: Original peaks. Sum waveform must have been built
        and properties must have been computed (if you use them)
    :param records: Records from which peaks were built
    :param to_pe: ADC to PE conversion factor array (of n_channels)
    :param algorithm: 'local_minimum' or 'natural_breaks'.
    :param data_type: 'peaks' or 'hitlets'. Specifies whether to use
        sum_waveform or get_hitlets_data to compute the waveform of
        the new split peaks/hitlets.
    :raises KeyError: If the algorithm is not known.
    :raises TypeError: If the data_type is not supported.

    Any other options are passed to the algorithm.
    """
    available_splitters = {
        'local_minimum': LocalMinimumSplitter,
        'natural_breaks': NaturalBreaksSplitter,
    }
    splitter = available_splitters[algorithm]()

    if data_type not in ('hitlets', 'peaks'):
        raise TypeError(f'Data_type "{data_type}" is not supported.')

    # The links to the next record are only computed for hitlets, and
    # only once, here.
    next_ri = None
    if data_type == 'hitlets':
        _, next_ri = strax.record_links(records)

    return splitter(peaks, records, to_pe, data_type, next_ri, **kwargs)
def get_hitlets_data(hitlets, records, to_pe):
    """
    Look up the records data for every hitlet of a chunk and store it,
    converted to pe, in the hitlet.

    :param hitlets: Hitlets found in a chunk of records.
    :param records: Records of the chunk.
    :param to_pe: Array with area conversion factors from adc/sample to
        pe/sample

    Note:
        hitlets must have a "data" and "area" field.

    The function updates the hitlet fields time, length (if necessary,
    e.g. the hit was extended into regions without records) and area
    according to the found data. Nothing is returned, the hitlets are
    changed in place.
    """
    prev_ri, next_ri = strax.record_links(records)

    for hitlet in hitlets:
        data, start_time = get_single_hitlet_data(
            hitlet, records, prev_ri, next_ri)
        n_samples = len(data)

        hitlet['length'] = n_samples
        # Waveform in pe, used for both the stored data and the area.
        data_in_pe = data * to_pe[hitlet['channel']]
        hitlet['data'][:n_samples] = data_in_pe
        hitlet['time'] = start_time
        hitlet['area'] = np.sum(data_in_pe)
def compute(self, raw_records_nv, start, end):
    """
    Split the raw records into records within coincidence intervals and
    lone records, and compute some properties of the lone records.

    :param raw_records_nv: Raw records of the chunk.
    :param start: Start of the chunk; also the start of the first
        interval which is always kept.
    :param end: End of the chunk; also the end of the last interval
        which is always kept.
    :returns: Dictionary with the keys 'raw_records_coin_nv',
        'lone_raw_records_nv' and 'lone_raw_record_statistics_nv'.
    """
    strax.zero_out_of_bounds(raw_records_nv)

    coincidence_level = self.config['coincidence_level_recorder_nv']
    resolving_time = self.config['resolving_time_recorder_nv']

    # First we have to split rr into records and lone records:
    # Please note that we consider everything as a lone record which
    # does not satisfy the coincidence requirement
    intervals = coincidence(raw_records_nv, coincidence_level, resolving_time)

    n_intervals = len(intervals)
    if n_intervals:
        # Always save the first and last resolving_time nanoseconds
        # (e.g. 600 ns) since we cannot guarantee the gap size to be
        # larger. (We cannot use an OverlapingWindow plugin either since
        # it requires disjoint objects.)
        leading = (start, min(start + resolving_time, intervals[0, 0]))
        trailing = (max(end - resolving_time, intervals[-1, 1]), end)

        intervals_with_bounds = np.zeros((n_intervals + 2, 2), dtype=np.int64)
        intervals_with_bounds[0, :] = leading
        intervals_with_bounds[1:-1, :] = intervals
        intervals_with_bounds[-1, :] = trailing
        del intervals
    else:
        intervals_with_bounds = np.zeros((0, 2), dtype=np.int64)

    neighbors = strax.record_links(raw_records_nv)
    interval_starts, interval_ends = np.transpose(intervals_with_bounds)
    in_coincidence = pulse_in_interval(
        raw_records_nv, neighbors, interval_starts, interval_ends)
    coincident_records, lone_raw = straxen.mask_and_not(
        raw_records_nv, in_coincidence)

    # Compute some properties of the lone_records:
    # We compute only for lone_records baseline etc. since
    # raw_records_nv will be deleted, otherwise we could not change
    # the settings and reprocess the data in case of raw_records_nv
    lone = strax.raw_to_records(lone_raw)
    del lone_raw
    lone = strax.sort_by_time(lone)
    strax.zero_out_of_bounds(lone)
    strax.baseline(
        lone,
        baseline_samples=self.config['nbaseline_samples_lone_records_nv'],
        flip=True,
    )
    strax.integrate(lone)

    lone_statistics, lone = compute_lone_records(
        lone,
        self.config['channel_map']['nveto'],
        self.config['n_lone_records_nv'],
    )
    lone_statistics['time'] = start
    lone_statistics['endtime'] = end

    return dict(
        raw_records_coin_nv=coincident_records,
        lone_raw_records_nv=lone,
        lone_raw_record_statistics_nv=lone_statistics,
    )
def test_splitter_outer():
    """
    Check that the local minimum splitter cuts a two-bump waveform at
    the same sample for peaks and for hitlets.
    """
    waveform = [0, 2, 2, 0, 2, 2, 1]
    n_samples = len(waveform)

    # A single record which holds the complete waveform.
    records = np.zeros(1, dtype=strax.record_dtype(n_samples))
    records['dt'] = 1
    records['data'] = waveform
    records['length'] = n_samples
    records['pulse_length'] = n_samples
    to_pe = np.ones(10)

    hits = strax.find_hits(records, np.ones(1))
    hits['left_integration'] = hits['left']
    hits['right_integration'] = hits['right']

    # One peak and one hitlet, both filled with the same waveform.
    peaks = np.zeros(1, dtype=strax.peak_dtype())
    hitlets = np.zeros(1, dtype=strax.hitlet_with_data_dtype(10))
    for container in (peaks, hitlets):
        container['dt'] = 1
        container['data'][0, :n_samples] = waveform
        container['length'] = n_samples

    rlinks = strax.record_links(records)
    split_options = dict(algorithm='local_minimum', min_height=1, min_ratio=0)
    peaks = strax.split_peaks(peaks, hits, records, rlinks, to_pe,
                              data_type='peaks', **split_options)
    hitlets = strax.split_peaks(hitlets, hits, records, rlinks, to_pe,
                                data_type='hitlets', **split_options)

    expected_fragments = ([0, 2, 2], [0, 2, 2, 1])
    for name, result in (('peaks', peaks), ('hitlets', hitlets)):
        for index, expected in enumerate(expected_fragments):
            fragment = result[index]
            found = fragment['data'][:fragment['length']]
            assert np.all(found == expected), (
                f'Wrong split for {name}, got {found}, expected {expected}.')
def filter_records(r, ir):
    """Apply filter with impulse response ir over the records r.
    Assumes the filter origin is at the impulse response maximum.

    The filtered waveforms are written back into ``r['data']`` as int16,
    i.e. the records are changed in place.

    :param r: Records to be filtered.
    :param ir: Impulse response, must have odd length. Will normalize.
    :returns: ``r`` itself if there are no records, otherwise nothing.
    """
    if len(r) == 0:
        return r

    # The filter works on float waveforms.
    waveforms = _waveforms_to_float(r['data'], r['baseline'])
    prev_r, next_r = strax.record_links(r)

    normalized_ir = ir / ir.sum()
    filtered = filter_waveforms(
        waveforms, normalized_ir.astype(np.float32), prev_r, next_r)

    # Restore waveforms as integers
    r['data'] = filtered.astype(np.int16)
def test_sum_waveform(records):
    """
    Check that strax.sum_waveform yields consistent areas and that the
    summed waveform of each peak equals the sum of the overlapping
    records.

    :param records: Records used to build hits and peaks.
    """
    # Make a single big peak to contain all the records
    n_ch = 100

    rlinks = strax.record_links(records)
    hits = strax.find_hits(records, np.ones(n_ch))
    hits['left_integration'] = hits['left']
    hits['right_integration'] = hits['right']
    hits = strax.sort_by_time(hits)

    peak_finding_options = dict(
        gap_threshold=6,
        left_extension=2,
        right_extension=3,
        min_area=0,
        min_channels=1,
        max_duration=10_000_000,
    )
    peaks = strax.find_peaks(hits, np.ones(n_ch), **peak_finding_options)
    strax.sum_waveform(peaks, hits, records, rlinks, np.ones(n_ch))

    for peak in peaks:
        # Area measures must be consistent
        area = peak['area']
        assert area >= 0
        assert peak['data'].sum() == area
        assert peak['area_per_channel'].sum() == area

        # Rebuild the sum waveform by hand from the overlapping records.
        n_samples = peak['length']
        expected = np.zeros(n_samples, dtype=np.float32)
        for record in records:
            (rec_start, rec_stop), (peak_start, peak_stop) = \
                strax.overlap_indices(record['time'], record['length'],
                                      peak['time'], peak['length'])
            expected[peak_start:peak_stop] += record['data'][rec_start:rec_stop]

        assert np.all(peak['data'][:n_samples] == expected)

    # Finally check that we also can use a selection of peaks to sum
    strax.sum_waveform(peaks, hits, records, rlinks, np.ones(n_ch),
                       select_peaks_indices=np.array([0]))
def filter_records(r, ir):
    """Apply filter with impulse response ir over the records r.
    Assumes the filter origin is at the impulse response maximum.

    The filtered waveforms are written back into ``r['data']`` as int16,
    i.e. the records are changed in place and nothing is returned.

    :param r: Records to be filtered.
    :param ir: Impulse response, must have odd length. Will normalize.
    """
    # Convert waveforms to float and restore baseline.
    # np.float64 is what the removed ``np.float`` alias (the builtin
    # float) resolved to; the alias itself no longer exists in
    # NumPy >= 1.24.
    ws = r['data'].astype(np.float64) + (r['baseline'] % 1)[:, np.newaxis]

    prev_r, next_r = strax.record_links(r)
    ws_filtered = filter_waveforms(
        ws,
        ir / ir.sum(),
        prev_r, next_r)

    # Restore waveforms as integers
    r['data'] = ws_filtered.astype(np.int16)
def compute(self, records, start, end):
    """
    Build peaklets and lone hits from the records of one chunk.

    Steps, in order: find hits, cluster them into peaklets, separate
    and integrate the lone hits, determine the hit integration bounds,
    sum and split the peaklet waveforms, optionally apply the
    saturation correction, and finally compute the tight coincidence
    and the number of hits per peaklet.

    :param records: Records of the chunk.
    :param start: Passed to clip_peaklet_times and
        create_outside_peaks_region (according to the comments below
        the chunk boundary).
    :param end: See start.
    :returns: dict with the keys 'peaklets' and 'lone_hits'.
    """
    r = records

    hits = strax.find_hits(r, min_amplitude=self.hit_thresholds)

    # Remove hits in zero-gain channels
    # they should not affect the clustering!
    hits = hits[self.to_pe[hits['channel']] != 0]

    hits = strax.sort_by_time(hits)

    # Use peaklet gap threshold for initial clustering
    # based on gaps between hits
    peaklets = strax.find_peaks(
        hits, self.to_pe,
        gap_threshold=self.config['peaklet_gap_threshold'],
        left_extension=self.config['peak_left_extension'],
        right_extension=self.config['peak_right_extension'],
        min_channels=self.config['peak_min_pmts'],
        result_dtype=self.dtype_for('peaklets'),
        max_duration=self.config['peaklet_max_duration'],
    )

    # Make sure peaklets don't extend out of the chunk boundary
    # This should be very rare in normal data due to the ADC pretrigger
    # window.
    self.clip_peaklet_times(peaklets, start, end)

    # Get hits outside peaklets, and store them separately.
    # fully_contained is OK provided gap_threshold > extension,
    # which is asserted inside strax.find_peaks.
    is_lone_hit = strax.fully_contained_in(hits, peaklets) == -1
    lone_hits = hits[is_lone_hit]
    strax.integrate_lone_hits(
        lone_hits, records, peaklets,
        save_outside_hits=(self.config['peak_left_extension'],
                           self.config['peak_right_extension']),
        n_channels=len(self.to_pe))

    # Compute basic peak properties -- needed before natural breaks
    # From here on "hits" only contains the hits inside peaklets.
    hits = hits[~is_lone_hit]
    # Define regions outside of peaks such that _find_hit_integration_bounds
    # is not extended beyond a peak.
    outside_peaks = self.create_outside_peaks_region(peaklets, start, end)
    strax.find_hit_integration_bounds(
        hits, outside_peaks, records,
        save_outside_hits=(self.config['peak_left_extension'],
                           self.config['peak_right_extension']),
        n_channels=len(self.to_pe),
        allow_bounds_beyond_records=True,
    )

    # Transform hits to hitlets for naming conventions. A hit refers
    # to the central part above threshold a hitlet to the entire signal
    # including the left and right extension.
    # (We are not going to use the actual hitlet data_type here.)
    hitlets = hits
    del hits

    # Move the start of each hitlet to its left integration bound and
    # let its length span the full integration window. The shift is kept
    # since it is added back when computing the hit maximum times below.
    hitlet_time_shift = (hitlets['left'] - hitlets['left_integration']) * hitlets['dt']
    hitlets['time'] = hitlets['time'] - hitlet_time_shift
    hitlets['length'] = (hitlets['right_integration'] - hitlets['left_integration'])
    hitlets = strax.sort_by_time(hitlets)
    rlinks = strax.record_links(records)

    strax.sum_waveform(peaklets, hitlets, r, rlinks, self.to_pe)

    strax.compute_widths(peaklets)

    # Split peaks using low-split natural breaks;
    # see https://github.com/XENONnT/straxen/pull/45
    # and https://github.com/AxFoundation/strax/pull/225
    peaklets = strax.split_peaks(
        peaklets, hitlets, r, rlinks, self.to_pe,
        algorithm='natural_breaks',
        threshold=self.natural_breaks_threshold,
        split_low=True,
        filter_wing_width=self.config['peak_split_filter_wing_width'],
        min_area=self.config['peak_split_min_area'],
        do_iterations=self.config['peak_split_iterations'])

    # Saturation correction using non-saturated channels
    # similar method used in pax
    # see https://github.com/XENON1T/pax/pull/712
    # Cases when records is not writeable for unclear reason
    # only see this when loading 1T test data
    # more details on https://numpy.org/doc/stable/reference/generated/numpy.ndarray.flags.html
    # NOTE(review): after this copy "r" and "records" are different
    # arrays; the calls below which still use "records" do not see the
    # copy -- presumably intended, confirm.
    if not r['data'].flags.writeable:
        r = r.copy()

    if self.config['saturation_correction_on']:
        peak_list = peak_saturation_correction(
            r, rlinks, peaklets, hitlets, self.to_pe,
            reference_length=self.config['saturation_reference_length'],
            min_reference_length=self.config['saturation_min_reference_length'])

        # Compute the width again for corrected peaks
        strax.compute_widths(peaklets, select_peaks_indices=peak_list)

    # Compute tight coincidence level.
    # Making this a separate plugin would
    # (a) doing hitfinding yet again (or storing hits)
    # (b) increase strax memory usage / max_messages,
    #     possibly due to its currently primitive scheduling.
    # NOTE(review): hit_max_sample is given "records", not "r" -- confirm
    # that the maxima are meant to be taken from the original records.
    hit_max_times = np.sort(
        hitlets['time']
        + hitlets['dt'] * hit_max_sample(records, hitlets)
        + hitlet_time_shift  # add time shift again to get correct maximum
    )
    peaklet_max_times = (
        peaklets['time']
        + np.argmax(peaklets['data'], axis=1) * peaklets['dt'])

    tight_coincidence_channel = get_tight_coin(
        hit_max_times,
        hitlets['channel'],
        peaklet_max_times,
        self.config['tight_coincidence_window_left'],
        self.config['tight_coincidence_window_right'],
        self.channel_range)

    peaklets['tight_coincidence'] = tight_coincidence_channel

    # Optional sanity checks, only done if there are records at all.
    if self.config['diagnose_sorting'] and len(r):
        assert np.diff(r['time']).min(initial=1) >= 0, "Records not sorted"
        assert np.diff(
            hitlets['time']).min(initial=1) >= 0, "Hits/Hitlets not sorted"
        assert np.all(peaklets['time'][1:] >= strax.endtime(peaklets)[:-1]
                      ), "Peaks not disjoint"

    # Update nhits of peaklets:
    counts = strax.touching_windows(hitlets, peaklets)
    counts = np.diff(counts, axis=1).flatten()
    peaklets['n_hits'] = counts

    return dict(peaklets=peaklets, lone_hits=lone_hits)
def compute(self, raw_records_nv, start, end):
    """
    Split the raw records into records within coincidence intervals and
    lone records, and compute some properties of the lone records.

    :param raw_records_nv: Raw records of the chunk.
    :param start: Start of the chunk; also the start of the first
        interval which is always kept.
    :param end: End of the chunk; also the end of the last interval
        which is always kept.
    :returns: Dictionary with the keys 'raw_records_coin_nv',
        'lone_raw_records_nv' and 'lone_raw_record_statistics_nv'.
    """
    if self.config['check_raw_record_overlaps_nv']:
        straxen.check_overlaps(raw_records_nv, n_channels=3000)

    # Cover the case if we do not want to have any coincidence:
    if self.config['coincidence_level_recorder_nv'] <= 1:
        rr = raw_records_nv
        lr = np.zeros(0, dtype=self.dtype['lone_raw_records_nv'])
        lrs = np.zeros(0, dtype=self.dtype['lone_raw_record_statistics_nv'])
        return {'raw_records_coin_nv': rr,
                'lone_raw_records_nv': lr,
                'lone_raw_record_statistics_nv': lrs}

    # Search for hits to define coincidence intervals:
    temp_records = strax.raw_to_records(raw_records_nv)
    temp_records = strax.sort_by_time(temp_records)
    strax.zero_out_of_bounds(temp_records)
    strax.baseline(temp_records,
                   baseline_samples=self.baseline_samples,
                   flip=True)
    hits = strax.find_hits(temp_records, min_amplitude=self.hit_thresholds)
    del temp_records

    # First we have to split rr into records and lone records:
    # Please note that we consider everything as a lone record which
    # does not satisfy the coincidence requirement
    intervals = find_coincidence(
        hits,
        self.config['coincidence_level_recorder_nv'],
        self.config['resolving_time_recorder_nv'],
        self.config['pre_trigger_time_nv'])
    del hits

    # Always save the first and last resolving_time nanoseconds
    # (e.g. 600 ns) since we cannot guarantee the gap size to be larger.
    # (We cannot use an OverlapingWindow plugin either since it requires
    # disjoint objects.)
    if len(intervals):
        intervals_with_bounds = np.zeros(len(intervals) + 2,
                                         dtype=strax.time_fields)
        intervals_with_bounds['time'][1:-1] = intervals['time']
        intervals_with_bounds['endtime'][1:-1] = intervals['endtime']
        intervals_with_bounds['time'][0] = start
        intervals_with_bounds['endtime'][0] = min(
            start + self.config['resolving_time_recorder_nv'],
            intervals['time'][0])
        intervals_with_bounds['time'][-1] = max(
            end - self.config['resolving_time_recorder_nv'],
            intervals['endtime'][-1])
        intervals_with_bounds['endtime'][-1] = end
        del intervals
    else:
        # One-dimensional, exactly like the non-empty case above. A
        # (0, 2) shaped array of a structured dtype would hand
        # two-dimensional 'time'/'endtime' columns to pulse_in_interval.
        intervals_with_bounds = np.zeros(0, dtype=strax.time_fields)

    neighbors = strax.record_links(raw_records_nv)
    mask = pulse_in_interval(
        raw_records_nv,
        neighbors,
        intervals_with_bounds['time'],
        intervals_with_bounds['endtime'],
    )
    rr, lone_records = straxen.mask_and_not(raw_records_nv, mask)

    # Compute some properties of the lone_records:
    # We compute only for lone_records baseline etc. since
    # raw_records_nv will be deleted, otherwise we could not change
    # the settings and reprocess the data in case of raw_records_nv
    lr = strax.raw_to_records(lone_records)
    del lone_records

    lr = strax.sort_by_time(lr)
    strax.zero_out_of_bounds(lr)
    strax.baseline(lr,
                   baseline_samples=self.baseline_samples,
                   flip=True)
    strax.integrate(lr)
    lrs, lr = compute_lone_records(lr,
                                   self.config['channel_map']['nveto'],
                                   self.config['n_lone_records_nv'])
    lrs['time'] = start
    lrs['endtime'] = end

    return {'raw_records_coin_nv': rr,
            'lone_raw_records_nv': lr,
            'lone_raw_record_statistics_nv': lrs}
def test_peak_overflow(
        records,
        gap_factor,
        right_extension,
        gap_threshold,
        max_duration,
):
    """
    Test that we handle dt overflows in peaks correctly. To this end, we
    just create some sets of records and copy that set of records
    for a few times. That way we may end up with a very long
    artificial set of hits that can be used in the peak building. By
    setting the peak finding parameters to very strange conditions we
    are able to replicate the behaviour where a peak would become so
    large that it cannot be written out correctly due to integer
    overflow of the dt field.

    The test returns early (without failing) for empty or
    single-record input, and when strax.find_peaks raises the expected
    AssertionError for incongruent extensions or for the integer
    overflow.

    :param records: records
    :param gap_factor: to create very extended sets of records, just
        add a factor that can be used to multiply the time field with,
        to more quickly arrive to a very long pulse-train
    :param max_duration: max_duration option for strax.find_peaks
    :param right_extension: option for strax.find_peaks
    :param gap_threshold: option for strax.find_peaks
    :return: None
    """
    # Set this here, no need to test left and right independently
    left_extension = 0
    # Make a single big peak to contain all the records
    peak_dtype = np.zeros(0, strax.peak_dtype()).dtype
    # NB! This is only for before #403, now peaks are int32 so
    # this test would take forever with int32.
    magic_overflow_time = np.iinfo(np.int16).max * peak_dtype['data'].shape[0]

    def retrun_1(x):
        """
        Return 1 for all of the input that can be used as a parameter
        for the splitting in natural breaks

        :param x: any type of array
        :return: ones * len(array)
        """
        ret = np.ones(len(x))
        return ret

    r = records
    if not len(r) or len(r['channel']) == 1:
        # Hard to test integer overflow for empty records or with
        # records only from a single channel
        return

    # Copy the pulse train of the records. We are going to copy the same
    # set of records many times now.
    t_max = strax.endtime(r).max()
    print('make buffer')
    # Number of time offsets needed to cover more than 1.5 times the
    # overflow time when stepping by roughly t_max * gap_factor.
    n_repeat = int(1.5 * magic_overflow_time + t_max * gap_factor) // int(
        t_max * gap_factor) + 1
    time_offset = np.linspace(0,
                              1.5 * magic_overflow_time + t_max * gap_factor,
                              n_repeat,
                              dtype=np.int64)
    # Repeat the records and keep exactly one record per time offset.
    r_buffer = np.tile(r, n_repeat // len(r) + 1)[:len(time_offset)]
    assert len(r_buffer) == len(time_offset)
    r_buffer['time'] = r_buffer['time'] + time_offset
    # The buffer must span more than the overflow time, otherwise the
    # test could not provoke the overflow.
    assert strax.endtime(
        r_buffer[-1]) - r_buffer['time'].min() > magic_overflow_time
    r = r_buffer.copy()
    del r_buffer
    print(f'Array is {r.nbytes/1e6} MB, good luck')

    # Do peak finding!
    print(f'Find hits')
    hits = strax.find_hits(r, min_amplitude=0)
    assert len(hits)
    hits = strax.sort_by_time(hits)

    # Dummy to_pe
    to_pe = np.ones(max(r['channel']) + 1)

    try:
        print('Find peaks')
        # Find peaks, we might end up with negative dt here!
        p = strax.find_peaks(
            hits, to_pe,
            gap_threshold=gap_threshold,
            left_extension=left_extension,
            right_extension=right_extension,
            max_duration=max_duration,
            # Due to these settings, we will start merging
            # whatever strax can get its hands on
            min_area=0.,
            min_channels=1, )
    except AssertionError as e:
        # An AssertionError is fine if it is explained by the parameters
        # of this test, everything else is re-raised.
        if not gap_threshold > left_extension + right_extension:
            print(f'Great, we are getting the assertion statement for the '
                  f'incongruent extensions')
            return
        elif not left_extension + max_duration + right_extension < magic_overflow_time:
            # Ending up here is the ultimate goal of the tests. This
            # means we are hitting github.com/AxFoundation/strax/issues/397
            print(f'Great, the test worked, we are getting the assertion '
                  f'statement for the int overflow')
            return
        else:
            # The error is caused by something else, we need to re-raise
            raise e

    print(f'Peaklet array is {p.nbytes / 1e6} MB, good luck')
    if len(p) == 0:
        print(f'rec length {len(r)}')
    assert len(p)
    assert np.all(p['dt'] > 0)
    # Double check that this error should have been raised.
    # NOTE(review): the two f-string parts below are joined without a
    # space, so the message reads "Working with<gap_threshold> ...".
    if not gap_threshold > left_extension + right_extension:
        raise ValueError(f'No assertion error raised! Working with'
                         f'{gap_threshold} {left_extension + right_extension}')

    # Compute basics
    hits = strax.find_hits(r, np.ones(10000))
    hits['left_integration'] = hits['left']
    hits['right_integration'] = hits['right']
    rlinks = strax.record_links(r)
    strax.sum_waveform(p, hits, r, rlinks, to_pe)
    strax.compute_widths(p)

    try:
        print('Split peaks')
        peaklets = strax.split_peaks(
            p, hits, r, rlinks, to_pe,
            algorithm='natural_breaks',
            threshold=retrun_1,
            split_low=True,
            filter_wing_width=70,
            min_area=0,
            do_iterations=2)
    except AssertionError as e:
        # At this point find_peaks did not complain. If the parameters
        # allow for an overflow we should have been stopped earlier,
        # which is reported as a RuntimeError.
        if not left_extension + max_duration + right_extension < magic_overflow_time:
            # Ending up here is the ultimate goal of the tests. This
            # means we are hitting github.com/AxFoundation/strax/issues/397
            print(f'Great, the test worked, we are getting the assertion '
                  f'statement for the int overflow')
            raise RuntimeError(
                'We were not properly warned of the imminent peril we are '
                'facing. This error means that the peak_finding is not '
                'protected against integer overflow in the dt field. Where is '
                'our white knight in shining armour to protected from this '
                'imminent doom:\n'
                'github.com/AxFoundation/strax/issues/397') from e
        # We failed for another reason, we need to re-raise
        raise e

    assert len(peaklets)
    assert len(peaklets) <= len(r)
    # Integer overflow will manifest itself here again:
    assert np.all(peaklets['dt'] > 0)