def test_split_array(data, t, allow_early_split):
    print(
        f"\nCalled with {np.transpose([data['time'], strax.endtime(data)]).tolist()}, "
        f"{t}, {allow_early_split}")

    try:
        data1, data2, tsplit = strax.split_array(
            data, t, allow_early_split=allow_early_split)
    except strax.CannotSplit:
        assert not allow_early_split
        # There must be data straddling t
        for d in data:
            if d['time'] < t < strax.endtime(d):
                break
        else:
            raise ValueError("threw CannotSplit needlessly")
    else:
        if allow_early_split:
            assert tsplit <= t
            t = tsplit

        assert len(data1) + len(data2) == len(data)
        assert np.all(strax.endtime(data1) <= t)
        assert np.all(data2['time'] >= t)
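# A minimal usage sketch of strax.split_array under the semantics the test
# above checks (hypothetical data; field values chosen for illustration):
def _demo_split_array():
    data = np.zeros(3, dtype=strax.interval_dtype)
    data['time'] = [0, 10, 20]
    data['length'] = 5
    data['dt'] = 1
    # No interval straddles t=15, so the split succeeds even with
    # allow_early_split=False:
    left, right, tsplit = strax.split_array(data, 15)
    assert len(left) == 2 and len(right) == 1
    assert np.all(strax.endtime(left) <= tsplit)
    assert np.all(right['time'] >= tsplit)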
def _get_deepwindows(windows, peaks_a, peaks_b, matching_fuzz, _deep_windows):
    # Calculate the endtimes once
    peak_a_endtimes = strax.endtime(peaks_a)
    peak_b_endtimes = strax.endtime(peaks_b)

    # A later window cannot match earlier than the previous one did, so
    # starting the search at the previous match saves time.
    prev_start = 0
    for window_i, w in enumerate(windows):
        l1, r1 = w
        if r1 - l1:
            match = strax.processing.general._touching_windows(
                peaks_a['time'][prev_start:],
                peak_a_endtimes[prev_start:],
                peaks_b[l1:r1]['time'],
                peak_b_endtimes[l1:r1],
                window=matching_fuzz)
            if len(match):
                # We have skipped the first prev_start items,
                # so add prev_start back here
                this_window = match[0] + prev_start
                _deep_windows[window_i] = this_window
                prev_start = max(match[0][0], prev_start)
            else:
                # No match
                pass
    return _deep_windows
def find_peak_groups(peaks, gap_threshold,
                     left_extension=0, right_extension=0):
    """Return boundaries of groups of peaks separated by gap_threshold,
    extended left and right.

    :param peaks: Peaks to group
    :param gap_threshold: Minimum gap between peaks
    :param left_extension: Extend groups by this many ns left
    :param right_extension: " " right
    :return: time, endtime arrays of group boundaries
    """
    # Mock up a "hits" array so we can just use the existing peakfinder
    # It doesn't work on raw peaks, since they might have different dts
    # TODO: is there no cleaner way?
    fake_hits = np.zeros(len(peaks), dtype=strax.hit_dtype)
    fake_hits['dt'] = 1
    fake_hits['area'] = 1
    fake_hits['time'] = peaks['time']
    # TODO: could this cause int overrun nonsense anywhere?
    fake_hits['length'] = strax.endtime(peaks) - peaks['time']
    fake_peaks = strax.find_peaks(
        fake_hits, adc_to_pe=np.ones(1),
        gap_threshold=gap_threshold,
        left_extension=left_extension,
        right_extension=right_extension,
        min_channels=1,
        min_area=0)
    return fake_peaks['time'], strax.endtime(fake_peaks)
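# A short sketch of how find_peak_groups can be used (hypothetical data):
# the first two peaks are closer together than gap_threshold and should
# end up in one group, the third in a group of its own.
def _demo_find_peak_groups():
    peaks = np.zeros(3, dtype=strax.interval_dtype)
    peaks['time'] = [0, 12, 100]
    peaks['length'] = 10
    peaks['dt'] = 1
    starts, ends = find_peak_groups(peaks, gap_threshold=20)
    # Expect two groups: one spanning the first two peaks, one for the third
    assert len(starts) == len(ends) == 2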
def do_compute(self, chunk_i=None, **kwargs):
    if not len(kwargs):
        raise RuntimeError("OverlapWindowPlugin must have a dependency")
    end = max([strax.endtime(x[-1]) for x in kwargs.values()])

    # Take slightly larger windows for safety: it is very easy for me
    # (or the user) to have made an off-by-one error
    # TODO: why do tests not fail if I set cache_inputs_beyond to
    # end - window size - 2?
    # (they do fail if I set it to end - 0.5 * window size - 2)
    invalid_beyond = end - self.get_window_size() - 1
    cache_inputs_beyond = end - 2 * self.get_window_size() - 1

    for k, v in kwargs.items():
        if len(self.cached_input):
            kwargs[k] = v = np.concatenate([self.cached_input[k], v])
        self.cached_input[k] = v[strax.endtime(v) > cache_inputs_beyond]

    result = super().do_compute(chunk_i=chunk_i, **kwargs)

    endtimes = strax.endtime(kwargs[self.data_kind]
                             if self.data_kind in kwargs
                             else result)
    assert len(endtimes) == len(result)

    is_valid = endtimes < invalid_beyond
    not_sent_yet = endtimes >= self.last_threshold

    # Cache all results we have not sent, nor are sending now
    self.cached_results = result[not_sent_yet & (~is_valid)]

    # Send out only valid results we haven't sent yet
    result = result[is_valid & not_sent_yet]

    self.last_threshold = invalid_beyond
    return result
def _update_record_i(new_hitlets, records, next_ri):
    """
    Function which updates the record_i value of the new hitlets.

    Notes:
        Assumes new_hitlets to be sorted in time.
    """
    for ind, hit in enumerate(new_hitlets):
        counter = 0
        current_ri = hit['record_i']
        while True:
            r = records[current_ri]

            # Hitlet must only partially be contained in record_i:
            time = hit['time']
            end_time = strax.endtime(hit)
            start_in = (r['time'] <= time) & (time < strax.endtime(r))
            end_in = (r['time'] < end_time) & (end_time <= strax.endtime(r))
            if start_in or end_in:
                hit['record_i'] = current_ri
                break
            else:
                last_ri = current_ri
                current_ri = next_ri[current_ri]
                counter += 1

            if current_ri == -1:
                print('Record:\n', r, '\nHit:\n', hit)
                raise ValueError('Was not able to find record_i')

            if counter > TRIAL_COUNTER_NEIGHBORING_RECORDS:
                print(ind, last_ri)
                raise RuntimeError('Tried too often to find correct record_i.')
def do_compute(self, chunk_i=None, **kwargs):
    if not len(kwargs):
        raise RuntimeError("OverlapWindowPlugin must have a dependency")
    end = max([strax.endtime(x[-1]) for x in kwargs.values()])
    invalid_beyond = end - self.get_window_size()
    cache_inputs_beyond = end - 3 * self.get_window_size()

    for k, v in kwargs.items():
        if len(self.cached_input):
            kwargs[k] = v = np.concatenate([self.cached_input[k], v])
        self.cached_input[k] = v[strax.endtime(v) > cache_inputs_beyond]

    result = super().do_compute(chunk_i=chunk_i, **kwargs)

    endtimes = strax.endtime(kwargs[self.data_kind]
                             if self.data_kind in kwargs
                             else result)
    assert len(endtimes) == len(result)

    # Remove results that are invalid or already sent out last time
    is_valid = endtimes < invalid_beyond
    self.cached_results = result[~is_valid]
    result = result[is_valid & (endtimes > self.last_threshold)]

    self.last_threshold = invalid_beyond
    return result
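# A sketch of the boilerplate a concrete OverlapWindowPlugin needs
# (hypothetical plugin name and dtype): besides a regular compute, only
# get_window_size must be defined; it is the time window (in ns) within
# which results may still be influenced by data in neighbouring chunks.
class DemoOverlapPlugin(strax.OverlapWindowPlugin):
    depends_on = ('peaks',)
    provides = 'demo_overlap'
    dtype = strax.time_fields

    def get_window_size(self):
        # Must be an upper bound on the plugin's true dependency window
        return 10_000  # ns

    def compute(self, peaks):
        return dict(time=peaks['time'],
                    endtime=strax.endtime(peaks))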
def _find_hit_integration_bounds(
        lone_hits, peaks, records, save_outside_hits, n_channels):
    """Update lone hits to include integration bounds

    save_outside_hits: in ns!!
    """
    result = np.zeros((len(lone_hits), 2), dtype=np.int64)
    if not len(lone_hits):
        return result

    # By default, use save_outside_hits to determine bounds
    result[:, 0] = lone_hits['time'] - save_outside_hits[0]
    result[:, 1] = strax.endtime(lone_hits) + save_outside_hits[1]

    NO_EARLIER_HIT = -1
    last_hit_index = np.ones(n_channels, dtype=np.int32) * NO_EARLIER_HIT

    n_peaks = len(peaks)
    FAR_AWAY = 9223372036_854775807  # np.iinfo(np.int64).max, April 2262
    peak_i = 0

    for hit_i, h in enumerate(lone_hits):
        ch = h['channel']

        # Find end of previous peak and start of next peak
        # (note peaks are disjoint from any lone hit, even though
        # lone hits may not be disjoint from each other)
        while peak_i < n_peaks and peaks[peak_i]['time'] < h['time']:
            peak_i += 1
        prev_p_end = strax.endtime(peaks[peak_i - 1]) if peak_i != 0 else 0
        next_p_start = peaks[peak_i]['time'] if peak_i != n_peaks else FAR_AWAY

        # Ensure we do not integrate parts of peaks
        # or (at least for now) beyond the record in which the hit was found
        r = records[h['record_i']]
        result[hit_i][0] = max(prev_p_end, r['time'], result[hit_i][0])
        result[hit_i][1] = min(next_p_start,
                               strax.endtime(r),
                               result[hit_i][1])

        if last_hit_index[ch] != NO_EARLIER_HIT:
            # Ensure the previous hit does not integrate the over-threshold
            # region of this hit
            result[last_hit_index[ch]][1] = min(result[last_hit_index[ch]][1],
                                                h['time'])

            # Ensure this hit doesn't integrate anything the previous hit
            # already integrated
            result[hit_i][0] = max(result[last_hit_index[ch]][1],
                                   result[hit_i][0])

        last_hit_index[ch] = hit_i

    # Convert to index in record and store
    t0 = records[lone_hits['record_i']]['time']
    dt = records[lone_hits['record_i']]['dt']
    for hit_i, h in enumerate(lone_hits):
        h['left_integration'] = (result[hit_i, 0] - t0[hit_i]) // dt[hit_i]
        h['right_integration'] = (result[hit_i, 1] - t0[hit_i]) // dt[hit_i]
def compute(self, peaks):
    p = np.zeros(len(peaks), self.dtype['peak_classification'])
    p['time'] = peaks['time']
    p['endtime'] = strax.endtime(peaks)

    lh = np.zeros(len(peaks), self.dtype['lone_hits'])
    lh['time'] = peaks['time']
    lh['endtime'] = strax.endtime(peaks)
    return dict(peak_classification=p,
                lone_hits=lh)
def test_concat_overlapping_hits(hits0, hits1, le, re):
    # Combining fake hits of the two channels:
    hits1['channel'] = 1
    hits = np.concatenate([hits0, hits1])

    if not len(hits):
        # In case there are no hitlets there is not much to do:
        concat_hits = strax.concat_overlapping_hits(
            hits, (le, re), (0, 1), 0, float('inf'))
        assert not len(concat_hits), \
            'Concatenated hits not empty although hits are empty'
    else:
        hits = strax.sort_by_time(hits)

        # Additional offset to time since le > hits['time'].min() does not
        # make sense:
        hits['time'] += 100

        # Now we are ready for the tests:
        # Creating for each channel a dummy array.
        tmax = strax.endtime(hits).max()  # Since dt is one this is the last sample
        tmax += re

        dummy_array = np.zeros((2, tmax), np.int64)
        for h in hits:
            # Filling samples with 1 if inside a hit:
            st = h['time'] - le
            et = strax.endtime(h) + re
            dummy_array[h['channel'], st:et] = 1

        # Now we concatenate the hits and check whether their length matches
        # the total sum of our dummy arrays.
        concat_hits = strax.concat_overlapping_hits(
            hits, (le, re), (0, 1), 0, float('inf'))

        assert len(concat_hits) <= len(hits), \
            'Somehow we have more hits than before?!'

        for ch in [0, 1]:
            dummy_sum = np.sum(dummy_array[ch])

            # Computing total length of concatenated hits:
            diff = strax.endtime(concat_hits) - concat_hits['time']
            m = concat_hits['channel'] == ch
            concat_sum = np.sum(diff[m])

            assert concat_sum == dummy_sum, \
                f'Total length of concatenated hits deviates from hits for channel {ch}'

            if len(concat_hits[m]) > 1:
                # Checking that the new hits do not overlap or touch anymore:
                mask = strax.endtime(concat_hits[m])[:-1] - concat_hits[m]['time'][1:]
                assert np.all(mask < 0), \
                    f'Found two hits within {ch} which are touching or overlapping'
def test_cut_plugin(input_peaks, cut_threshold):
    """Test strax.CutPlugin with a minimal working example."""
    # Just one chunk will do
    chunks = [input_peaks]
    _dtype = input_peaks.dtype

    class ToBeCut(strax.Plugin):
        """Data to be cut with strax.CutPlugin"""
        depends_on = tuple()
        dtype = _dtype
        provides = 'to_be_cut'
        data_kind = 'to_be_cut'  # match with depends_on below

        def compute(self, chunk_i):
            data = chunks[chunk_i]
            return self.chunk(
                data=data,
                start=(int(data[0]['time'])
                       if len(data) else
                       np.arange(len(chunks))[chunk_i]),
                end=(int(strax.endtime(data[-1]))
                     if len(data) else
                     np.arange(1, len(chunks) + 1)[chunk_i]))

        # Hack to make peak output stop after a few chunks
        def is_ready(self, chunk_i):
            return chunk_i < len(chunks)

        def source_finished(self):
            return True

    class CutSomething(strax.CutPlugin):
        """Minimal working example of CutPlugin"""
        depends_on = ('to_be_cut',)

        def cut_by(self, to_be_cut):
            return to_be_cut[_dtype_name] > cut_threshold

    st = strax.Context(storage=[])
    st.register(ToBeCut)
    st.register(CutSomething)

    result = st.get_array(run_id='some_run',
                          targets=strax.camel_to_snake(CutSomething.__name__))
    correct_answer = np.sum(input_peaks[_dtype_name] > cut_threshold)
    assert len(result) == len(input_peaks), \
        "Cut plugin changed the number of entries"
    assert correct_answer == np.sum(result['cut_something']), (
        "Cut plugin does not give boolean arrays correctly")

    if len(input_peaks):
        assert strax.endtime(input_peaks).max() == \
               strax.endtime(result).max(), "Last endtime got scrambled"
        assert np.all(input_peaks['time'] == result['time']), \
            "(Start) times got scrambled"
        assert np.all(strax.endtime(input_peaks) == strax.endtime(result)), \
            "Some endtimes got scrambled"
def test_nt_minianalyses():
    """Number of tests to be run on nT-like configs"""
    if not straxen.utilix_is_configured():
        return
    with tempfile.TemporaryDirectory() as temp_dir:
        try:
            print("Temporary directory is ", temp_dir)
            os.chdir(temp_dir)
            from .test_plugins import DummyRawRecords, testing_config_nT, test_run_id_nT
            st = straxen.contexts.xenonnt_online()
            rundb = st.storage[0]
            rundb.readonly = True
            st.storage = [rundb, strax.DataDirectory(temp_dir)]

            # We want to test the FDC map that only works with CMT
            test_conf = testing_config_nT.copy()
            del test_conf['fdc_map']

            st.set_config(test_conf)
            st.set_context_config(dict(forbid_creation_of=()))
            st.register(DummyRawRecords)

            rr = st.get_array(test_run_id_nT, 'raw_records')
            st.make(test_run_id_nT, 'records')
            st.make(test_run_id_nT, 'peak_basics')

            st.daq_plot(
                test_run_id_nT,
                time_range=(rr['time'][0], strax.endtime(rr)[-1]),
                vmin=0.1,
                vmax=1,
            )

            st.plot_records_matrix(
                test_run_id_nT,
                time_range=(rr['time'][0], strax.endtime(rr)[-1]),
                vmin=0.1,
                vmax=1,
                group_by='ADC ID',
            )
            plt_clf()

            st.make(test_run_id_nT, 'event_info')
            st.load_corrected_positions(
                test_run_id_nT,
                time_range=(rr['time'][0], strax.endtime(rr)[-1]),
            )
            # This would be nice to add but with empty events it does not work
            # st.event_display(test_run_id_nT,
            #                  time_range=(rr['time'][0],
            #                              strax.endtime(rr)[-1]))
        finally:
            # On Windows, you cannot delete the current process's working
            # directory, so we have to chdir out first.
            os.chdir('..')
def fully_contained_in(things, containers):
    """Return array of len(things) with index of interval in containers
    for which things are fully contained in a container,
    or -1 if no such container exists.

    We assume all intervals are sorted by time,
    and the containers are non-overlapping.
    """
    result = np.ones(len(things), dtype=np.int32) * -1
    a_starts = things['time']
    b_starts = containers['time']
    a_ends = strax.endtime(things)
    b_ends = strax.endtime(containers)
    _fc_in(a_starts, b_starts, a_ends, b_ends, result)
    return result
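# A quick sketch of the containment semantics (hypothetical data): the
# first thing lies inside the container, the second does not.
def _demo_fully_contained_in():
    things = np.zeros(2, dtype=strax.interval_dtype)
    things['time'] = [5, 50]
    things['length'] = 2
    things['dt'] = 1
    containers = np.zeros(1, dtype=strax.interval_dtype)
    containers['length'] = 10
    containers['dt'] = 1
    # Thing [5, 7) is inside container [0, 10); thing [50, 52) matches none
    assert fully_contained_in(things, containers).tolist() == [0, -1]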
def compute(self, **kwargs):
    # If not otherwise specified, data kind to loop over
    # is that of the first dependency (e.g. events)
    # Can't be in __init__: deps not initialized then
    if hasattr(self, 'loop_over'):
        loop_over = self.loop_over
    else:
        loop_over = self.deps[self.depends_on[0]].data_kind
    if not isinstance(loop_over, str):
        raise TypeError("Please add \"loop_over = <base>\""
                        " to your plugin definition")

    # Group into lists of things (e.g. peaks)
    # contained in the base things (e.g. events)
    base = kwargs[loop_over]
    if len(base) > 1:
        assert np.all(base[1:]['time'] >= strax.endtime(base[:-1])), \
            f'{base}s overlap'

    for k, things in kwargs.items():
        # Check for sorting
        difs = np.diff(things['time'])
        if difs.min(initial=0) < 0:
            i_bad = np.argmin(difs)
            examples = things[i_bad - 1:i_bad + 3]
            t0 = examples['time'].min()
            raise ValueError(
                f'Expected {k} to be sorted, but found '
                + str([(x['time'] - t0, strax.endtime(x) - t0)
                       for x in examples]))

        if k != loop_over:
            r = strax.split_by_containment(things, base)
            if len(r) != len(base):
                raise RuntimeError(f"Split {k} into {len(r)}, "
                                   f"should be {len(base)}!")
            kwargs[k] = r

    results = np.zeros(len(base), dtype=self.dtype)
    deps_by_kind = self.dependencies_by_kind()

    for i in range(len(base)):
        r = self.compute_loop(base[i],
                              **{k: kwargs[k][i]
                                 for k in deps_by_kind
                                 if k != loop_over})

        # Convert from dict to array row:
        for k, v in r.items():
            results[i][k] = v

    return results
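# A sketch of how this loop machinery is typically used (hypothetical
# plugin): subclasses implement compute_loop, which receives one base
# element (e.g. an event) plus the dependency rows contained in it.
class DemoEventNPeaks(strax.LoopPlugin):
    depends_on = ('events', 'peaks')
    provides = 'demo_n_peaks'
    dtype = strax.time_fields + [(('Number of peaks in the event', 'n_peaks'),
                                  np.int32)]

    def compute_loop(self, event, peaks):
        return dict(time=event['time'],
                    endtime=strax.endtime(event),
                    n_peaks=len(peaks))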
def test_touching_windows(things, containers, window):
    result = strax.touching_windows(things, containers, window=window)
    assert len(result) == len(containers)
    if len(result):
        assert np.all((0 <= result) & (result <= len(things)))

    for c_i, container in enumerate(containers):
        i_that_touch = np.arange(*result[c_i])
        for t_i, thing in enumerate(things):
            if (strax.endtime(thing) <= container['time'] - window
                    or thing['time'] >= strax.endtime(container) + window):
                assert t_i not in i_that_touch
            else:
                assert t_i in i_that_touch
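# A small sketch of the touching_windows output format (hypothetical data):
# one [start, end) index pair into things per container.
def _demo_touching_windows():
    things = np.zeros(3, dtype=strax.interval_dtype)
    things['time'] = [0, 10, 40]
    things['length'] = 5
    things['dt'] = 1
    containers = np.zeros(1, dtype=strax.interval_dtype)
    containers['time'] = 8
    containers['length'] = 10
    containers['dt'] = 1
    # [0, 5) and [10, 15) are within 5 ns of container [8, 18); [40, 45) is not
    assert strax.touching_windows(things, containers,
                                  window=5).tolist() == [[0, 2]]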
def compute(self, chunk_i):
    data = big_chunks[chunk_i]
    # First determine start (t0) and stop (t1) times for the chunk
    if chunk_i == 0:
        t0 = int(data[0]['time']) if len(data) else 0
        t1 = int(strax.endtime(data[-1])) if len(data) else 1
    else:
        # Take the end of the previous chunk as the start time
        t0 = _big_chunks_seen[chunk_i - 1].end
        t1 = int(strax.endtime(data[-1]) if len(data) else t0 + 1)
    chunk = self.chunk(data=data, start=t0, end=t1)
    _big_chunks_seen.append(chunk)
    return chunk
def _get_hitlets_data(hitlets, records, to_pe):
    rranges = _touching_windows(records['time'],
                                strax.endtime(records),
                                hitlets['time'],
                                strax.endtime(hitlets))

    for i, h in enumerate(hitlets):
        recorded_samples_offset = 0
        n_recorded_samples = 0
        is_first_record = True
        for ind, r_ind in enumerate(range(rranges[i][0], rranges[i][1])):
            r = records[r_ind]
            if r['channel'] != h['channel']:
                continue

            (r_start, r_end), (h_start, h_end) = strax.overlap_indices(
                r['time'] // r['dt'], r['length'],
                h['time'] // h['dt'], h['length'])

            if (r_end - r_start) == 0 and (h_end - h_start) == 0:
                # _touching_windows will give a range of overlapping records
                # with the hitlet independent of channel. Hence, in rare
                # cases it might be that a record of channel A touches a
                # hitlet of channel B which starts before the previous record
                # of channel B. Hence we get one non-overlapping record in
                # channel B.
                continue

            if is_first_record:
                # We need recorded_samples_offset because hits may extend
                # beyond the boundaries of our recorded data. As the data is
                # not defined in those regions we have to chop and realign
                # our data. See the following example: (fragment 0, 1)
                # [2, 2, 2, 2] [2, 2, 2] with a hitfinder threshold of 1 and
                # left/right extension of 3. In the first fragment our hitlet
                # would range from 3 to 8, in the second from 8 to 11. Hence
                # we have to subtract from every h_start and h_end the offset
                # of 3 to realign our data. Time and length of the hitlet are
                # updated accordingly.
                is_first_record = False
                recorded_samples_offset = h_start

            h_start -= recorded_samples_offset
            h_end -= recorded_samples_offset

            h['data'][h_start:h_end] += \
                r['data'][r_start:r_end] + r['baseline'] % 1
            n_recorded_samples += r_end - r_start

        # Chop time and length in case the hit extends into
        # non-recorded regions.
        h['time'] += int(recorded_samples_offset * h['dt'])
        h['length'] = n_recorded_samples

        h['data'][:] = h['data'][:] * to_pe[h['channel']]
        h['area'] = np.sum(h['data'])
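# A sketch of strax.overlap_indices as used above (hypothetical numbers):
# given two intervals as (first_sample, n_samples), it returns the bounds
# of their overlap expressed as indices into each interval.
def _demo_overlap_indices():
    (r_start, r_end), (h_start, h_end) = strax.overlap_indices(0, 10, 5, 10)
    # Samples 5-9 overlap: indices 5..10 in the record, 0..5 in the hit
    assert (r_start, r_end) == (5, 10)
    assert (h_start, h_end) == (0, 5)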
def compute(self, events):
    result = {'time': events['time'],
              'endtime': strax.endtime(events)}

    z_obs = -self.config['electron_drift_velocity'] * events['drift_time']
    orig_pos = np.vstack([events['s2_x'], events['s2_y'], z_obs]).T
    r_obs = np.linalg.norm(orig_pos[:, :2], axis=1)
    delta_r = self.map(orig_pos)

    # Apply radial correction
    with np.errstate(invalid='ignore', divide='ignore'):
        r_cor = r_obs + delta_r
        scale = r_cor / r_obs

    # z correction due to longer drift time for distortion
    # (geometrical reasoning not valid if |delta_r| > |z_obs|,
    # as a cathetus cannot be longer than the hypotenuse)
    with np.errstate(invalid='ignore'):
        z_cor = -(z_obs**2 - delta_r**2)**0.5
        invalid = np.abs(z_obs) < np.abs(delta_r)
    z_cor[invalid] = z_obs[invalid]

    result.update({'x': orig_pos[:, 0] * scale,
                   'y': orig_pos[:, 1] * scale,
                   'r': r_cor,
                   'r_naive': r_obs,
                   'r_field_distortion_correction': delta_r,
                   'theta': np.arctan2(orig_pos[:, 1], orig_pos[:, 0]),
                   'z_naive': z_obs,
                   'z': z_cor})

    return result
def compute(self, events):
    z_obs = -self.config['electron_drift_velocity'] * events['drift_time']
    orig_pos = np.vstack([events['s2_x'], events['s2_y'], z_obs]).T
    r_obs = np.linalg.norm(orig_pos[:, :2], axis=1)
    delta_r = self.map(orig_pos)
    with np.errstate(invalid='ignore', divide='ignore'):
        r_cor = r_obs + delta_r
        scale = r_cor / r_obs

    result = dict(time=events['time'],
                  endtime=strax.endtime(events),
                  x=orig_pos[:, 0] * scale,
                  y=orig_pos[:, 1] * scale,
                  r=r_cor,
                  z_naive=z_obs,
                  r_naive=r_obs,
                  r_field_distortion_correction=delta_r,
                  theta=np.arctan2(orig_pos[:, 1], orig_pos[:, 0]))

    with np.errstate(invalid='ignore'):
        z_cor = -(z_obs ** 2 - delta_r ** 2) ** 0.5
        # The geometrical reasoning breaks down when |delta_r| > |z_obs|
        # (the square root would be imaginary), so fall back to z_obs there
        invalid = np.abs(z_obs) < np.abs(delta_r)
    z_cor[invalid] = z_obs[invalid]
    result['z'] = z_cor
    return result
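# A numeric sketch of the z correction above (hypothetical values):
# with z_obs = -50 cm and delta_r = 3 cm the corrected z is
# -sqrt(50^2 - 3^2) ~ -49.91 cm. If |delta_r| exceeded |z_obs| the square
# root would be imaginary, hence the fallback to z_obs for such events.
def _demo_z_correction():
    z_obs, delta_r = -50.0, 3.0
    z_cor = -(z_obs ** 2 - delta_r ** 2) ** 0.5
    assert abs(z_cor + 49.9099) < 1e-3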
def compute(self, events):
    # S1 corrections depend on the actual corrected event position.
    # We use this also for the alternate S1; for e.g. Kr this is
    # fine as the S1 correction varies slowly.
    event_positions = np.vstack([events['x'], events['y'], events['z']]).T

    # For electron lifetime corrections to the S2s,
    # use lifetimes computed using the main S1.
    lifetime_corr = np.exp(events['drift_time'] / self.elife)
    alt_lifetime_corr = (
        np.exp((events['alt_s2_interaction_drift_time']) / self.elife))

    # S2(x,y) corrections use the observed S2 positions
    s2_positions = np.vstack([events['s2_x'], events['s2_y']]).T
    alt_s2_positions = np.vstack([events['alt_s2_x'], events['alt_s2_y']]).T

    return dict(
        time=events['time'],
        endtime=strax.endtime(events),

        cs1=events['s1_area'] / self.s1_map(event_positions),
        alt_cs1=events['alt_s1_area'] / self.s1_map(event_positions),

        cs2=(events['s2_area'] * lifetime_corr
             / self.s2_map(s2_positions)),
        alt_cs2=(events['alt_s2_area'] * alt_lifetime_corr
                 / self.s2_map(alt_s2_positions)))
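# A numeric sketch of the electron-lifetime correction above (hypothetical
# values): an S2 that drifted for exactly one electron lifetime is scaled
# up by a factor e to compensate for charge loss during the drift.
def _demo_elife_correction():
    drift_time = 500_000.0  # ns
    elife = 500_000.0       # ns
    s2_area = 100.0         # PE
    cs2 = s2_area * np.exp(drift_time / elife)
    assert abs(cs2 - 100.0 * np.e) < 1e-9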
def plot_classified_peak(st, p, t_reference=None, seconds_range=None,
                         run_id=None, single_figure=True, figsize=(10, 4),
                         xaxis='since_run_start', **kwargs):
    if 'color' not in kwargs:
        kwargs.update({'color': {0: 'gray', 1: 'b', 2: 'g'}[p['type']]})
    if seconds_range is None and run_id is not None:
        seconds_range = np.array(
            [p['time'], strax.endtime(p)]) - st.estimate_run_start(run_id)
        seconds_range = seconds_range / int(1e9)
        t_reference = st.estimate_run_start(run_id)
    if single_figure:
        plt.figure(figsize=figsize)
    plt.axhline(0, c='k', alpha=0.2)

    plot_peak(p, t0=t_reference,
              color={0: 'gray', 1: 'b', 2: 'g'}[p['type']])
    if xaxis == 'since_peak':
        seconds_range_xaxis(seconds_range, t0=seconds_range[0])
    elif xaxis:
        seconds_range_xaxis(seconds_range)
    else:
        plt.xticks([])
    plt.xlim(*seconds_range)
    plt.ylabel("Intensity [PE/ns]")
    if single_figure:
        plt.tight_layout()
def test_find_peaks(hits, min_channels, min_area):
    hits['area'] = 1
    gap_threshold = 10
    peaks = strax.find_peaks(hits,
                             adc_to_pe=np.ones(1),
                             right_extension=0, left_extension=0,
                             gap_threshold=gap_threshold,
                             min_channels=min_channels,
                             min_area=min_area)
    # Check sanity
    assert np.all(peaks['length'] > 0)
    assert np.all(peaks['n_hits'] > 0)

    # Check if requirements are satisfied
    if min_area != 0:
        assert np.all(peaks['area'] >= min_area)
    if min_channels != 1:
        assert np.all(peaks['n_hits'] >= min_channels)
    assert np.all(peaks['max_gap'] < gap_threshold)

    # Without requirements, all hits must occur in a peak
    if min_area == 0 and min_channels == 1:
        assert np.sum(peaks['n_hits']) == len(hits)
        assert np.all(strax.fully_contained_in(hits, peaks) > -1)

    # Since there are no extensions, peaks must be at least
    # gap_threshold apart
    starts = peaks['time']
    ends = peaks['time'] + peaks['length'] * peaks['dt']
    assert np.all(ends[:-1] + gap_threshold <= starts[1:])

    assert np.all(starts == np.sort(starts)), "Not sorted"
    assert np.all(peaks['time'] < strax.endtime(peaks)), \
        "Non-positive peak length"
def hitlets_to_hv_points(hitlets, t_ref=None):
    """
    Function which converts hitlets into hv.Points used in the
    different plots.

    Computes hitlet times as relative times with respect to the first
    hitlet if t_ref is not set.
    """
    import holoviews as hv

    if not len(hitlets):
        raise ValueError('Expected at least a single hitlet.')

    if isinstance(hitlets, np.ndarray):
        hitlets = pd.DataFrame(hitlets)

    # Set relative times:
    if t_ref is None:
        t_ref = min(hitlets['time'])
    time = seconds_from(hitlets['time'], t_ref, unit_conversion=1)
    hitlets['time'] = time
    hitlets['endtime'] = strax.endtime(hitlets.to_records())
    hitlet_points = hv.Points(hitlets)
    return hitlet_points
def compute(self, events, peaks):
    split_peaks = strax.split_by_containment(peaks, events)
    result = np.zeros(len(events), self.dtype)

    self.set_nan_defaults(result)

    # 1. Assign peaks features to main S1 and main S2 in the event
    for event_i, (event, sp) in enumerate(zip(events, split_peaks)):
        res_i = result[event_i]
        # Fetch the features of main S1 and main S2
        for idx, main_peak in zip([event['s1_index'], event['s2_index']],
                                  ['s1_', 's2_']):
            if idx >= 0:
                for key in ['s1_time_shadow',
                            's2_time_shadow',
                            's2_position_shadow']:
                    type_str = key.split('_')[0]
                    res_i[f'{main_peak}shadow_{key}'] = \
                        sp[f'shadow_{key}'][idx]
                    res_i[f'{main_peak}dt_{key}'] = sp[f'dt_{key}'][idx]
                    if 'time' in key:
                        res_i[f'{main_peak}nearest_dt_{type_str}'] = \
                            sp[f'nearest_dt_{type_str}'][idx]
                    if 's2' in key:
                        res_i[f'{main_peak}x_{key}'] = sp[f'x_{key}'][idx]
                        res_i[f'{main_peak}y_{key}'] = sp[f'y_{key}'][idx]
                # Record the PDF of HalfCauchy
                res_i[f'{main_peak}pdf_s2_position_shadow'] = \
                    sp['pdf_s2_position_shadow'][idx]

    # 2. Set time and endtime for events
    result['time'] = events['time']
    result['endtime'] = strax.endtime(events)
    return result
def compute(self, peaklets):
    if not len(peaklets):
        return peaklets[:0]

    if self.config['s2_merge_max_gap'] < 0:
        # Do not merge at all
        merged_s2s = np.zeros(0, dtype=peaklets.dtype)
    else:
        # Find all groups of peaklets separated by < the gap
        cluster_starts, cluster_stops = strax.find_peak_groups(
            peaklets,
            self.config['s2_merge_max_gap'])

        start_merge_at, end_merge_at = self.get_merge_instructions(
            peaklets['time'], strax.endtime(peaklets),
            areas=peaklets['area'],
            types=peaklets['type'],
            cluster_starts=cluster_starts,
            cluster_stops=cluster_stops,
            max_duration=self.config['s2_merge_max_duration'],
            max_area=self.config['s2_merge_max_area'])

        merged_s2s = strax.merge_peaks(
            peaklets,
            start_merge_at, end_merge_at,
            max_buffer=int(self.config['s2_merge_max_duration']
                           // peaklets['dt'].min()))
        merged_s2s['type'] = 2
        strax.compute_widths(merged_s2s)

    return merged_s2s
def test_superrun_chunk_and_meta(self):
    """
    Superrun chunks and metadata should contain information about
    their constituent subruns.
    """
    self.context.make(self.superrun_name, 'records')

    meta = self.context.get_meta(self.superrun_name, 'records')

    n_chunks = 0
    superrun_chunk = None
    for chunk in self.context.get_iter(self.superrun_name, 'records'):
        superrun_chunk = chunk
        n_chunks += 1

    assert len(meta['chunks']) == n_chunks == 1
    assert meta['chunks'][0]['subruns'] == superrun_chunk.subruns

    for subrun_id, start_and_end in superrun_chunk.subruns.items():
        rr = self.context.get_array(subrun_id, 'records')
        # Tests below are only true for records as we have not rechunked yet.
        # After rechunking, data start can in general differ from chunk start.
        mes = f'Start time did not match for subrun: {subrun_id}'
        assert rr['time'].min() == start_and_end['start'], mes
        mes = f'End time did not match for subrun: {subrun_id}'
        assert np.max(strax.endtime(rr)) == start_and_end['end'], mes
def _make_event_title(event, run_id, width=1600):
    """
    Function which makes the title of the plot for the specified event.

    Note:
        To center the title I use a transparent box.

    :param event: Event which we are plotting
    :param run_id: run_id
    :returns: Title as bokeh.models.Div instance
    """
    start = event['time']
    date = np.datetime_as_string(start.astype('<M8[ns]'), unit='s')
    start_ns = start - (start // 10**9) * 10**9
    end = strax.endtime(event)
    end_ns = end - start + start_ns
    event_number = event['event_number']
    text = (f'<h2>Event {event_number} from run {run_id}<br>'
            f'Recorded at {date[:10]} {date[10:]} UTC,'
            f' {start_ns} ns - {end_ns} ns </h2>')

    title = bokeh.models.Div(
        text=text,
        style={'text-align': 'left'},
        sizing_mode='scale_both',
        width=width,
        default_size=width,
        # orientation='vertical',
        width_policy='fit',
        margin=(0, 0, -30, 50),
    )
    return title
def test_create_and_load_superruns(self):
    """
    Creates "new" superrun data from already existing data. Loads and
    compares the data afterwards. Also tests the "add_run_id_field"
    option.
    """
    subrun_data = self.context.get_array(self.subrun_ids,
                                         'records',
                                         progress_bar=False,
                                         add_run_id_field=False)

    self.context.make(self.superrun_name, 'records')
    superrun_data = self.context.get_array(self.superrun_name, 'records')

    assert self.context.is_stored(self.superrun_name, 'records')
    assert np.all(subrun_data == superrun_data)

    # Load metadata and check if rechunking worked:
    chunks = self.context.get_meta(self.superrun_name, 'records')['chunks']
    assert len(chunks) == 1
    chunk = chunks[0]
    assert chunk['run_id'] == self.superrun_name
    assert chunk['first_time'] == subrun_data['time'].min()
    assert chunk['last_endtime'] == np.max(strax.endtime(subrun_data))
def compute(self, peaks):
    result = np.ones(len(peaks), dtype=self.dtype)
    result['time'], result['endtime'] = peaks['time'], strax.endtime(peaks)
    result['x_' + self.algorithm] *= float('nan')
    result['y_' + self.algorithm] *= float('nan')

    if self.model_file is None:
        # This plugin is disabled since no model is provided
        return result

    # Keep large peaks only
    peak_mask = peaks['area'] > self.config['min_reconstruction_area']
    if not np.sum(peak_mask):
        # Nothing to do, and .predict crashes on empty arrays
        return result

    # Getting actual position reconstruction
    _in = peaks['area_per_channel'][peak_mask, 0:self.config['n_top_pmts']]
    with np.errstate(divide='ignore', invalid='ignore'):
        _in = _in / np.max(_in, axis=1).reshape(-1, 1)
    _in = _in.reshape(-1, self.config['n_top_pmts'])
    _out = self.model.predict(_in)

    # Writing output to the result
    result['x_' + self.algorithm][peak_mask] = _out[:, 0]
    result['y_' + self.algorithm][peak_mask] = _out[:, 1]
    return result
def compute(self, peaks):
    result = np.ones(len(peaks), dtype=self.dtype)
    result['time'], result['endtime'] = peaks['time'], strax.endtime(peaks)
    result['x'] *= float('nan')
    result['y'] *= float('nan')

    # Keep large peaks only
    peak_mask = peaks['area'] > self.config['min_reconstruction_area']
    if not np.sum(peak_mask):
        # Nothing to do, and .predict crashes on empty arrays
        return result

    # Input: normalized hitpatterns in good top PMTs
    _in = peaks['area_per_channel'][peak_mask, :]
    _in = _in[:, :self.config['n_top_pmts']][:, self.pmt_mask]
    with np.errstate(divide='ignore', invalid='ignore'):
        _in /= _in.sum(axis=1).reshape(-1, 1)

    # Output: positions in mm (unfortunately), so convert to cm
    _out = self.nn.predict(_in) / 10

    # Set output in valid rows. Do NOT try result[peak_mask]['x']
    # unless you want all-NaN positions (boolean masks make a copy
    # unless they are used as the last index)
    result['x'][peak_mask] = _out[:, 0]
    result['y'][peak_mask] = _out[:, 1]
    return result
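# A sketch of the numpy pitfall warned about above: boolean (fancy)
# indexing returns a copy, so assigning to a field of that copy silently
# does nothing to the original array.
def _demo_mask_assignment_order():
    x = np.zeros(3, dtype=[('a', np.float64)])
    mask = np.array([True, False, True])
    x[mask]['a'] = 1.0  # writes into a temporary copy; x is unchanged
    assert np.all(x['a'] == 0)
    x['a'][mask] = 1.0  # field first, then mask: writes into a view
    assert x['a'].tolist() == [1.0, 0.0, 1.0]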
def iter(self, *args, **kwargs):
    if not os.path.exists(self.config['pax_raw_dir']):
        raise FileNotFoundError(self.config['pax_raw_dir'])
    input_dir = os.path.join(self.config['pax_raw_dir'], self.run_id)
    pax_files = sorted(glob.glob(input_dir + '/XENON*.zip'))
    pax_sizes = np.array([os.path.getsize(x) for x in pax_files])
    print(f"Found {len(pax_files)} files, {pax_sizes.sum() / 1e9:.2f} GB")
    last_endtime = 0

    for file_i, in_fn in enumerate(pax_files):
        if (self.config['stop_after_zips']
                and file_i >= self.config['stop_after_zips']):
            break
        for records in pax_to_records(
                in_fn,
                samples_per_record=self.config['samples_per_record'],
                events_per_chunk=self.config['events_per_chunk']):
            if not len(records):
                continue
            if last_endtime == 0:
                last_endtime = records[0]['time']
            new_endtime = strax.endtime(records).max()
            yield self.chunk(start=last_endtime,
                             end=new_endtime,
                             data=records)
            last_endtime = new_endtime