def bounds_to_records(bs, single=False):
    """Build strax records from a sequence of (left, right) sample bounds.

    :param bs: sequence of 2-tuples (left, right); the right bound is
        exclusive.
    :param single: if False (default), every bound gets its own record in
        its own channel (0, 1, 2, ...), with `right - left` samples set to 1
        roughly in the center of the record. If True, a single record is
        made whose data is 1 inside each bound and 0 elsewhere.
    :return: array of records with dt set to 1.
    """
    # Samples per record: the largest boundary value, rounded up to even
    n_samples = 0
    if len(bs):
        n_samples = max([edge for bound in bs for edge in bound])
        if n_samples % 2:
            # Make sure we sometimes end in zero
            # TODO: not a great way to do it, you miss other cases..
            n_samples += 1

    if single:
        # One record holding all the bounds
        recs = np.zeros(1, dtype=strax.record_dtype(n_samples))
        for left, right in bs:
            recs[0]['data'][left:right] = 1
    else:
        # One record (and one channel) per bound
        recs = np.zeros(len(bs), dtype=strax.record_dtype(n_samples))
        for channel, (left, right) in enumerate(bs):
            width = right - left  # Exclusive right bound, no + 1
            offset = (n_samples - (right - left)) // 2
            rec = recs[channel]
            rec['time'] = left
            rec['length'] = offset + width
            rec['data'][offset:offset + width] = 1
            assert rec['data'].sum() == width
            rec['channel'] = channel
        assert len(np.unique(recs['channel'])) == len(bs)

    recs['dt'] = 1
    return recs
class Records(strax.Plugin):
    """
    Shamelessly stolen from straxen

    Drops raw records of channels selected by a to_pe cut, zeroes the
    out-of-bounds samples and cuts the data outside hits.
    """
    __version__ = '0.0.2'

    depends_on = ('raw_records', )
    data_kind = 'records'
    compressor = 'zstd'
    parallel = True
    rechunk_on_save = False
    dtype = strax.record_dtype()

    def compute(self, raw_records):
        """Return the records that survive the channel cut, with samples
        outside the (extended) hits cut away by strax.cut_outside_hits."""
        # Channels whose to_pe exceeds adc_to_e / min_gain are removed
        # (gain unknown or low)
        to_pe_limit = adc_to_e / self.config['min_gain']
        bad_channels = np.argwhere(
            self.config['to_pe'] > to_pe_limit).reshape(-1)

        r = raw_records
        for bad_channel in bad_channels:
            keep = r['channel'] != bad_channel
            r = r[keep]

        strax.zero_out_of_bounds(r)
        hits = strax.find_hits(r, threshold=self.config['hit_threshold'])
        left_ext = self.config['left_cut_extension']
        right_ext = self.config['right_cut_extension']
        strax.cut_outside_hits(r, hits,
                               left_extension=left_ext,
                               right_extension=right_ext)
        return r
def raw_to_records(raw_records):
    """Copy raw_records into a newly allocated array of record dtype.

    The output has as many entries as raw_records and the record length
    derived from the dtype of raw_records.

    :param raw_records: array of raw records.
    :return: zero-initialised array of strax.record_dtype filled through
        strax.copy_to_buffer.
    """
    n_records = len(raw_records)
    samples_per_record = record_length_from_dtype(raw_records.dtype)
    out_dtype = strax.record_dtype(samples_per_record)
    records = np.zeros(n_records, dtype=out_dtype)
    strax.copy_to_buffer(raw_records, records, '_copy_raw_records')
    return records
def _make_fake_records(dummy_records):
    """
    Creates some specific records to test get_hitlet_data.

    :param dummy_records: sequence of pulses, each pulse being a sequence
        of fragments (waveforms of 6 samples each).
    :return: array of records (record length 6, dt 1) with time, length,
        data, record_i and pulse_length filled in.
    """
    nfragments = [len(pulse) for pulse in dummy_records]
    records = np.zeros(np.sum(nfragments), strax.record_dtype(6))
    records['dt'] = 1

    t = 10  # Need some start time to avoid negative times
    row = 0
    for pulse, n_in_pulse in zip(dummy_records, nfragments):
        last_i = n_in_pulse - 1
        for i, samples in enumerate(pulse):
            rec = records[row]
            rec['time'] = t
            if i == last_i:
                # The last fragment is shortened by whatever _count reports
                rec['length'] = len(samples) - _count(samples)
            else:
                rec['length'] = len(samples)
            rec['data'] = samples
            rec['record_i'] = i

            if i == last_i:
                t += rec['length'] + 10  # +10 to ensure non-overlap
            else:
                t += rec['length']
            row += 1

    # Every fragment of a pulse gets the summed length of that pulse
    start = 0
    for n_in_pulse in nfragments:
        stop = start + n_in_pulse
        records['pulse_length'][start:stop] = np.sum(
            records['length'][start:stop])
        start = stop
    return records
def bounds_to_records(bs, single=False, single_channel=False):
    """Return strax records corresponding to a list of 2-tuples of boundaries.

    By default, one record is made per boundary tuple, with data equal to 1
    on a stretch of `right - left` samples roughly in the center of the
    record. The records go to consecutive channels: first 0, then 1, etc.

    :param bs: sequence of (left, right) tuples; right is exclusive.
    :param single: if True, instead create a single record whose data is 1
        inside the given bounds and zero outside. Its time is set to 0 and
        its length to the full record length.
    :param single_channel: if True, instead create all pulses in channel 0.
        You should only feed in disjoint bounds when using this.
    :return: array of records with dt set to 1.

    TODO: this probably needs tests itself...
    """
    # Samples per record: the largest boundary value, rounded up to even
    n_samples = 0
    if len(bs):
        n_samples = max([edge for bound in bs for edge in bound])
        if n_samples % 2:
            # Make sure we sometimes end in zero
            # TODO: not a great way to do it, you miss other cases..
            n_samples += 1

    if single:
        # Make a single record with 1 inside the bounds, 0 outside
        recs = np.zeros(1, dtype=strax.record_dtype(n_samples))
        for left, right in bs:
            recs[0]['data'][left:right] = 1
        recs[0]['time'] = 0
        recs[0]['length'] = n_samples
    else:
        # Each bound gets its own record
        recs = np.zeros(len(bs), dtype=strax.record_dtype(n_samples))
        for index, (left, right) in enumerate(bs):
            width = right - left  # Exclusive right bound, no + 1
            offset = (n_samples - (right - left)) // 2
            rec = recs[index]
            rec['time'] = left
            rec['length'] = offset + width
            rec['data'][offset:offset + width] = 1
            assert rec['data'].sum() == width
            if single_channel:
                rec['channel'] = 0
            else:
                rec['channel'] = index
        if not single_channel:
            assert len(np.unique(recs['channel'])) == len(bs)

    recs['dt'] = 1
    return recs
def __init__(self, config):
    """Store the config, create the optical raw-data simulator and
    preallocate the record and truth buffers.

    :param config: configuration passed on to wfsim.RawDataOptical.
    """
    self.config = config
    self.rawdata = wfsim.RawDataOptical(self.config)

    record_dtype = strax.record_dtype()
    self.record_buffer = np.zeros(5000000,
                                  dtype=record_dtype)  # 2*250 ms buffer

    # Truth rows carry the instruction fields, the extra truth fields
    # and a boolean 'fill' flag
    truth_dtype = instruction_dtype + truth_extra_dtype + [('fill', bool)]
    self.truth_buffer = np.zeros(10000, dtype=truth_dtype)
def infer_dtype(self):
    """Return a dict mapping each provided data type to its dtype.

    The record length is taken from the dtype of 'raw_records_nv' and
    stored in self.record_length. The dtypes are paired with
    self.provides in this order: raw records, (diagnostic) lone records,
    lone record statistics.
    """
    raw_dtype = self.deps['raw_records_nv'].dtype_for('raw_records_nv')
    self.record_length = strax.record_length_from_dtype(raw_dtype)

    dtypes = (
        strax.raw_record_dtype(self.record_length),
        strax.record_dtype(self.record_length),
        lone_record_statistics_dtype(self.config['n_nveto_pmts']),
    )
    return dict(zip(self.provides, dtypes))
def infer_dtype(self):
    """Return a dict mapping data type names to dtypes.

    Every provided name containing 'records' gets a record dtype whose
    record length is taken from the plugin making raw_records (also stored
    in self.record_length). 'veto_regions' and 'pulse_counts' get their
    own dedicated dtypes.
    """
    raw_records_plugin = self.deps['raw_records']
    self.record_length = strax.record_length_from_dtype(
        raw_records_plugin.dtype_for('raw_records'))

    dtype = {
        name: strax.record_dtype(self.record_length)
        for name in self.provides
        if 'records' in name}
    dtype['veto_regions'] = strax.hit_dtype
    dtype['pulse_counts'] = pulse_count_dtype(self.config['n_tpc_pmts'])
    return dtype
class Records(strax.Plugin):
    """Make records from raw_records: exclude tails, then cut the data
    outside hits."""
    depends_on = ('raw_records', )
    data_kind = 'records'  # TODO: indicate cuts have been done?
    compressor = 'zstd'
    parallel = True
    rechunk_on_save = False
    dtype = strax.record_dtype()

    def compute(self, raw_records):
        """Return the output of strax.exclude_tails after
        strax.cut_outside_hits has been applied to it."""
        records = strax.exclude_tails(raw_records, to_pe)
        strax.cut_outside_hits(records, strax.find_hits(records))
        return records
class Records(strax.Plugin):
    """Source plugin yielding n_chunks arrays of dummy records."""
    provides = 'records'
    depends_on = tuple()
    dtype = strax.record_dtype()

    def iter(self, *args, **kwargs):
        """Yield one array of recs_per_chunk records per chunk.

        In chunk number i every record has time i, length 1 and dt 1;
        the channels count up from 0.
        """
        for chunk_i in range(n_chunks):
            chunk = np.zeros(recs_per_chunk, self.dtype)
            chunk['time'] = chunk_i
            chunk['length'] = 1
            chunk['dt'] = 1
            chunk['channel'] = np.arange(len(chunk))
            yield chunk
def infer_dtype(self):
    """Return a dict mapping data type names to dtypes.

    Provided names ending in 'records' get a record dtype with the same
    number of samples per record as raw_records; 'veto_regions' and
    'pulse_counts' get their own dedicated dtypes.
    """
    # Measure the record length on a dummy raw record made by numpy
    rr_dtype = self.deps['raw_records'].dtype_for('raw_records')
    dummy = np.zeros(1, rr_dtype)
    record_length = len(dummy[0]['data'])

    dtype = {
        name: strax.record_dtype(record_length)
        for name in self.provides
        if name.endswith('records')}
    dtype['veto_regions'] = strax.hit_dtype
    dtype['pulse_counts'] = pulse_count_dtype(n_tpc)
    return dtype
def test_splitter_outer():
    """Split one peak and one hitlet holding [0, 2, 2, 0, 2, 2, 1] with the
    local_minimum algorithm; both must come out as [0, 2, 2] followed by
    [0, 2, 2, 1]."""
    waveform = [0, 2, 2, 0, 2, 2, 1]
    n_samples = len(waveform)

    # A single record containing the complete waveform
    records = np.zeros(1, dtype=strax.record_dtype(n_samples))
    records['dt'] = 1
    records['data'] = waveform
    records['length'] = n_samples
    records['pulse_length'] = n_samples
    to_pe = np.ones(10)

    hits = strax.find_hits(records, np.ones(1))
    hits['left_integration'] = hits['left']
    hits['right_integration'] = hits['right']

    # The same waveform stored as a peak and as a hitlet
    peaks = np.zeros(1, dtype=strax.peak_dtype())
    hitlets = np.zeros(1, dtype=strax.hitlet_with_data_dtype(10))
    for container in (peaks, hitlets):
        container['dt'] = 1
        container['data'][0, :n_samples] = waveform
        container['length'] = n_samples

    rlinks = strax.record_links(records)
    split_options = dict(algorithm='local_minimum',
                         min_height=1,
                         min_ratio=0)
    peaks = strax.split_peaks(peaks, hits, records, rlinks, to_pe,
                              data_type='peaks', **split_options)
    hitlets = strax.split_peaks(hitlets, hits, records, rlinks, to_pe,
                                data_type='hitlets', **split_options)

    for name, split in (('peaks', peaks), ('hitlets', hitlets)):
        data = split[0]['data'][:split[0]['length']]
        assert np.all(
            data == [0, 2, 2]
        ), f'Wrong split for {name}, got {data}, expected {[0, 2, 2]}.'

        data = split[1]['data'][:split[1]['length']]
        assert np.all(
            data == [0, 2, 2, 1]
        ), f'Wrong split for {name}, got {data}, expected {[0, 2, 2, 1]}.'
class RecordReader(strax.Plugin):
    """
    Reads records in from disk

    Chunk i is read from the file named by the zero-padded six digit
    chunk number inside config['input_dir'].
    """
    provides = 'raw_records'
    depends_on = tuple()
    dtype = strax.record_dtype()
    rechunk_on_save = False

    def source_finished(self):
        """Always report the source as finished."""
        return True

    def is_ready(self, chunk_i):
        """Return True if the file for chunk_i exists, False otherwise."""
        try:
            self._chunk_path(chunk_i)
        except ValueError:
            return False
        return True

    def _load_chunk(self, fp):
        """Load the blosc-compressed records in fp, sorted by time."""
        loaded = strax.load_file(fp, 'blosc', strax.record_dtype())
        return strax.sort_by_time(loaded)

    def _chunk_path(self, chunk_i):
        """Return the path of the file for chunk_i.

        :raises ValueError: if that file does not exist.
        """
        fp = os.path.join(self.config['input_dir'], f'{chunk_i:06d}')
        if os.path.exists(fp):
            return fp
        raise ValueError(f"Can't find chunk {chunk_i:06d} in {self.config['input_dir']}")

    def compute(self, chunk_i):
        """Load chunk_i, baseline and integrate it, print a short summary
        and return the records."""
        records = self._load_chunk(self._chunk_path(chunk_i))
        strax.baseline(records)
        strax.integrate(records)

        if not len(records):
            print(f'{chunk_i}: read an empty chunk!')
            return records

        # Time between the first and last record, scaled by 1e9
        timespan_sec = (records[-1]['time'] - records[0]['time']) / 1e9
        print(f'{chunk_i}: read {records.nbytes/1e6:.2f} MB '
              f'({len(records)} records, '
              f'{timespan_sec:.2f} live seconds)')
        return records
def test_inputs_are_empty(self):
    """get_hitlets_data must return nothing for empty hitlets, and raise a
    ValueError for non-empty hitlets combined with empty records."""
    hitlets_empty = np.zeros(0, dtype=strax.hitlet_with_data_dtype(2))
    records_empty = np.zeros(0, dtype=strax.record_dtype(10))

    # Empty hitlets give an empty result, whatever the records are
    for records in (self.records, records_empty):
        hitlets_result = strax.get_hitlets_data(hitlets_empty,
                                                records,
                                                np.ones(3000))
        assert len(hitlets_result
                   ) == 0, 'get_hitlet_data returned result for empty hitlets'

    with self.assertRaises(ValueError):
        strax.get_hitlets_data(self.hitlets, records_empty, np.ones(3000))
def _load_chunk(self, path, kind='central'):
    """Load the records of all reader files in a chunk directory.

    :param path: directory holding the blosc-compressed 'reader_*' files.
    :param kind: 'central' returns all records, sorted by time. Any other
        value returns only the part of the records on one side of a break
        found by strax.from_break; the left side if kind is 'post'.
    :return: array of records.

    If config['erase'] is set, the directory is removed after loading.
    This now also happens for kind='central'; previously the early return
    for central chunks skipped the cleanup, so those directories were
    never erased.
    """
    records = [
        strax.load_file(fn, compressor='blosc', dtype=strax.record_dtype())
        for fn in glob.glob(f'{path}/reader_*')
    ]
    records = np.concatenate(records)
    records = strax.sort_by_time(records)

    if kind == 'central':
        result = records
    else:
        result = strax.from_break(
            records,
            safe_break=int(1e3),  # TODO config?
            left=kind == 'post',
            tolerant=True)

    # Clean up only after the data is safely in memory
    if self.config['erase']:
        shutil.rmtree(path)
    return result
def test_n_hits():
    """Feed two short records with three hits in total to the peaklets
    plugin and check the n_hits it reports. Skipped when utilix is not
    configured."""
    if not straxen.utilix_is_configured():
        return

    waveforms = ([0, 1, 1, 0, 1],
                 [0, 1, 0, 0, 0])
    records = np.zeros(2, dtype=strax.record_dtype())
    records['length'] = 5
    records['pulse_length'] = 5
    records['dt'] = 1
    records['channel'] = [0, 1]
    for row, waveform in enumerate(waveforms):
        records['data'][row, :5] = waveform

    st = straxen.contexts.xenonnt_online()
    st.set_config({'hit_min_amplitude': 1})
    plugin = st.get_single_plugin('0', 'peaklets')
    peaklets = plugin.compute(records, 0, 999)['peaklets']
    assert peaklets['n_hits'] == 3, f"Peaklet has the wrong number of hits!"
def infer_dtype(self):
    """Return a dict mapping each provided data type to its dtype.

    The record length is taken from the dtype of 'raw_records_nv' and
    stored in self.record_length; the number of channels follows from the
    inclusive 'nveto' range in config['channel_map']. The dtypes are
    paired with self.provides in this order: raw records, (diagnostic)
    lone records, lone record statistics.
    """
    raw_dtype = self.deps['raw_records_nv'].dtype_for('raw_records_nv')
    self.record_length = strax.record_length_from_dtype(raw_dtype)

    channel_range = self.config['channel_map']['nveto']
    first_channel = channel_range[0]
    last_channel = channel_range[1]
    n_channel = (last_channel - first_channel) + 1  # Range is inclusive

    dtypes = (
        strax.raw_record_dtype(self.record_length),
        strax.record_dtype(self.record_length),
        lone_record_statistics_dtype(n_channel),
    )
    return dict(zip(self.provides, dtypes))
def _load_chunk(self, path, kind='central'):
    """Load the records of all files in a chunk directory.

    :param path: directory holding the blosc-compressed record files.
    :param kind: 'central' returns all records, sorted by time. Any other
        value returns only the part of the records on one side of a break
        found by strax.from_break; the left side if kind is 'post'.
    :return: array of records.

    If config['erase'] is set, the directory is removed after loading.
    """
    pieces = []
    for fn in sorted(glob.glob(f'{path}/*')):
        pieces.append(strax.load_file(fn,
                                      compressor='blosc',
                                      dtype=strax.record_dtype()))
    result = strax.sort_by_time(np.concatenate(pieces))

    if kind != 'central':
        result = strax.from_break(
            result,
            safe_break=self.config['safe_break_in_pulses'],
            left=kind == 'post',
            tolerant=True)

    if self.config['erase']:
        shutil.rmtree(path)
    return result
class RecordsFromPax(strax.Plugin):
    """Source plugin producing raw_records from pax zip files found in
    config['pax_raw_dir'] / run_id."""
    provides = 'raw_records'
    data_kind = 'raw_records'
    depends_on = tuple()
    dtype = strax.record_dtype()
    parallel = False

    def iter(self, *args, **kwargs):
        """Yield the result of pax_to_records for each zip file, in sorted
        order.

        Stops early once config['stop_after_zips'] files (if nonzero) have
        been handled.

        :raises FileNotFoundError: if config['pax_raw_dir'] does not exist.
        """
        raw_dir = self.config['pax_raw_dir']
        if not os.path.exists(raw_dir):
            raise FileNotFoundError(raw_dir)

        input_dir = os.path.join(self.config['pax_raw_dir'], self.run_id)
        pax_files = sorted(glob.glob(input_dir + '/*.zip'))
        pax_sizes = np.array([os.path.getsize(x) for x in pax_files])
        print(f"Found {len(pax_files)} files, {pax_sizes.sum() / 1e9:.2f} GB")

        for file_i, in_fn in enumerate(pax_files):
            limit = self.config['stop_after_zips']
            if limit and file_i >= limit:
                break
            yield strax.xenon.pax_interface.pax_to_records(in_fn)
def _make_fake_records(self, dummy_records):
    """
    Creates some specific records to test get_hitlet_data.

    :param dummy_records: sequence of pulses, each pulse being a sequence
        of fragments (waveforms of 6 samples each).
    :return: array of records (record length 6, dt 1) with time, length,
        data, record_i and pulse_length filled in.
    """
    n_fragments = [len(pulse) for pulse in dummy_records]
    records = np.zeros(np.sum(n_fragments), strax.record_dtype(6))
    records['dt'] = 1

    t = 10  # Need some start time to avoid negative times
    row = 0
    for pulse, n_in_pulse in zip(dummy_records, n_fragments):
        last_i = n_in_pulse - 1
        for record_i, waveform in enumerate(pulse):
            rec = records[row]
            rec['time'] = t
            if record_i == last_i:
                # The last fragment is shortened by the sample count
                # reported by _count_zle_samples
                rec['length'] = len(waveform) - self._count_zle_samples(
                    waveform)
            else:
                rec['length'] = len(waveform)
            rec['data'] = waveform
            rec['record_i'] = record_i

            if record_i == last_i:
                t += rec['length'] + 10  # +10 to ensure non-overlap
            else:
                t += rec['length']
            row += 1

    # Every fragment of a pulse gets the summed length of that pulse
    start = 0
    for n_in_pulse in n_fragments:
        stop = start + n_in_pulse
        records['pulse_length'][start:stop] = np.sum(
            records['length'][start:stop])
        start = stop
    return records
class Records(strax.Plugin):
    """
    Shamelessly stolen from straxen

    Keeps the raw records of channels covered by to_pe and cuts the data
    outside hits.
    """
    __version__ = '0.0.2'

    depends_on = ('raw_records',)
    data_kind = 'records'  # TODO: indicate cuts have been done?
    compressor = 'zstd'
    parallel = True
    rechunk_on_save = False
    dtype = strax.record_dtype()

    def compute(self, raw_records):
        """Return the records with channel below len(to_pe), after
        strax.cut_outside_hits has been applied to them."""
        # Channels without a to_pe entry have an unknown gain: drop them
        gain_known = raw_records['channel'] < len(to_pe)
        records = raw_records[gain_known]
        strax.cut_outside_hits(records, strax.find_hits(records))
        return records
def test_processing():
    """Test ParallelSource plugin under several conditions"""
    # It's always harder with a small mailbox:
    strax.Mailbox.DEFAULT_MAX_MESSAGES = 2

    on_off = (True, False)
    for request_peaks in on_off:
        for peaks_parallel in on_off:
            for max_workers in (1, 2):
                Peaks.parallel = peaks_parallel
                print(f"\nTesting with request_peaks {request_peaks}, "
                      f"peaks_parallel {peaks_parallel}, "
                      f"max_workers {max_workers}")

                mystrax = strax.Context(storage=[],
                                        register=[Records, Peaks])
                if request_peaks:
                    target = 'peaks'
                else:
                    target = 'records'
                bla = mystrax.get_array(run_id=run_id,
                                        targets=target,
                                        max_workers=max_workers)

                # One output row per generated record, in the dtype
                # of whatever was requested
                assert len(bla) == recs_per_chunk * n_chunks
                if request_peaks:
                    expected_dtype = strax.peak_dtype()
                else:
                    expected_dtype = strax.record_dtype()
                assert bla.dtype == expected_dtype
def load_chunk(self, folder, kind='central'):
    """Load the records of all files in folder, sorted by time.

    :param folder: directory holding the blosc-compressed record files.
    :param kind: for 'central' all records are returned. For any other
        value, and only if config['do_breaks'] is set, just the part of
        the records on one side of a break found by strax.from_break is
        returned; the left side if kind is 'post'.
    :return: array of records, with config['run_start'] added to time.
    """
    pieces = []
    for fn in os.listdir(folder):
        pieces.append(strax.load_file(os.path.join(folder, fn),
                                      compressor='blosc',
                                      dtype=strax.record_dtype()))
    result = strax.sort_by_time(np.concatenate(pieces))

    if kind != 'central' and self.config['do_breaks']:
        result = strax.from_break(result,
                                  safe_break=self.config['safe_break'],
                                  left=kind == 'post',
                                  tolerant=True)

    result['time'] += self.config['run_start']
    return result
class Records(strax.ParallelSourcePlugin):
    """Source plugin making n_chunks chunks of dummy records; crashes on
    purpose when config['crash'] is set."""
    provides = 'records'
    depends_on = tuple()
    dtype = strax.record_dtype()

    def compute(self, chunk_i):
        """Return recs_per_chunk records with time chunk_i, length 1, dt 1
        and channels counting up from 0.

        :raises SomeCrash: if config['crash'] is set.
        """
        if self.config['crash']:
            raise SomeCrash("CRASH!!!!")

        chunk = np.zeros(recs_per_chunk, self.dtype)
        chunk['time'] = chunk_i
        chunk['length'] = 1
        chunk['dt'] = 1
        chunk['channel'] = np.arange(len(chunk))
        return chunk

    def source_finished(self):
        """Always report the source as finished."""
        return True

    def is_ready(self, chunk_i):
        """Chunks 0 up to (not including) n_chunks are available."""
        return chunk_i < n_chunks
class Records(strax.Plugin):
    """Make records from raw_records: drop out-of-range channels,
    optionally veto tails and filter, and cut the data outside hits."""
    __version__ = '0.1.1'

    depends_on = ('raw_records', )
    data_kind = 'records'
    compressor = 'zstd'
    parallel = 'process'
    rechunk_on_save = False
    dtype = strax.record_dtype()

    def setup(self):
        """Load to_pe for this run from config['to_pe_file']."""
        self.to_pe = get_to_pe(self.run_id, self.config['to_pe_file'])

    def compute(self, raw_records):
        """Return the processed records for one chunk of raw_records."""
        # Remove records from funny channels (if present)
        known_channel = raw_records['channel'] < len(self.to_pe)
        r = raw_records[known_channel]

        # Do not trust in DAQ + strax.baseline to leave the
        # out-of-bounds samples to zero.
        strax.zero_out_of_bounds(r)

        # Experimental data reduction
        if self.config['s2_tail_veto']:
            r = strax.exclude_tails(r, self.to_pe)

        # Hits are found before filtering
        hits = strax.find_hits(r)

        # Filter to concentrate the PMT pulses
        if self.config['filter']:
            strax.filter_records(r, np.array(self.config['filter']))

        left_ext, right_ext = self.config['save_outside_hits']
        r = strax.cut_outside_hits(r, hits,
                                   left_extension=left_ext,
                                   right_extension=right_ext)

        # Probably overkill, but just to be sure...
        strax.zero_out_of_bounds(r)
        return r
class Records(strax.Plugin):
    """Source plugin making n_chunks chunks of dummy records, shifted in
    time by config['secret_time_offset']; crashes on purpose when
    config['crash'] is set."""
    provides = 'records'
    parallel = 'process'
    depends_on = tuple()
    dtype = strax.record_dtype()

    def source_finished(self):
        """Always report the source as finished."""
        return True

    def is_ready(self, chunk_i):
        """Chunks 0 up to (not including) n_chunks are available."""
        return chunk_i < n_chunks

    def compute(self, chunk_i):
        """Return a chunk spanning [t0, t0 + 1) holding recs_per_chunk
        records at time t0 = chunk_i + secret_time_offset, each with
        length 1 and dt 1, and channels counting up from 0.

        :raises SomeCrash: if config['crash'] is set.
        """
        if self.config['crash']:
            raise SomeCrash("CRASH!!!!")

        data = np.zeros(recs_per_chunk, self.dtype)
        t0 = chunk_i + self.config['secret_time_offset']
        data['time'] = t0
        data['length'] = 1
        data['dt'] = 1
        data['channel'] = np.arange(len(data))
        return self.chunk(start=t0, end=t0 + 1, data=data)
def test_find_hits():
    """Tests the hitfinder with simple example pulses"""
    # (waveform, hit intervals expected from _find_hits) pairs; the right
    # bound of an interval is exclusive
    cases = [
        ([], []),
        ([1], [(0, 1)]),
        ([1, 0], [(0, 1)]),
        ([1, 0, 1], [(0, 1), (2, 3)]),
        ([1, 0, 1, 0], [(0, 1), (2, 3)]),
        ([1, 0, 1, 0, 1], [(0, 1), (2, 3), (4, 5)]),
        ([0, 1, 2, 0, 4, -1, 60, 700, -4], [(1, 3), (4, 5), (6, 8)]),
        ([1, 1, 2, 0, 4, -1, 60, 700, -4], [(0, 3), (4, 5), (6, 8)]),
        ([1, 0, 2, 3, 4, -1, 60, 700, -4], [(0, 1), (2, 5), (6, 8)]),
        ([1, 0, 2, 3, 4, -1, 60, 700, 800], [(0, 1), (2, 5), (6, 9)]),
        ([0, 0, 2, 3, 4, -1, 60, 700, 800], [(2, 5), (6, 9)]),
    ]

    for waveform, expected in cases:
        # One record of 9 samples; the waveform sits at its start
        records = np.zeros(1, strax.record_dtype(9))
        records[0]['data'][:len(waveform)] = waveform
        records['dt'] = 1
        records['length'] = 9

        found = _find_hits(records)
        assert len(found) == len(expected)
        assert found == expected
class Records(strax.Plugin):
    """Make records from raw_records: keep the channels covered by to_pe
    and cut the data outside hits."""
    __version__ = '0.0.2'

    depends_on = ('raw_records', )
    data_kind = 'records'  # TODO: indicate cuts have been done?
    compressor = 'zstd'
    parallel = True
    rechunk_on_save = False
    dtype = strax.record_dtype()

    def compute(self, raw_records):
        """Return the records with channel below len(to_pe), after
        strax.cut_outside_hits has been applied to them."""
        # Remove records from channels for which the gain is unknown
        gain_known = raw_records['channel'] < len(to_pe)
        records = raw_records[gain_known]

        # Experimental data reduction with strax.exclude_tails is
        # disabled here: it seems to remove many S2s since it triggers
        # on S1s (perhaps due to a larger amount of afterpulses).

        strax.cut_outside_hits(records, strax.find_hits(records))
        return records
def test_empty_overlap(self):
    """A hitlet in channel 1 that overlaps in time with a record of
    channel 0 must only receive the data of channel 1."""
    # Create fake records for which hitlet overlaps with channel 0
    # although hit is in channel 1.
    # See also github.com/AxFoundation/strax/pull/549
    records = np.zeros(3, strax.record_dtype(10))
    records['channel'] = (0, 1, 1)
    records['length'] = (10, 3, 10)
    records['time'] = (0, 0, 5)
    records['dt'] = 1
    records['data'][-1] = np.ones(10)  # Only the last record has data

    # Assume we extend our hits by 1 sample hence hitlet starts at 4
    hitlet = np.zeros(1, strax.hitlet_with_data_dtype(11))
    hitlet_fields = (('time', 4), ('dt', 1), ('length', 11), ('channel', 1))
    for field, value in hitlet_fields:
        hitlet[field] = value

    hitlet = strax.get_hitlets_data(hitlet, records, np.ones(10))

    assert hitlet['time'] == 5
    assert hitlet['length'] == 10
    assert np.sum(hitlet['data']) == 10
    assert hitlet['data'][0, 0] == 1
def pax_to_records(input_filename,
                   samples_per_record=strax.DEFAULT_RECORD_LENGTH,
                   events_per_chunk=10):
    """Yield arrays of pulse records read from pax zip input_filename

    This is a generator: the records of events_per_chunk non-empty events
    are collected, then concatenated, sorted by time, baselined, integrated
    and yielded as one array. Whatever is left after the last event is
    yielded at the end (if it is not empty).

    This only works if you have pax installed in your strax environment,
    which is somewhat tricky.

    :param input_filename: pax zip file, passed to pax as input_name.
    :param samples_per_record: number of samples stored per record; longer
        pulses are split over several records.
    :param events_per_chunk: number of non-empty events whose records are
        combined in one yielded array.
    """
    # Monkeypatch matplotlib so pax is importable
    # See https://github.com/XENON1T/pax/pull/734
    import matplotlib
    matplotlib._cntr = None

    from pax import core  # Pax is not a dependency

    # Only the input plugin group is configured, with no encoder plugin
    mypax = core.Processor(
        'XENON1T',
        config_dict=dict(
            pax=dict(look_for_config_in_runs_db=False,
                     plugin_group_names=['input'],
                     encoder_plugin=None,
                     input_name=input_filename),
            # Fast startup: skip loading big maps
            WaveformSimulator=dict(s1_light_yield_map='placeholder_map.json',
                                   s2_light_yield_map='placeholder_map.json',
                                   s1_patterns_file=None,
                                   s2_patterns_file=None)))

    print(f"Starting conversion, {events_per_chunk} evt/chunk")

    # Record arrays (one per event) waiting to be yielded
    results = []

    def finish_results():
        # Merge the pending per-event arrays into one array of records
        # and reset the pending list
        nonlocal results
        records = np.concatenate(results)
        # In strax data, records are always stored
        # sorted, baselined and integrated
        records = strax.sort_by_time(records)
        strax.baseline(records)
        strax.integrate(records)
        print("Returning %d records" % len(records))
        results = []
        return records

    for event in mypax.get_events():
        event = mypax.process_event(event)

        if not len(event.pulses):
            # Triggerless pax data contains many empty events
            # at the end. With the fixed events per chunk setting
            # this can lead to empty files, which confuses strax.
            continue

        pulse_lengths = np.array([p.length for p in event.pulses])

        # Allocate all records needed for this event in one go
        n_records_tot = records_needed(pulse_lengths,
                                       samples_per_record).sum()
        records = np.zeros(n_records_tot,
                           dtype=strax.record_dtype(samples_per_record))
        output_record_index = 0  # Record offset in data

        for p in event.pulses:
            n_records = records_needed(p.length, samples_per_record)

            for rec_i in range(n_records):
                r = records[output_record_index]
                # Sample indices are multiplied by 10 to get times,
                # matching the dt of 10 set below
                # NOTE(review): presumably 10 ns per sample -- confirm
                r['time'] = (event.start_time + p.left * 10 +
                             rec_i * samples_per_record * 10)
                r['channel'] = p.channel
                r['pulse_length'] = p.length
                r['record_i'] = rec_i
                r['dt'] = 10

                # How much are we storing in this record?
                if rec_i != n_records - 1:
                    # There's more chunks coming, so we store a full chunk
                    n_store = samples_per_record
                    assert p.length > samples_per_record * (rec_i + 1)
                else:
                    # Just enough to store the rest of the data
                    # Note it's not p.length % samples_per_record!!!
                    # (that would be zero if we have to store a full record)
                    n_store = p.length - samples_per_record * rec_i

                assert 0 <= n_store <= samples_per_record
                r['length'] = n_store

                # Copy this record's slice of the raw pulse data
                offset = rec_i * samples_per_record
                r['data'][:n_store] = p.raw_data[offset:offset + n_store]
                output_record_index += 1

        results.append(records)
        if len(results) >= events_per_chunk:
            yield finish_results()

    mypax.shutdown()

    # Flush the records of the last, possibly incomplete, group of events
    if len(results):
        y = finish_results()
        if len(y):
            yield y