Example #1
def bounds_to_records(bs, single=False):
    if not len(bs):
        n_samples = 0
    else:
        n_samples = max([a for b in bs for a in b])
        if n_samples % 2:
            # Make sure we sometimes end in zero
            # TODO: not a great way to do it, you miss other cases..
            n_samples += 1
    if not single:
        # Each bound gets its own pulse, in its own channel
        recs = np.zeros(len(bs), dtype=strax.record_dtype(n_samples))
        for i, (l, r) in enumerate(bs):
            # Add waveform roughly in the center
            length = r - l  # Exclusive right bound, no + 1
            pad = (n_samples - (r - l)) // 2
            recs[i]['time'] = l
            recs[i]['length'] = pad + length
            recs[i]['data'][pad:pad + length] = 1
            assert recs[i]['data'].sum() == length
            recs[i]['channel'] = i
        assert len(np.unique(recs['channel'])) == len(bs)
    else:
        # Make a single pulse with 1 inside the bounds, 0 outside
        recs = np.zeros(1, dtype=strax.record_dtype(n_samples))
        for l, r in bs:
            recs[0]['data'][l:r] = 1

    recs['dt'] = 1
    return recs
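# Quick usage sketch (not from the original source; assumes numpy and
# strax are imported): two disjoint bounds become two centered pulses
# in channels 0 and 1.
recs = bounds_to_records([(0, 4), (6, 10)])
assert len(recs) == 2
assert recs[0]['channel'] == 0 and recs[1]['channel'] == 1
assert recs[0]['data'].sum() == 4  # four unit samples inside (0, 4)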
Example #2
class Records(strax.Plugin):
    """
    Shamelessly stolen from straxen
    """
    __version__ = '0.0.2'

    depends_on = ('raw_records', )
    data_kind = 'records'
    compressor = 'zstd'
    parallel = True
    rechunk_on_save = False
    dtype = strax.record_dtype()

    def compute(self, raw_records):
        # Remove records from channels for which the gain is unknown
        # or low
        channels_to_cut = np.argwhere(
            self.config['to_pe'] > (adc_to_e / self.config['min_gain']))
        r = raw_records
        for ch in channels_to_cut.reshape(-1):
            r = r[r['channel'] != ch]

        strax.zero_out_of_bounds(r)
        hits = strax.find_hits(r, threshold=self.config['hit_threshold'])
        strax.cut_outside_hits(
            r,
            hits,
            left_extension=self.config['left_cut_extension'],
            right_extension=self.config['right_cut_extension'])
        return r
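# A vectorized variant of the channel cut above; a sketch only, assuming
# `to_pe` holds one conversion factor per channel and that `adc_to_e` and
# the config values are defined as in the example.
def cut_bad_channels(raw_records, to_pe, adc_to_e, min_gain):
    # Boolean mask per channel: True where the gain is unknown or too low
    bad_channel = to_pe > (adc_to_e / min_gain)
    # Keep only records whose channel is not flagged
    return raw_records[~bad_channel[raw_records['channel']]]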
Example #3
def raw_to_records(raw_records):
    records = np.zeros(
        len(raw_records),
        dtype=strax.record_dtype(
            record_length_from_dtype(raw_records.dtype)))
    strax.copy_to_buffer(raw_records, records, '_copy_raw_records')
    return records
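# Minimal sketch of the length helper used above: it reads the number of
# waveform samples per record off the dtype (cf. the inline computation
# in Example #11; assumes numpy is imported as np).
def record_length_from_dtype(dtype):
    return len(np.zeros(1, dtype)[0]['data'])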
Example #4
def _make_fake_records(dummy_records):
    """
    Creates some specific records to test get_hitlet_data.
    """
    nfragments = [len(f) for f in dummy_records]
    records = np.zeros(np.sum(nfragments), strax.record_dtype(6))
    records['dt'] = 1
    time_offset = 10  # Need some start time to avoid negative times

    fragment_ind = 0
    for dr, nf in zip(dummy_records, nfragments):
        for ind, f in enumerate(dr):
            r = records[fragment_ind]
            r['time'] = time_offset
            if ind != (nf - 1):
                r['length'] = len(f)
            else:
                r['length'] = len(f) - _count(f)
            r['data'] = f
            r['record_i'] = ind

            if ind == (nf - 1):
                time_offset += r['length'] + 10  # +10 to ensure non-overlap
            else:
                time_offset += r['length']

            fragment_ind += 1

    pnf = 0
    for nf in nfragments:
        records['pulse_length'][pnf:nf + pnf] = np.sum(
            records['length'][pnf:nf + pnf])
        pnf += nf
    return records
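# `_count` is not shown in this excerpt. Judging from Example #20, where
# the same role is played by `_count_zle_samples`, a plausible sketch is a
# trailing-zero counter (an assumption, not the original helper):
def _count(waveform):
    nonzero = np.flatnonzero(waveform)
    return len(waveform) - (nonzero[-1] + 1) if len(nonzero) else len(waveform)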
Example #5
def bounds_to_records(bs, single=False, single_channel=False):
    """Return strax records corresponding to a list of 2-tuples
    of boundaries.

    By default, for each boundary tuple, create a pulse whose data is 1 inside.
    The pulses are put in different channels, first in 0, second in 1, etc.

    :param single: if True, instead create a single pulse in channel 0
        whose data is 1 inside the given bounds and zero outside.
        TODO: length etc. is not properly set in the single=True mode!
        TODO: this probably needs tests itself...

    :param single_channel: if True, instead create all pulses in channel 0.
        You should only feed in disjoint bounds when using this.
    """
    if not len(bs):
        n_samples = 0
    else:
        n_samples = max([a for b in bs for a in b])
        if n_samples % 2:
            # Make sure we sometimes end in zero
            # TODO: not a great way to do it, you miss other cases..
            n_samples += 1
    if not single:
        # Each bound gets its own pulse, in its own channel
        recs = np.zeros(len(bs), dtype=strax.record_dtype(n_samples))
        for i, (l, r) in enumerate(bs):
            # Add waveform roughly in the center
            length = r - l  # Exclusive right bound, no + 1
            pad = (n_samples - (r - l)) // 2
            recs[i]['time'] = l
            recs[i]['length'] = pad + length
            recs[i]['data'][pad:pad + length] = 1
            assert recs[i]['data'].sum() == length
            recs[i]['channel'] = 0 if single_channel else i
        if not single_channel:
            assert len(np.unique(recs['channel'])) == len(bs)
    else:
        # Make a single record with 1 inside the bounds, 0 outside
        recs = np.zeros(1, dtype=strax.record_dtype(n_samples))
        for l, r in bs:
            recs[0]['data'][l:r] = 1
        recs[0]['time'] = 0
        recs[0]['length'] = n_samples

    recs['dt'] = 1
    return recs
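# Usage sketch of the single=True mode (not from the original source;
# assumes numpy and strax are imported):
recs = bounds_to_records([(0, 4), (6, 10)], single=True)
assert len(recs) == 1
assert recs[0]['data'].tolist() == [1, 1, 1, 1, 0, 0, 1, 1, 1, 1]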
Example #6
def __init__(self, config):
    self.config = config
    self.rawdata = wfsim.RawDataOptical(self.config)
    self.record_buffer = np.zeros(
        5000000, dtype=strax.record_dtype())  # 2*250 ms buffer
    self.truth_buffer = np.zeros(10000,
                                 dtype=instruction_dtype +
                                 truth_extra_dtype + [('fill', bool)])
Example #7
    def infer_dtype(self):
        self.record_length = strax.record_length_from_dtype(
            self.deps['raw_records_nv'].dtype_for('raw_records_nv'))

        nveto_records_dtype = strax.raw_record_dtype(self.record_length)
        nveto_diagnostic_lone_records_dtype = strax.record_dtype(
            self.record_length)
        nveto_lone_records_statistics_dtype = lone_record_statistics_dtype(
            self.config['n_nveto_pmts'])

        dtypes = [nveto_records_dtype,
                  nveto_diagnostic_lone_records_dtype,
                  nveto_lone_records_statistics_dtype]

        return {k: v for k, v in zip(self.provides, dtypes)}
Example #8
    def infer_dtype(self):
        # Get record_length from the plugin making raw_records
        self.record_length = strax.record_length_from_dtype(
            self.deps['raw_records'].dtype_for('raw_records'))

        dtype = dict()
        for p in self.provides:
            if 'records' in p:
                dtype[p] = strax.record_dtype(self.record_length)
        dtype['veto_regions'] = strax.hit_dtype
        dtype['pulse_counts'] = pulse_count_dtype(self.config['n_tpc_pmts'])

        return dtype
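    # For context (an assumption about the surrounding class): such a
    # multi-output plugin would declare something like
    #     provides = ('records', 'veto_regions', 'pulse_counts')
    # so that every key set above matches an entry in self.provides.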
Example #9
class Records(strax.Plugin):
    depends_on = ('raw_records', )
    data_kind = 'records'  # TODO: indicate cuts have been done?
    compressor = 'zstd'
    parallel = True
    rechunk_on_save = False
    dtype = strax.record_dtype()

    def compute(self, raw_records):
        r = strax.exclude_tails(raw_records, to_pe)
        hits = strax.find_hits(r)
        strax.cut_outside_hits(r, hits)
        return r
Example #10
    class Records(strax.Plugin):
        provides = 'records'
        depends_on = tuple()
        dtype = strax.record_dtype()

        def iter(self, *args, **kwargs):
            for t in range(n_chunks):
                r = np.zeros(recs_per_chunk, self.dtype)
                r['time'] = t
                r['length'] = 1
                r['dt'] = 1
                r['channel'] = np.arange(len(r))
                yield r
Example #11
    def infer_dtype(self):
        # Get record_length from the plugin making raw_records
        rr_dtype = self.deps['raw_records'].dtype_for('raw_records')
        record_length = len(np.zeros(1, rr_dtype)[0]['data'])

        dtype = dict()
        for p in self.provides:
            if p.endswith('records'):
                dtype[p] = strax.record_dtype(record_length)

        dtype['veto_regions'] = strax.hit_dtype
        dtype['pulse_counts'] = pulse_count_dtype(n_tpc)

        return dtype
Example #12
def test_splitter_outer():
    data = [0, 2, 2, 0, 2, 2, 1]
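    # The waveform has a local minimum (the 0 at sample 3), so the splitter
    # should cut there, yielding [0, 2, 2] and [0, 2, 2, 1].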
    records = np.zeros(1, dtype=strax.record_dtype(len(data)))
    records['dt'] = 1
    records['data'] = data
    records['length'] = len(data)
    records['pulse_length'] = len(data)
    to_pe = np.ones(10)

    hits = strax.find_hits(records, np.ones(1))
    hits['left_integration'] = hits['left']
    hits['right_integration'] = hits['right']
    peaks = np.zeros(1, dtype=strax.peak_dtype())
    hitlets = np.zeros(1, dtype=strax.hitlet_with_data_dtype(10))
    for data_type in (peaks, hitlets):
        data_type['dt'] = 1
        data_type['data'][0, :len(data)] = data
        data_type['length'] = len(data)

    rlinks = strax.record_links(records)
    peaks = strax.split_peaks(peaks,
                              hits,
                              records,
                              rlinks,
                              to_pe,
                              algorithm='local_minimum',
                              data_type='peaks',
                              min_height=1,
                              min_ratio=0)

    hitlets = strax.split_peaks(hitlets,
                                hits,
                                records,
                                rlinks,
                                to_pe,
                                algorithm='local_minimum',
                                data_type='hitlets',
                                min_height=1,
                                min_ratio=0)

    for name, data_type in zip(('peaks', 'hitlets'), (peaks, hitlets)):
        data = data_type[0]['data'][:data_type[0]['length']]
        assert np.all(
            data == [0, 2, 2]
        ), f'Wrong split for {name}, got {data}, expected {[0, 2, 2]}.'
        data = data_type[1]['data'][:data_type[1]['length']]
        assert np.all(
            data == [0, 2, 2, 1]
        ), f'Wrong split for {name}, got {data}, expected {[0, 2, 2, 1]}.'
Example #13
class RecordReader(strax.Plugin):
    """
    Reads records in from disk
    """
    provides = 'raw_records'
    depends_on = tuple()
    dtype = strax.record_dtype()
    rechunk_on_save = False

    def source_finished(self):
        return True

    def is_ready(self, chunk_i):
        try:
            self._chunk_path(chunk_i)
        except ValueError:
            return False
        else:
            return True

    def _load_chunk(self, fp):
        records = strax.load_file(fp,
                                  'blosc',
                                  strax.record_dtype())
        records = strax.sort_by_time(records)
        return records

    def _chunk_path(self, chunk_i):
        fp = os.path.join(self.config['input_dir'], f'{chunk_i:06d}')
        if not os.path.exists(fp):
            raise ValueError(f"Can't find chunk {chunk_i:06d} in {self.config['input_dir']}")
        return fp

    def compute(self, chunk_i):
        fp = self._chunk_path(chunk_i)
        records = self._load_chunk(fp)

        strax.baseline(records)
        strax.integrate(records)

        if len(records):
            timespan_sec = (records[-1]['time'] - records[0]['time']) / 1e9
            print(f'{chunk_i}: read {records.nbytes/1e6:.2f} MB '
                  f'({len(records)} records, '
                  f'{timespan_sec:.2f} live seconds)')
        else:
            print(f'{chunk_i}: read an empty chunk!')

        return records
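# Hypothetical usage sketch (run id, storage, and the 'input_dir' value
# are made up here): register the reader in a context and pull the records.
st = strax.Context(storage=[], register=[RecordReader],
                   config={'input_dir': './raw_chunks'})
raw = st.get_array('some_run', 'raw_records')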
Example #14
    def test_inputs_are_empty(self):
        hitlets_empty = np.zeros(0, dtype=strax.hitlet_with_data_dtype(2))
        records_empty = np.zeros(0, dtype=strax.record_dtype(10))

        hitlets_result = strax.get_hitlets_data(hitlets_empty, self.records,
                                                np.ones(3000))
        assert len(hitlets_result) == 0, \
            'get_hitlet_data returned result for empty hitlets'

        hitlets_result = strax.get_hitlets_data(hitlets_empty, records_empty,
                                                np.ones(3000))
        assert len(hitlets_result) == 0, \
            'get_hitlet_data returned result for empty hitlets'

        with self.assertRaises(ValueError):
            strax.get_hitlets_data(self.hitlets, records_empty, np.ones(3000))
Example #15
def _load_chunk(self, path, kind='central'):
    records = [
        strax.load_file(fn, compressor='blosc', dtype=strax.record_dtype())
        for fn in glob.glob(f'{path}/reader_*')
    ]
    records = np.concatenate(records)
    records = strax.sort_by_time(records)
    if kind == 'central':
        return records
    result = strax.from_break(
        records,
        safe_break=int(1e3),  # TODO config?
        left=kind == 'post',
        tolerant=True)
    if self.config['erase']:
        shutil.rmtree(path)
    return result
Example #16
def test_n_hits():
    if not straxen.utilix_is_configured():
        return
    records = np.zeros(2, dtype=strax.record_dtype())
    records['length'] = 5
    records['pulse_length'] = 5
    records['dt'] = 1
    records['channel'] = [0, 1]
    records['data'][0, :5] = [0, 1, 1, 0, 1]
    records['data'][1, :5] = [0, 1, 0, 0, 0]
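    # Channel 0 contributes two hits ([1, 1] at samples 1-2 and [1] at
    # sample 4), channel 1 contributes one, so the peaklet has 3 hits.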

    st = straxen.contexts.xenonnt_online()
    st.set_config({'hit_min_amplitude': 1})
    p = st.get_single_plugin('0', 'peaklets')
    res = p.compute(records, 0, 999)
    peaklets = res['peaklets']
    assert peaklets['n_hits'] == 3, \
        f"Peaklet has {peaklets['n_hits']} hits, expected 3!"
Example #17
    def infer_dtype(self):
        self.record_length = strax.record_length_from_dtype(
            self.deps['raw_records_nv'].dtype_for('raw_records_nv'))

        channel_range = self.config['channel_map']['nveto']
        n_channel = (channel_range[1] - channel_range[0]) + 1
        nveto_records_dtype = strax.raw_record_dtype(self.record_length)
        nveto_diagnostic_lone_records_dtype = strax.record_dtype(
            self.record_length)
        nveto_lone_records_statistics_dtype = lone_record_statistics_dtype(
            n_channel)

        dtypes = [
            nveto_records_dtype, nveto_diagnostic_lone_records_dtype,
            nveto_lone_records_statistics_dtype
        ]

        return {k: v for k, v in zip(self.provides, dtypes)}
Example #18
def _load_chunk(self, path, kind='central'):
    records = [
        strax.load_file(fn, compressor='blosc', dtype=strax.record_dtype())
        for fn in sorted(glob.glob(f'{path}/*'))
    ]
    records = np.concatenate(records)
    records = strax.sort_by_time(records)
    if kind == 'central':
        result = records
    else:
        result = strax.from_break(
            records,
            safe_break=self.config['safe_break_in_pulses'],
            left=kind == 'post',
            tolerant=True)
    if self.config['erase']:
        shutil.rmtree(path)
    return result
Example #19
class RecordsFromPax(strax.Plugin):
    provides = 'raw_records'
    data_kind = 'raw_records'
    depends_on = tuple()
    dtype = strax.record_dtype()
    parallel = False

    def iter(self, *args, **kwargs):
        if not os.path.exists(self.config['pax_raw_dir']):
            raise FileNotFoundError(self.config['pax_raw_dir'])
        input_dir = os.path.join(self.config['pax_raw_dir'], self.run_id)
        pax_files = sorted(glob.glob(input_dir + '/*.zip'))
        pax_sizes = np.array([os.path.getsize(x) for x in pax_files])
        print(f"Found {len(pax_files)} files, {pax_sizes.sum() / 1e9:.2f} GB")
        for file_i, in_fn in enumerate(pax_files):
            if (self.config['stop_after_zips']
                    and file_i >= self.config['stop_after_zips']):
                break
            yield strax.xenon.pax_interface.pax_to_records(in_fn)
Example #20
    def _make_fake_records(self, dummy_records):
        """
        Creates some specific records to test get_hitlet_data.
        """
        n_fragments = [
            len(pulse_fragments) for pulse_fragments in dummy_records
        ]
        records = np.zeros(np.sum(n_fragments), strax.record_dtype(6))
        records['dt'] = 1
        time_offset = 10  # Need some start time to avoid negative times

        fragment_ind = 0
        for dr, number_of_fragments in zip(dummy_records, n_fragments):
            for record_i, waveform in enumerate(dr):
                r = records[fragment_ind]
                r['time'] = time_offset

                is_not_last_fragment = record_i != (number_of_fragments - 1)
                if is_not_last_fragment:
                    r['length'] = len(waveform)
                else:
                    r['length'] = len(waveform) - self._count_zle_samples(
                        waveform)
                r['data'] = waveform
                r['record_i'] = record_i

                is_last_fragment = record_i == (number_of_fragments - 1)
                if is_last_fragment:
                    time_offset += r['length'] + 10  # +10 to ensure non-overlap
                else:
                    time_offset += r['length']
                fragment_ind += 1

        pulse_offset = 0
        for number_of_fragments in n_fragments:
            pulse_length = np.sum(
                records['length'][pulse_offset:number_of_fragments +
                                  pulse_offset])
            records['pulse_length'][pulse_offset:number_of_fragments +
                                    pulse_offset] = pulse_length
            pulse_offset += number_of_fragments
        return records
Example #21
class Records(strax.Plugin):
    """
    Shamelessly stolen from straxen
    """
    __version__ = '0.0.2'

    depends_on = ('raw_records',)
    data_kind = 'records'   # TODO: indicate cuts have been done?
    compressor = 'zstd'
    parallel = True
    rechunk_on_save = False
    dtype = strax.record_dtype()

    def compute(self, raw_records):
        # Remove records from channels for which the gain is unknown
        r = raw_records[raw_records['channel'] < len(to_pe)]

        hits = strax.find_hits(r)
        strax.cut_outside_hits(r, hits)
        return r
Example #22
def test_processing():
    """Test ParallelSource plugin under several conditions"""
    # It's always harder with a small mailbox:
    strax.Mailbox.DEFAULT_MAX_MESSAGES = 2
    for request_peaks in (True, False):
        for peaks_parallel in (True, False):
            for max_workers in (1, 2):
                Peaks.parallel = peaks_parallel
                print(f"\nTesting with request_peaks {request_peaks}, "
                      f"peaks_parallel {peaks_parallel}, "
                      f"max_workers {max_workers}")

                mystrax = strax.Context(storage=[], register=[Records, Peaks])
                bla = mystrax.get_array(
                    run_id=run_id,
                    targets='peaks' if request_peaks else 'records',
                    max_workers=max_workers)
                assert len(bla) == recs_per_chunk * n_chunks
                assert bla.dtype == (strax.peak_dtype() if request_peaks else
                                     strax.record_dtype())
Example #23
def load_chunk(self, folder, kind='central'):
    records = np.concatenate([
        strax.load_file(os.path.join(folder, f),
                        compressor='blosc',
                        dtype=strax.record_dtype())
        for f in os.listdir(folder)
    ])
    records = strax.sort_by_time(records)
    if kind == 'central':
        result = records
    else:
        if self.config['do_breaks']:
            result = strax.from_break(records,
                                      safe_break=self.config['safe_break'],
                                      left=kind == 'post',
                                      tolerant=True)
        else:
            result = records
    result['time'] += self.config['run_start']
    return result
Example #24
class Records(strax.ParallelSourcePlugin):
    provides = 'records'
    depends_on = tuple()
    dtype = strax.record_dtype()

    def compute(self, chunk_i):
        if self.config['crash']:
            raise SomeCrash("CRASH!!!!")
        r = np.zeros(recs_per_chunk, self.dtype)
        r['time'] = chunk_i
        r['length'] = 1
        r['dt'] = 1
        r['channel'] = np.arange(len(r))
        return r

    def source_finished(self):
        return True

    def is_ready(self, chunk_i):
        return chunk_i < n_chunks
Example #25
class Records(strax.Plugin):
    __version__ = '0.1.1'

    depends_on = ('raw_records', )
    data_kind = 'records'
    compressor = 'zstd'
    parallel = 'process'
    rechunk_on_save = False
    dtype = strax.record_dtype()

    def setup(self):
        self.to_pe = get_to_pe(self.run_id, self.config['to_pe_file'])

    def compute(self, raw_records):
        # Remove records from funny channels (if present)
        r = raw_records[raw_records['channel'] < len(self.to_pe)]

        # Do not trust in DAQ + strax.baseline to leave the
        # out-of-bounds samples to zero.
        strax.zero_out_of_bounds(r)

        if self.config['s2_tail_veto']:
            # Experimental data reduction
            r = strax.exclude_tails(r, self.to_pe)

        # Find hits before filtering
        hits = strax.find_hits(r)

        if self.config['filter']:
            # Filter to concentrate the PMT pulses
            strax.filter_records(r, np.array(self.config['filter']))

        le, re = self.config['save_outside_hits']
        r = strax.cut_outside_hits(r,
                                   hits,
                                   left_extension=le,
                                   right_extension=re)

        # Probably overkill, but just to be sure...
        strax.zero_out_of_bounds(r)
        return r
Example #26
class Records(strax.Plugin):
    provides = 'records'
    parallel = 'process'
    depends_on = tuple()
    dtype = strax.record_dtype()

    def source_finished(self):
        return True

    def is_ready(self, chunk_i):
        return chunk_i < n_chunks

    def compute(self, chunk_i):
        if self.config['crash']:
            raise SomeCrash("CRASH!!!!")
        r = np.zeros(recs_per_chunk, self.dtype)
        t0 = chunk_i + self.config['secret_time_offset']
        r['time'] = t0
        r['length'] = r['dt'] = 1
        r['channel'] = np.arange(len(r))
        return self.chunk(start=t0, end=t0 + 1, data=r)
Example #27
def test_find_hits():
    """Tests the hitfinder with simple example pulses"""
    for w, should_find_intervals in [
        ([], []), ([1], [(0, 1)]), ([1, 0], [(0, 1)]),
        ([1, 0, 1], [(0, 1), (2, 3)]), ([1, 0, 1, 0], [(0, 1), (2, 3)]),
        ([1, 0, 1, 0, 1], [(0, 1), (2, 3), (4, 5)]),
        ([0, 1, 2, 0, 4, -1, 60, 700, -4], [(1, 3), (4, 5), (6, 8)]),
        ([1, 1, 2, 0, 4, -1, 60, 700, -4], [(0, 3), (4, 5), (6, 8)]),
        ([1, 0, 2, 3, 4, -1, 60, 700, -4], [(0, 1), (2, 5), (6, 8)]),
        ([1, 0, 2, 3, 4, -1, 60, 700, 800], [(0, 1), (2, 5), (6, 9)]),
        ([0, 0, 2, 3, 4, -1, 60, 700, 800], [(2, 5), (6, 9)])
    ]:

        records = np.zeros(1, strax.record_dtype(9))
        records[0]['data'][:len(w)] = w
        records['dt'] = 1
        records['length'] = 9

        results = _find_hits(records)
        assert len(results) == len(should_find_intervals)
        assert results == should_find_intervals
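# Plausible sketch of the `_find_hits` helper used above (an assumption;
# the threshold argument of strax.find_hits has changed name across
# strax versions):
def _find_hits(records):
    hits = strax.find_hits(records, min_amplitude=1)
    return list(zip(hits['left'], hits['right']))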
Example #28
class Records(strax.Plugin):
    __version__ = '0.0.2'

    depends_on = ('raw_records', )
    data_kind = 'records'  # TODO: indicate cuts have been done?
    compressor = 'zstd'
    parallel = True
    rechunk_on_save = False
    dtype = strax.record_dtype()

    def compute(self, raw_records):
        # Remove records from channels for which the gain is unknown
        r = raw_records[raw_records['channel'] < len(to_pe)]

        # Experimental data reduction: disabled
        # Seems to remove many S2s since it triggers on S1s!
        # (perhaps due to a larger amount of afterpulses)
        # r = strax.exclude_tails(r, to_pe)

        hits = strax.find_hits(r)
        strax.cut_outside_hits(r, hits)
        return r
Example #29
    def test_empty_overlap(self):
        records = np.zeros(3, strax.record_dtype(10))

        # Create fake records for which hitlet overlaps with channel 0
        # although hit is in channel 1. See also github.com/AxFoundation/strax/pull/549
        records['channel'] = (0, 1, 1)
        records['length'] = (10, 3, 10)
        records['time'] = (0, 0, 5)
        records['dt'] = 1
        records['data'][-1] = np.ones(10)

        # Assume we extend our hits by 1 sample hence hitlet starts at 4
        hitlet = np.zeros(1, strax.hitlet_with_data_dtype(11))
        hitlet['time'] = 4
        hitlet['dt'] = 1
        hitlet['length'] = 11
        hitlet['channel'] = 1

        hitlet = strax.get_hitlets_data(hitlet, records, np.ones(10))
        assert hitlet['time'] == 5
        assert hitlet['length'] == 10
        assert np.sum(hitlet['data']) == 10
        assert hitlet['data'][0, 0] == 1
Example #30
def pax_to_records(input_filename,
                   samples_per_record=strax.DEFAULT_RECORD_LENGTH,
                   events_per_chunk=10):
    """Return pulse records array from pax zip input_filename

    This only works if you have pax installed in your strax environment,
    which is somewhat tricky.
    """

    # Monkeypatch matplotlib so pax is importable
    # See https://github.com/XENON1T/pax/pull/734
    import matplotlib
    matplotlib._cntr = None

    from pax import core  # Pax is not a dependency

    mypax = core.Processor(
        'XENON1T',
        config_dict=dict(
            pax=dict(look_for_config_in_runs_db=False,
                     plugin_group_names=['input'],
                     encoder_plugin=None,
                     input_name=input_filename),
            # Fast startup: skip loading big maps
            WaveformSimulator=dict(s1_light_yield_map='placeholder_map.json',
                                   s2_light_yield_map='placeholder_map.json',
                                   s1_patterns_file=None,
                                   s2_patterns_file=None)))

    print(f"Starting conversion, {events_per_chunk} evt/chunk")

    results = []

    def finish_results():
        nonlocal results
        records = np.concatenate(results)
        # In strax data, records are always stored
        # sorted, baselined and integrated
        records = strax.sort_by_time(records)
        strax.baseline(records)
        strax.integrate(records)
        print("Returning %d records" % len(records))
        results = []
        return records

    for event in mypax.get_events():
        event = mypax.process_event(event)

        if not len(event.pulses):
            # Triggerless pax data contains many empty events
            # at the end. With the fixed events per chunk setting
            # this can lead to empty files, which confuses strax.
            continue

        pulse_lengths = np.array([p.length for p in event.pulses])

        n_records_tot = records_needed(pulse_lengths, samples_per_record).sum()
        records = np.zeros(n_records_tot,
                           dtype=strax.record_dtype(samples_per_record))
        output_record_index = 0  # Record offset in data

        for p in event.pulses:
            n_records = records_needed(p.length, samples_per_record)

            for rec_i in range(n_records):
                r = records[output_record_index]
                r['time'] = (event.start_time + p.left * 10 +
                             rec_i * samples_per_record * 10)
                r['channel'] = p.channel
                r['pulse_length'] = p.length
                r['record_i'] = rec_i
                r['dt'] = 10

                # How much are we storing in this record?
                if rec_i != n_records - 1:
                    # There's more chunks coming, so we store a full chunk
                    n_store = samples_per_record
                    assert p.length > samples_per_record * (rec_i + 1)
                else:
                    # Just enough to store the rest of the data
                    # Note it's not p.length % samples_per_record!!!
                    # (that would be zero if we have to store a full record)
                    n_store = p.length - samples_per_record * rec_i

                assert 0 <= n_store <= samples_per_record
                r['length'] = n_store

                offset = rec_i * samples_per_record
                r['data'][:n_store] = p.raw_data[offset:offset + n_store]
                output_record_index += 1

        results.append(records)
        if len(results) >= events_per_chunk:
            yield finish_results()

    mypax.shutdown()

    if len(results):
        y = finish_results()
        if len(y):
            yield y
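# Usage sketch (hypothetical filename): pax_to_records is a generator
# yielding one records array per chunk of events.
for records in pax_to_records('/data/xe1t_run_000000.zip',
                              events_per_chunk=10):
    print(f'Got {len(records)} records')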