def test_to_short_data_field(self):
    """
    get_hitlets_data must raise a ValueError for a too short data field.

    The fixture hitlets are copied into a buffer whose dtype is built
    with ``hitlet_with_data_dtype(2)``; presumably this is shorter than
    the longest fixture hitlet (set up outside this method).
    """
    n_hitlets = len(self.hitlets)
    short_dtype = strax.hitlet_with_data_dtype(2)
    truncated_hitlets = np.zeros(n_hitlets, dtype=short_dtype)
    strax.copy_to_buffer(self.hitlets,
                         truncated_hitlets,
                         '_refresh_hit_to_hitlet')

    to_pe = np.ones(3000)
    with self.assertRaises(ValueError):
        strax.get_hitlets_data(truncated_hitlets, self.records, to_pe)
def test_conditional_entropy(data, size_template_and_ind_max_template):
    """
    Compare strax.conditional_entropy with reference values computed here.

    :param data: Waveform samples stored in a single dummy hitlet
        (cast to float32 before use).
    :param size_template_and_ind_max_template: Pair of integers. After
        sorting, the larger value is used as the template size and the
        smaller value as the index of the template maximum.
    """
    ind_max_template, size_template = np.sort(size_template_and_ind_max_template)

    # Build one dummy hitlet which carries the waveform:
    data = data.astype(np.float32)
    n_data = len(data)
    hitlet = np.zeros(1, dtype=strax.hitlet_with_data_dtype(n_samples=10))
    hitlet['data'][0, :n_data] = data[:]
    hitlet['length'][0] = n_data

    # Test 1.: Flat template (also covers the case of no data):
    e1 = strax.conditional_entropy(hitlet, 'flat')[0]
    if not np.sum(data):
        # Waveform sums to zero, the entropy is expected to be nan.
        assert np.isnan(e1), f'Hitlet entropy is {e1}, but expected np.nan'
        return

    d = data / np.sum(data)
    positive = d > 0
    template = np.ones(np.sum(positive), dtype=np.float32)
    template = template / np.sum(template)
    e2 = -np.sum(d[positive] * np.log(d[positive] / template))
    assert math.isclose(
        e1, e2, rel_tol=2*10**-4, abs_tol=10**-4
    ), f"Test 1.: Entropy function: {e1}, entropy test: {e2}"

    # Test 2.: Arbitrary template, flat except for one sample of height 2:
    peaked = np.ones(size_template, dtype=np.float32)
    peaked[ind_max_template] = 2
    template = peaked / np.sum(peaked)

    # The reference value is computed by a separate helper which aligns
    # the data slightly differently than the tested function does.
    e2 = _align_compute_entropy(d, template)
    e1 = strax.conditional_entropy(hitlet, template)[0]
    assert math.isclose(
        e1, e2, rel_tol=2*10**-4, abs_tol=10**-4
    ), f"Test 2.: Entropy function: {e1}, entropy test: {e2}"

    # Test 3.: Squared waveform. Same as test 2, but both the template
    # and the data are squared before normalisation.
    squared = peaked * peaked
    template = squared / np.sum(squared)
    d = data * data
    d = d / np.sum(d)

    e2 = _align_compute_entropy(d, template)
    e1 = strax.conditional_entropy(hitlet, template, square_data=True)[0]
    assert math.isclose(
        e1, e2, rel_tol=10**-4, abs_tol=10**-4
    ), f"Test 3.: Entropy function: {e1}, entropy test: {e2}"
def compute(self, records_nv, start, end):
    """
    Build hitlets (without data field) from the given records.

    :param records_nv: Records in which hits are searched.
    :param start: Passed on to strax.concat_overlapping_hits.
    :param end: Passed on to strax.concat_overlapping_hits.
    :returns: Array of dtype strax.hitlet_dtype().
    """
    config = self.config

    # Hits are searched again in the records. Overlapping hits within a
    # channel are concatenated afterwards, since hits may have been
    # split at record boundaries. Signals merged by accident are
    # separated again by the splitting step below.
    hits = strax.find_hits(records_nv,
                           min_amplitude=config['hit_min_amplitude_nv'])
    hits = strax.concat_overlapping_hits(hits,
                                         config['save_outside_hits_nv'],
                                         config['channel_map']['nveto'],
                                         start,
                                         end)
    hits = strax.sort_by_time(hits)

    # Temporary hitlets with a data field sized to the longest hit.
    # This array is not returned, the data field is dropped at the end.
    nsamples = hits['length'].max() if len(hits) else 0
    with_data_dtype = strax.hitlet_with_data_dtype(n_samples=nsamples)
    temp_hitlets = np.zeros(len(hits), with_data_dtype)
    strax.refresh_hit_to_hitlets(hits, temp_hitlets)
    del hits

    # Fill the data field, then split the hitlets:
    strax.get_hitlets_data(temp_hitlets, records_nv, to_pe=self.to_pe)
    temp_hitlets = strax.split_peaks(temp_hitlets,
                                     records_nv,
                                     self.to_pe,
                                     data_type='hitlets',
                                     algorithm='local_minimum',
                                     min_height=config['min_split_nv'],
                                     min_ratio=config['min_split_ratio_nv'])

    # Remaining hitlet properties; this needs three separate passes
    # over all hitlets.
    strax.hitlet_properties(temp_hitlets)
    temp_hitlets['entropy'][:] = strax.conditional_entropy(
        temp_hitlets, template='flat', square_data=False)
    strax.compute_widths(temp_hitlets)

    # Copy everything except the data field into the final array:
    hitlets = np.zeros(len(temp_hitlets), dtype=strax.hitlet_dtype())
    drop_data_field(temp_hitlets, hitlets)
    return hitlets
def test_splitter_outer():
    """
    Check strax.split_peaks on one waveform for peaks and for hitlets.

    The waveform [0, 2, 2, 0, 2, 2, 1] is expected to be split into
    [0, 2, 2] and [0, 2, 2, 1] for both data types.
    """
    waveform = [0, 2, 2, 0, 2, 2, 1]
    n_samples = len(waveform)

    # A single record which holds the complete waveform:
    records = np.zeros(1, dtype=strax.record_dtype(n_samples))
    records['dt'] = 1
    records['data'] = waveform
    for field in ('length', 'pulse_length'):
        records[field] = n_samples
    to_pe = np.ones(10)

    hits = strax.find_hits(records, np.ones(1))
    for side in ('left', 'right'):
        hits[side + '_integration'] = hits[side]

    # One peak and one hitlet, both pre-filled with the same waveform:
    peaks = np.zeros(1, dtype=strax.peak_dtype())
    hitlets = np.zeros(1, dtype=strax.hitlet_with_data_dtype(10))
    for buffer in (peaks, hitlets):
        buffer['dt'] = 1
        buffer['data'][0, :n_samples] = waveform
        buffer['length'] = n_samples

    rlinks = strax.record_links(records)
    split_options = dict(algorithm='local_minimum', min_height=1, min_ratio=0)
    peaks = strax.split_peaks(peaks, hits, records, rlinks, to_pe,
                              data_type='peaks', **split_options)
    hitlets = strax.split_peaks(hitlets, hits, records, rlinks, to_pe,
                                data_type='hitlets', **split_options)

    for name, result in (('peaks', peaks), ('hitlets', hitlets)):
        first, second = result[0], result[1]

        data = first['data'][:first['length']]
        assert np.all(
            data == [0, 2, 2]
        ), f'Wrong split for {name}, got {data}, expected {[0, 2, 2]}.'

        data = second['data'][:second['length']]
        assert np.all(
            data == [0, 2, 2, 1]
        ), f'Wrong split for {name}, got {data}, expected {[0, 2, 2, 1]}.'
def test_inputs_are_empty(self):
    """
    Check how get_hitlets_data deals with empty inputs.

    Empty hitlets must give an empty result, no matter whether the
    records are empty or not. Non-empty hitlets together with empty
    records must raise a ValueError.
    """
    to_pe = np.ones(3000)
    hitlets_empty = np.zeros(0, dtype=strax.hitlet_with_data_dtype(2))
    records_empty = np.zeros(0, dtype=strax.record_dtype(10))

    for records in (self.records, records_empty):
        hitlets_result = strax.get_hitlets_data(hitlets_empty, records, to_pe)
        assert len(hitlets_result) == 0, 'get_hitlet_data returned result for empty hitlets'

    with self.assertRaises(ValueError):
        strax.get_hitlets_data(self.hitlets, records_empty, to_pe)
def test_not_defined_get_fhwm():
    """
    Unit test for edge cases in which the full width at half maximum is
    not defined. strax.get_fwxm is expected to return nan for all of them.
    """
    # (samples written into the data field, value of the length field)
    edge_cases = (
        ([2, 2, 3, 2, 2], 5),
        ([5, 5], 2),
        ([], 3),            # data stays all zero
        ([-1, -2, 0], 3),
    )

    odd_hitlets = np.zeros(len(edge_cases),
                           dtype=strax.hitlet_with_data_dtype(10))
    for row, (samples, length) in enumerate(edge_cases):
        odd_hitlets['data'][row, :len(samples)] = samples
        odd_hitlets['length'][row] = length

    for oh in odd_hitlets:
        res = strax.get_fwxm(oh)
        mes = (f'get_fxhm returned {res} for {oh["data"][:oh["length"]]}!'
               'However, the FWHM is not defined and the return should be nan!')
        assert np.all(np.isnan(res)), mes
def test_get_hitlets_data():
    """
    Check area, waveform and start time of hitlets which are filled by
    strax.get_hitlets_data from hand-made records.
    """
    dummy_records = [  # Contains Hitlet #:
        [[1, 3, 2, 1, 0, 0]],    # 0
        [[0, 0, 0, 0, 1, 3],     # 1
         [2, 1, 0, 0, 0, 0]],
        [[0, 0, 0, 0, 1, 3],     # 2
         [2, 1, 0, 1, 3, 2]],    # 3
        [[0, 0, 0, 0, 1, 2],     # 4
         [2, 2, 2, 2, 2, 2],
         [2, 1, 0, 0, 0, 0]],
        [[2, 1, 0, 1, 3, 2]],    # 5, 6
        [[2, 2, 2, 2, 2, 2]],    # 7
    ]

    # Expected (area, waveform, start time) for every hitlet:
    expected = (
        (7, [1, 3, 2, 1], 10),
        (7, [1, 3, 2, 1], 28),
        (7, [1, 3, 2, 1], 46),
        (6, [1, 3, 2], 51),
        (18, [1, 2, 2, 2, 2, 2, 2, 2, 2, 1], 68),
        (3, [2, 1], 88),
        (6, [1, 3, 2], 91),
        (12, [2, 2, 2, 2, 2, 2], 104),
    )

    records = _make_fake_records(dummy_records)
    hits = strax.find_hits(records, min_amplitude=2)
    hits = strax.concat_overlapping_hits(hits, (1, 1), (0, 1), 0, float('inf'))

    # Data field sized to the longest hit:
    longest_hit = np.max(hits['length'])
    hitlets = np.zeros(len(hits),
                       strax.hitlet_with_data_dtype(n_samples=longest_hit))
    strax.refresh_hit_to_hitlets(hits, hitlets)
    strax.get_hitlets_data(hitlets, records, np.array([1, 1]))

    for i, (area, waveform, start) in enumerate(expected):
        h = hitlets[i]
        assert h['area'] == area, f'Hitlet {i} has the wrong area'
        assert np.all(h['data'][:h['length']] == waveform), f'Hitlet {i} has the wrong waveform'
        assert h['time'] == start, f'Hitlet {i} has the wrong starttime'
def test_empty_overlap(self):
    """
    A hitlet in channel 1 which overlaps in time with a record of
    channel 0 must only get data from channel 1.

    See also github.com/AxFoundation/strax/pull/549
    """
    # Fake records: the hitlet overlaps with the channel 0 record,
    # although the hit itself is in channel 1.
    records = np.zeros(3, strax.record_dtype(10))
    record_fields = (
        ('channel', (0, 1, 1)),
        ('length', (10, 3, 10)),
        ('time', (0, 0, 5)),
        ('dt', 1),
    )
    for field, values in record_fields:
        records[field] = values
    records['data'][-1] = np.ones(10)

    # The hit is assumed to be extended by one sample, hence the
    # hitlet starts at 4.
    hitlet = np.zeros(1, strax.hitlet_with_data_dtype(11))
    for field, value in (('time', 4), ('dt', 1), ('length', 11), ('channel', 1)):
        hitlet[field] = value

    hitlet = strax.get_hitlets_data(hitlet, records, np.ones(10))

    assert hitlet['time'] == 5
    assert hitlet['length'] == 10
    assert np.sum(hitlet['data']) == 10
    assert hitlet['data'][0, 0] == 1
def test_hitlet_properties(hits_n_data):
    """
    Function which tests hitlet_with_data_dtype, the copy of hits into
    hitlets, and hitlet_properties.

    :param hits_n_data: Tuple of (hits, data), where data holds one
        waveform per hit.
    """
    hits, data = hits_n_data
    hits['time'] += 100

    # Step 1.: Produce fake hits and convert them into hitlets.
    # The data buffer is made at least two samples long.
    nsamples = 0
    if len(hits) >= 1:
        nsamples = hits['length'].max()
    nsamples = np.max((nsamples, 2))

    hitlets = np.zeros(len(hits), dtype=strax.hitlet_with_data_dtype(nsamples))
    if len(hitlets):
        assert hitlets['data'].shape[1] >= 2, 'Data buffer is not at least 2 samples long.'
    strax.copy_to_buffer(hits, hitlets, '_refresh_hit_to_hitlet_properties_test')

    # Testing the hit to hitlet copy for free:
    assert len(hits) == len(hitlets), 'Somehow hitlets and hits have different sizes'
    # Testing interval fields:
    dummy = np.zeros(0, dtype=strax.interval_dtype)
    for name in dummy.dtype.names:
        assert np.all(hitlets[name] == hits[name]), (
            f'The entry of the field {name} did not match between hit and '
            f'hitlets ')

    # Step 2.: Add to each hit(let) some data
    for ind, d in enumerate(data):
        h = hitlets[ind]
        h['data'][:h['length']] = d[:h['length']]

    # Step 3.: Add np.nan in data but outside of length:
    for h in hitlets:
        if h['length'] < len(h['data']):
            h['data'][-1] = np.nan
            # It is enough to test this for a single hitlet:
            break

    # Step 4.: Compute properties and apply tests:
    strax.hitlet_properties(hitlets)
    for ind, d in enumerate(data):
        h = hitlets[ind]
        d = d[:h['length']]
        pos_max = np.argmax(d)

        # Checking amplitude things:
        assert pos_max == h['time_amplitude'], 'Wrong amplitude position found!'
        assert d[pos_max] == h['amplitude'], 'Wrong amplitude value found!'

        # Checking FHWM and FWTM:
        fractions = [0.1, 0.5]
        for f in fractions:
            # Get field names for the correct test:
            if f == 0.5:
                left = 'left'
                fwxm = 'fwhm'
            else:
                left = 'low_left'
                fwxm = 'fwtm'

            amplitude = np.max(d)
            if np.all(d[0] == d) or np.all(d > amplitude * f):
                # If all samples are either the same or greater than
                # required height FWXM is not defined:
                mes = 'All samples are the same or larger than require height.'
                assert np.isnan(h[left]), mes + f' Left edge for {f} should have been np.nan.'
                # The width field has to be checked here as well. This
                # assertion used to test h[left] a second time, so the
                # width itself was never verified.
                assert np.isnan(h[fwxm]), mes + f' FWXM for X={f} should have been np.nan.'
            else:
                # Left edge: last sample before the maximum which is at
                # or below the threshold, linearly interpolated towards
                # its right neighbour.
                le = np.argwhere(d[:pos_max] <= amplitude * f)
                if len(le):
                    le = le[-1, 0]
                    m = d[le + 1] - d[le]
                    le = le + 0.5 + (amplitude * f - d[le]) / m
                else:
                    le = 0

                # Right edge: first sample after the maximum which is at
                # or below the threshold, linearly interpolated towards
                # its left neighbour.
                re = np.argwhere(d[pos_max:] <= amplitude * f)
                if len(re) and re[0, 0] != 0:
                    re = re[0, 0] + pos_max
                    m = d[re] - d[re - 1]
                    re = re + 0.5 + (amplitude * f - d[re]) / m
                else:
                    re = len(d)

                assert math.isclose(
                    le, h[left], rel_tol=10**-4, abs_tol=10**-4
                ), f'Left edge does not match for fraction {f}'
                assert math.isclose(
                    re - le, h[fwxm], rel_tol=10**-4, abs_tol=10**-4
                ), f'FWHM does not match for {f}'
def test_hitlet_properties(hits_n_data):
    """
    Function which tests refresh_hit_to_hitlets, hitlet_with_data_dtype,
    and hitlet_properties.

    :param hits_n_data: Tuple of (hits, data), where data holds one
        waveform per hit.
    """
    hits, data = hits_n_data
    hits['time'] += 100

    # Step 1.: Produce fake hits and convert them into hitlets:
    nsamples = hits['length'].max() if len(hits) >= 1 else 2
    hitlets = np.zeros(len(hits), dtype=strax.hitlet_with_data_dtype(nsamples))
    if len(hitlets):
        assert hitlets['data'].shape[1] >= 2, 'Data buffer is not at least 2 samples long.'
    strax.refresh_hit_to_hitlets(hits, hitlets)

    # Testing refresh_hit_to_hitlets for free:
    assert len(hits) == len(hitlets), 'Somehow hitlets and hits have different sizes'
    # Testing interval fields:
    interval_fields = np.zeros(0, dtype=strax.interval_dtype).dtype.names
    for name in interval_fields:
        assert np.all(hitlets[name] == hits[name]), (
            f'The entry of the field {name} did not match between hit and '
            f'hitlets ')

    # Step 2.: Add to each hit(let) some data
    for hitlet, waveform in zip(hitlets, data):
        hitlet['data'][:hitlet['length']] = waveform[:hitlet['length']]

    # Step 3.: Compute the properties
    strax.hitlet_properties(hitlets)

    # Step 4.: Apply tests.
    # (fraction of the amplitude, left edge field, width field)
    checks = ((0.1, 'low_left', 'fwtm'), (0.5, 'left', 'fwhm'))
    for h, waveform in zip(hitlets, data):
        d = waveform[:h['length']]
        pos_max = np.argmax(d)

        # Checking amplitude things:
        assert pos_max == h['time_amplitude'], 'Wrong amplitude position found!'
        assert d[pos_max] == h['amplitude'], 'Wrong amplitude value found!'

        # Checking FHWM and FWTM:
        amplitude = np.max(d)
        for f, left, fwxm in checks:
            threshold = amplitude * f

            # Left edge: last sample before the maximum which is at or
            # below the threshold, linearly interpolated.
            below_left = np.argwhere(d[:pos_max] <= threshold)
            if len(below_left):
                i_left = below_left[-1, 0]
                slope = d[i_left + 1] - d[i_left]
                left_edge = i_left + 0.5 + (threshold - d[i_left]) / slope
            else:
                left_edge = 0

            # Right edge: first sample after the maximum which is at or
            # below the threshold, linearly interpolated.
            below_right = np.argwhere(d[pos_max:] <= threshold)
            if len(below_right) and below_right[0, 0] != 0:
                i_right = below_right[0, 0] + pos_max
                slope = d[i_right] - d[i_right - 1]
                right_edge = i_right + 0.5 + (threshold - d[i_right]) / slope
            else:
                right_edge = len(d)

            assert math.isclose(
                left_edge, h[left], rel_tol=10**-4, abs_tol=10**-4
            ), f'Left edge does not match for fraction {f}'
            assert math.isclose(
                right_edge - left_edge, h[fwxm], rel_tol=10**-4, abs_tol=10**-4
            ), f'FWHM does not match for {f}'
def test_hitlet_properties(hits_n_data):
    """
    Function which tests refresh_hit_to_hitlets, hitlet_with_data_dtype,
    and hitlet_properties.

    :param hits_n_data: Tuple of (hits, data), where data holds one
        waveform per hit.
    """
    hits, data = hits_n_data
    hits['time'] += 100

    # Step 1.: Produce fake hits and convert them into hitlets:
    if len(hits) >= 1:
        nsamples = hits['length'].max()
    else:
        nsamples = 2

    hitlets = np.zeros(len(hits), dtype=strax.hitlet_with_data_dtype(nsamples))
    if len(hitlets):
        assert hitlets['data'].shape[1] >= 2, 'Data buffer is not at least 2 samples long.'
    strax.refresh_hit_to_hitlets(hits, hitlets)

    # Testing refresh_hit_to_hitlets for free:
    assert len(hits) == len(hitlets), 'Somehow hitlets and hits have different sizes'
    # Testing interval fields:
    dummy = np.zeros(0, dtype=strax.interval_dtype)
    for name in dummy.dtype.names:
        assert np.all(hitlets[name] == hits[name]), (
            f'The entry of the field {name} did not match between hit and '
            f'hitlets ')

    # Step 2.: Add to each hit(let) some data
    for ind, d in enumerate(data):
        h = hitlets[ind]
        h['data'][:h['length']] = d[:h['length']]

    # Step 3.: Compute the properties
    strax.hitlet_properties(hitlets)

    # Step 4.: Apply tests.
    for ind, d in enumerate(data):
        h = hitlets[ind]
        d = d[:h['length']]
        pos_max = np.argmax(d)

        # Checking amplitude things:
        assert pos_max == h['time_amplitude'], 'Wrong amplitude position found!'
        assert d[pos_max] == h['amplitude'], 'Wrong amplitude value found!'

        # Checking FHWM and FWTM:
        fractions = [0.1, 0.5]
        for f in fractions:
            # Get field names for the correct test:
            if f == 0.5:
                left = 'left'
                fwxm = 'fwhm'
            else:
                left = 'low_left'
                fwxm = 'fwtm'

            amplitude = np.max(d)
            if np.all(d[0] == d) or np.all(d > amplitude * f):
                # If all samples are either the same or greater than
                # required height FWXM is not defined:
                mes = 'All samples are the same or larger than require height.'
                assert np.isnan(h[left]), mes + f' Left edge for {f} should have been np.nan.'
                # The width field has to be checked here as well. This
                # assertion used to test h[left] a second time, so the
                # width itself was never verified.
                assert np.isnan(h[fwxm]), mes + f' FWXM for X={f} should have been np.nan.'
            else:
                # Left edge: last sample before the maximum which is at
                # or below the threshold, linearly interpolated towards
                # its right neighbour.
                le = np.argwhere(d[:pos_max] <= amplitude * f)
                if len(le):
                    le = le[-1, 0]
                    m = d[le + 1] - d[le]
                    le = le + 0.5 + (amplitude * f - d[le]) / m
                else:
                    le = 0

                # Right edge: first sample after the maximum which is at
                # or below the threshold, linearly interpolated towards
                # its left neighbour.
                re = np.argwhere(d[pos_max:] <= amplitude * f)
                if len(re) and re[0, 0] != 0:
                    re = re[0, 0] + pos_max
                    m = d[re] - d[re - 1]
                    re = re + 0.5 + (amplitude * f - d[re]) / m
                else:
                    re = len(d)

                assert math.isclose(
                    le, h[left], rel_tol=10**-4, abs_tol=10**-4
                ), f'Left edge does not match for fraction {f}'
                assert math.isclose(
                    re - le, h[fwxm], rel_tol=10**-4, abs_tol=10**-4
                ), f'FWHM does not match for {f}'

    # Step 5.: Unity test for not defined get_fhwm-cases:
    # This is a specific unity test for some edge-cases in which the full
    # width half maximum is not defined.
    odd_hitlets = np.zeros(3, dtype=strax.hitlet_with_data_dtype(10))
    odd_hitlets[0]['data'][:5] = [2, 2, 3, 2, 2]
    odd_hitlets[0]['length'] = 5
    odd_hitlets[1]['data'][:2] = [5, 5]
    odd_hitlets[1]['length'] = 2
    # Third hitlet: length is set, but the data stays all zero.
    odd_hitlets[2]['length'] = 3

    for oh in odd_hitlets:
        res = strax.get_fwxm(oh)
        mes = (f'get_fxhm returned {res} for {oh["data"][:oh["length"]]}!'
               'However, the FWHM is not defined and the return should be nan!')
        assert np.all(np.isnan(res)), mes
def get_hitlets_data(hitlets, records, to_pe, min_hitlet_sample=200):
    """
    Function which searches for every hitlet in a given chunk the
    corresponding records data. Additionally compute the total area of
    the signal.

    :param hitlets: Hitlets found in a chunk of records. A single row
        of a structured array (e.g. hitlets[0]) is accepted as well.
    :param records: Records of the chunk.
    :param to_pe: Array with area conversion factors from adc/sample to
        pe/sample. Please make sure that to_pe has the correct shape.
        The array index should match the channel number, hence to_pe
        must be longer than the largest channel number in hitlets.
    :param min_hitlet_sample: minimal length of the hitlet data field.
        prevents numba compiling from running into race conditions.
    :returns: Hitlets including data stored in the "data" field
        (if it did not exists before it will be added.)
    :raises ValueError: If records are empty while hitlets are not, if
        to_pe is too short for the channels in hitlets, if an existing
        data field is not empty or is shorter than the longest hitlet,
        or if a hitlet ends up with zero length.
    """
    if isinstance(hitlets, np.void):
        # A structured array becomes void type if a single row is called,
        # e.g. hitlets[0] which does not work in numba while, hitlets[:1]
        # does. So we have to convert the row into the correct format
        # first. This is done before any length check, since len() of a
        # void row is its number of fields and not a number of hitlets.
        hitlets = np.array([hitlets])

    if len(hitlets) == 0:
        return np.zeros(0, dtype=strax.hitlet_with_data_dtype(min_hitlet_sample))

    if len(records) == 0:
        raise ValueError('Cannot get data for hitlets if records are empty!')

    # Numba will not raise any exceptions if to_pe is too short, leading
    # to strange bugs. The largest channel number is used as an index,
    # so to_pe needs at least max(channel) + 1 entries.
    to_pe_has_wrong_shape = len(to_pe) <= hitlets['channel'].max()
    if to_pe_has_wrong_shape:
        raise ValueError('"to_pe" has a wrong shape. Array index must'
                         ' match channel numbers.')

    data_field_in_hitlets = 'data' in hitlets.dtype.names
    if data_field_in_hitlets:
        # An existing data field is filled in place, so it has to be
        # empty and large enough for the longest hitlet.
        data_is_not_empty = np.any(hitlets['data'] != 0)
        if data_is_not_empty:
            raise ValueError('The data field of hitlets must be empty!')

        data_field_not_long_enough = len(
            hitlets[0]['data']) < hitlets['length'].max()
        if data_field_not_long_enough:
            raise ValueError(
                'The data field must be as large as the longest hitlet in our data.'
            )
        hitlets_with_data_field = hitlets
    else:
        # No data field yet: copy the hitlets into a new buffer which
        # has one.
        n_samples = max(min_hitlet_sample, hitlets['length'].max())
        hitlets_with_data_field = np.zeros(
            len(hitlets), strax.hitlet_with_data_dtype(n_samples))
        strax.copy_to_buffer(hitlets,
                             hitlets_with_data_field,
                             '_copy_hitlets_to_hitlets_width_data')

    _get_hitlets_data(hitlets_with_data_field, records, to_pe)

    if np.any(hitlets_with_data_field['length'] == 0):
        raise ValueError(
            'Try to create zero length hitlets which is forbidden!')
    return hitlets_with_data_field