def test_create_ts(self):
    """Verify create_ts() builds the expected group, attributes and partition table."""
    # Technically, there is a race condition here if you happen to run this at exactly midnight UTC!
    creation_time = datetime.datetime.utcnow()
    self.h5_file.create_ts('/', 'EURUSD', description=Price)
    # The new node must be a plain PyTables group tagged as a TIMESERIES.
    ts_group = self.h5_file.root.EURUSD
    self.assertEqual(ts_group.__class__, tables.Group)
    self.assertEqual(ts_group._v_attrs._TS_TABLES_CLASS, 'TIMESERIES')
    # Walk down today's yYYYY/mMM/dDD partition (in UTC) to the ts_data table.
    partition = tstables.TsTable._TsTable__partition_date_to_path_array(
        creation_time.date())
    year_node = ts_group._f_get_child(partition[0])
    month_node = year_node._f_get_child(partition[1])
    day_node = month_node._f_get_child(partition[2])
    ts_data = day_node._f_get_child('ts_data')
    self.assertEqual(ts_data.attrs._TS_TABLES_EXPECTEDROWS_PER_PARTITION, 10000)
    # Both fields of the stored dtype must match the Price description.
    expected_dtype = tables.dtype_from_descr(Price)
    self.assertEqual(ts_data._v_dtype[0], expected_dtype[0])
    self.assertEqual(ts_data._v_dtype[1], expected_dtype[1])
def test_map_cluster(self):
    """Check the compiled map_cluster() helper against a hand-built reference."""
    cluster_dtype = tb.dtype_from_descr(data_struct.ClusterInfoTable)
    clusters = np.zeros((20, ), dtype=cluster_dtype)
    result = np.zeros((20, ), dtype=cluster_dtype)
    # Events without a matched cluster keep NaN in these columns.
    for column in ("mean_column", "mean_row", "charge"):
        result[column] = np.nan
    # Expected event numbers at the positions that get a cluster.
    for position, event in zip((1, 3, 7, 8, 9), (1, 2, 4, 4, 19)):
        result[position]["event_number"] = event
    # Matched positions carry a mean_column and zeroed mean_row / charge.
    for position, mean_col in zip((0, 1, 3, 7, 8, 9), (1, 2, 3, 5, 6, 20)):
        result[position]["mean_column"] = mean_col
        result[position]["mean_row"] = 0
        result[position]["charge"] = 0
    # Input clusters: one per event number, mean_column = index + 1.
    for index in range(clusters.shape[0]):
        clusters[index]['mean_column'] = index + 1
        clusters[index]["event_number"] = index
    # Duplicate event numbers to exercise the multi-cluster handling.
    clusters[3]["event_number"] = 2
    clusters[5]["event_number"] = 4
    common_event_number = np.array([0, 1, 1, 2, 3, 3, 3, 4, 4],
                                   dtype=np.int64)
    mapped = analysis_utils.map_cluster(common_event_number, clusters)
    data_equal = test_tools.nan_equal(
        first_array=mapped,
        second_array=result[:common_event_number.shape[0]])
    self.assertTrue(data_equal)
def setUp(self):
    """Create a temporary HDF5 file populated with the fixtures the tests read."""
    self.test_dir = tempfile.mkdtemp()
    self.test_filename = os.path.join(self.test_dir, 'test.h5')
    h5 = tables.open_file(self.test_filename, 'w')
    # Plain 3-D integer array.
    self.test_array = np.arange(100 * 1000).reshape((1000, 10, 10))
    self.test_array_path = '/test_array'
    h5.create_array(h5.root, self.test_array_path[1:], self.test_array)
    # Structured table: random integer column A plus random float column B.
    table_rows = []
    for _ in range(100):
        col_a = np.random.randint(
            256, size=np.prod(test_table_col_A_shape)).reshape(
                test_table_col_A_shape)
        col_b = np.random.rand(*test_table_col_B_shape)
        table_rows.append((col_a, col_b))
    self.test_table_ary = np.array(
        table_rows, dtype=tables.dtype_from_descr(TestTableRow))
    self.test_table_path = '/test_table'
    h5.create_table(h5.root, self.test_table_path[1:],
                    TestTableRow).append(self.test_table_ary)
    # Small uint64 vector.
    self.test_uint64_array = np.arange(10).astype(np.uint64)
    self.test_uint64_array_path = '/test_uint64'
    h5.create_array(h5.root, self.test_uint64_array_path[1:],
                    self.test_uint64_array)
    # Mock-data table: random float payload plus a small integer label.
    mock_rows = [(np.random.rand(*test_mock_data_shape),
                  np.random.randint(10, size=1)[0]) for _ in range(1000)]
    self.test_mock_data_ary = np.array(
        mock_rows, dtype=tables.dtype_from_descr(TestMockDataRow))
    self.test_mock_data_path = '/mock_data'
    h5.create_table(h5.root, self.test_mock_data_path[1:],
                    TestMockDataRow).append(self.test_mock_data_ary)
    h5.close()
def test_map_cluster(self):
    """Check the compiled map_cluster() function against a reference result."""
    cluster_dtype = tb.dtype_from_descr(data_struct.ClusterInfoTable)
    cluster = np.zeros((20, ), dtype=cluster_dtype)
    result = np.zeros((20, ), dtype=cluster_dtype)
    # Expected event numbers at the positions that receive a cluster.
    for position, event in zip((1, 3, 4, 7), (1, 2, 3, 4)):
        result[position]["event_number"] = event
    # One input cluster per event number 0..19.
    for index, record in enumerate(cluster):
        record["event_number"] = index
    common_event_number = np.array([0, 1, 1, 2, 3, 3, 3, 4, 4],
                                   dtype=np.int64)
    mapped = analysis_utils.map_cluster(common_event_number, cluster)
    self.assertTrue(
        np.all(mapped == result[:common_event_number.shape[0]]))
def setUp(self):
    """Build the temporary HDF5 fixture file used by the read tests."""
    self.test_dir = tempfile.mkdtemp()
    self.test_filename = os.path.join(self.test_dir, 'test.h5')
    h5 = tables.open_file(self.test_filename, 'w')
    # Plain 3-D integer array.
    self.test_array = np.arange(100 * 1000).reshape((1000, 10, 10))
    self.test_array_path = '/test_array'
    h5.create_array(h5.root, self.test_array_path[1:], self.test_array)
    # Structured table: integer column A, float column B, scalar float column C.
    records = []
    for _ in range(1000):
        records.append((
            np.random.randint(256, size=np.prod(test_table_col_A_shape)).reshape(test_table_col_A_shape),
            np.random.rand(*test_table_col_B_shape),
            np.random.rand(),
        ))
    self.test_table_ary = np.array(
        records, dtype=tables.dtype_from_descr(TestTableRow))
    self.test_table_path = '/test_table'
    table = h5.create_table(h5.root, self.test_table_path[1:], TestTableRow)
    table.append(self.test_table_ary)
    # Completely-sorted index on col_C, needed by the query tests.
    table.cols.col_C.create_csindex()
    # Large random byte-valued array.
    self.test_byte_ary = np.random.randint(256, size=1000 * 1000)
    self.test_byte_ary_path = '/test_byte_array'
    h5.create_array(h5.root, self.test_byte_ary_path[1:], self.test_byte_ary)
    h5.close()
    self._retry_delete = False
def map_cluster(events, cluster):
    """Map the cluster hits onto events.

    Events without a matching cluster get all fields zeroed and
    mean_column / mean_row / charge set to NaN. If one event carries more
    clusters than the event occurs in `events`, the surplus clusters are
    omitted and lost!

    Parameters
    ----------
    events : numpy array
        One dimensional event number array with increasing event numbers.
    cluster : np.recarray
        Recarray with cluster info. The event number is increasing.

    Example
    -------
    event = [ 0 1 1 2 3 3 ]
    cluster.event_number = [ 0 1 2 2 3 4 ]

    gives mapped_cluster.event_number = [ 0 1 0 2 3 0 ]

    Returns
    -------
    Cluster array with given length of the events array.
    """
    # The compiled helper requires C-contiguous inputs and output.
    events = np.ascontiguousarray(events)
    cluster = np.ascontiguousarray(cluster)
    mapped_cluster = np.zeros(
        (events.shape[0], ),
        dtype=tb.dtype_from_descr(data_struct.ClusterInfoTable))
    # Pre-fill with NaN so unmatched events are distinguishable from zeros.
    for field in ('mean_column', 'mean_row', 'charge'):
        mapped_cluster[field] = np.nan
    mapped_cluster = np.ascontiguousarray(mapped_cluster)
    analysis_functions.map_cluster(events, cluster, mapped_cluster)
    return mapped_cluster
def map_cluster(events, cluster):
    """Maps the cluster hits on events. Not existing hits in events have all values set to 0"""
    # The compiled helper requires C-contiguous inputs and output.
    events = np.ascontiguousarray(events)
    cluster = np.ascontiguousarray(cluster)
    result_dtype = tb.dtype_from_descr(data_struct.ClusterInfoTable)
    mapped_cluster = np.ascontiguousarray(
        np.zeros((events.shape[0], ), dtype=result_dtype))
    analysis_functions.map_cluster(events, cluster, mapped_cluster)
    return mapped_cluster
def test_create_ts(self):
    """create_ts() must build the group, class attribute and today's partition table."""
    # Technically, there is a race condition here if you happen to run this at exactly midnight UTC!
    now = datetime.datetime.utcnow()
    self.h5_file.create_ts('/','EURUSD',description=Price)
    # Want to check that:
    # - the group exists
    # - it has a _TS_TABLES_CLASS attribute equal to "TIMESERIES"
    # - it has a table at yYYYY/mMM/dDD/ts_data, where YYY-MM-DD is today (in UTC)
    # - the dtype is correct
    self.assertEqual(self.h5_file.root.EURUSD.__class__, tables.Group)
    self.assertEqual(self.h5_file.root.EURUSD._v_attrs._TS_TABLES_CLASS,'TIMESERIES')
    # Name-mangled access to the private partition-path helper of TsTable.
    path = tstables.TsTable._TsTable__partition_date_to_path_array(now.date())
    # Descend yYYYY -> mMM -> dDD -> ts_data.
    ts_data = self.h5_file.root.EURUSD._f_get_child(path[0])._f_get_child(path[1])._f_get_child(
        path[2])._f_get_child('ts_data')
    self.assertEqual(ts_data.attrs._TS_TABLES_EXPECTEDROWS_PER_PARTITION,10000)
    # Compare both fields of the stored dtype against the Price description.
    self.assertEqual(ts_data._v_dtype[0],tables.dtype_from_descr(Price)[0])
    self.assertEqual(ts_data._v_dtype[1],tables.dtype_from_descr(Price)[1])
def map_cluster(events, cluster):
    """Maps the cluster hits on events. Not existing hits in events have all values set to 0"""
    # The compiled helper requires C-contiguous inputs and output.
    events = np.ascontiguousarray(events)
    cluster = np.ascontiguousarray(cluster)
    result_dtype = dtype_from_descr(data_struct.ClusterInfoTable)
    mapped_cluster = np.ascontiguousarray(
        np.zeros((events.shape[0], ), dtype=result_dtype))
    analysis_functions.map_cluster(events, cluster, mapped_cluster)
    return mapped_cluster
def __init__(self):
    """Set up the locus grammar parser, its transformer and the row dtype."""
    self.dtype = dtype_from_descr(LocusTable)
    self.locus_transformer = LocusTransformer()
    self.parser = Lark(grammar, start='locus')
# NOTE(review): this `raise` is the tail of a method defined above this chunk;
# its original indentation is lost in this view — confirm against the full file.
raise NotImplementedError


class IndexFormat(tables.IsDescription):
    # Row layout of the public index table (all 64-bit integer columns).
    DOMAIN_ID = tables.Int64Col(pos=1)
    POSITION = tables.Int64Col(pos=2)
    LENGTH = tables.Int64Col(pos=3)


class PrivateIndexFormat(tables.IsDescription):
    # Row layout of the private index table (all 64-bit integer columns).
    LOCATION = tables.Int64Col(pos=1)
    LENGTH = tables.Int64Col(pos=2)
    OFFSET = tables.Int64Col(pos=3)


# numpy dtype equivalent of PrivateIndexFormat, for building rows in memory.
private_index_format_dtype = tables.dtype_from_descr(PrivateIndexFormat)


class PrivateIndexDataFormat(tables.IsDescription):
    # Single-column row layout holding an ID per entry.
    ID = tables.Int64Col(pos=1)


# numpy dtype equivalent of PrivateIndexDataFormat.
private_index_data_format_dtype = tables.dtype_from_descr(
    PrivateIndexDataFormat)

########################################################################################################################


def _validator(data):
    # Default pass-through validator: accepts the data unchanged.
    return data
def create_hit_table(
        input_file_name, tdc_calibation_file, plsr_dac_calibation_file,
        n_sub_files=8
):  # loops over all root files and merges the data into a hdf5 file aligned at the event number
    """Merge the per-thread ROOT TTree hit data into one event-aligned HDF5 hit table.

    Pass 1 writes the raw per-hit table ('Hits_0'); pass 2 propagates the
    per-event maximum TDC / event_status to every hit of the event ('Hits').
    Relies on module-level names: col_span, row_span, get_charge_calibration,
    interp1d, progressbar, r (ROOT), tb, np, math, os, data_struct.
    """
    print 'Converting data from CERN ROOT TTree to hdf5 table'
    charge_calibration_values, tdc_calibration, tdc_error, tot_calibration, tot_error = get_charge_calibration(
        tdc_calibation_file, plsr_dac_calibation_file)
    # add all files that have the input_file_name praefix and load their data
    input_file_names = [
        input_file_name + '_t%d.root' % index for index in range(n_sub_files)
        if os.path.isfile(input_file_name + '_t%d.root' % index)
    ]
    n_files = len(input_file_names)
    input_files_root = [
        r.TFile(file_name, 'read') for file_name in input_file_names
    ]
    pixel_digits = [
        input_file_root.Get('EventData').Get('Pixel Digits')
        for input_file_root in input_files_root
    ]
    n_hits = [pixel_digit.GetEntries()
              for pixel_digit in pixel_digits]  # total pixel hits to analyze
    n_total_hits = sum(n_hits)
    with tb.open_file(input_file_name + '_interpreted.h5', 'w') as out_file_h5:
        hit_table = out_file_h5.create_table(
            out_file_h5.root,
            name='Hits_0',
            description=data_struct.HitInfoTable,
            title='hit_data',
            filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
        # tmp data structures to be filles by ROOT
        # data[file_index][branch_name] -> 1-element int32 buffer bound to the branch
        data = {}
        for index, pixel_digit in enumerate(pixel_digits):
            column_data = {}
            for branch in pixel_digit.GetListOfBranches(
            ):  # loop over the branches
                column_data[branch.GetName()] = np.zeros(shape=1,
                                                         dtype=np.int32)
                branch.SetAddress(column_data[branch.GetName()].data)
            data[index] = column_data
        # result data structur to be filles in the following loop
        hits = np.zeros((n_total_hits, ),
                        dtype=tb.dtype_from_descr(data_struct.HitInfoTable))
        # get file index with lowest event number
        for pixel_digit in pixel_digits:
            pixel_digit.GetEntry(0)
        min_event_number = min(
            [data[index]['event'][0] for index in range(n_files)])
        actual_file_index = np.where(
            np.array([data[index]['event'][0]
                      for index in range(n_files)]) == min_event_number)[0][0]
        indices = [0] * n_files  # per-file cursor of the next TTree entry
        table_index = 0  # next free row in the hits array
        actual_data = data[actual_file_index]
        actual_event_number = actual_data['event'][0]
        last_valid_event_number = 0
        last_tdc = 0
        expected_event_number = actual_event_number
        indices[actual_file_index] = 1
        progress_bar = progressbar.ProgressBar(widgets=[
            '',
            progressbar.Percentage(), ' ',
            progressbar.Bar(marker='*', left='|', right='|'), ' ',
            progressbar.AdaptiveETA()
        ],
                                               maxval=n_total_hits,
                                               term_width=80)
        progress_bar.start()

        def add_actual_data(actual_data, table_index, hits):
            """Calibrate one raw digit and, if valid, append it at hits[table_index].

            Returns the (possibly incremented) table_index.
            """
            if actual_data['column'] >= col_span[0] and actual_data[
                    'column'] < col_span[1] and actual_data['row'] >= row_span[
                        0] and actual_data['row'] < row_span[1]:  # sanity check
                # Charge -> TDC / TOT via per-pixel calibration curves;
                # fill_value=-1 marks out-of-range charges.
                tdc_interpolation = interp1d(
                    x=charge_calibration_values,
                    y=tdc_calibration[actual_data['column'],
                                      actual_data['row']],
                    kind='slinear',
                    bounds_error=False,
                    fill_value=-1)
                tdc_error_interpolation = interp1d(
                    x=charge_calibration_values,
                    y=tdc_error[actual_data['column'], actual_data['row']],
                    kind='slinear',
                    bounds_error=False,
                    fill_value=1)
                tdc = tdc_interpolation(actual_data['charge'])[0][0]
                # if tdc >= 0:
                #     tdc_e = np.abs(tdc_error_interpolation(actual_data['charge'])[0][0])
                #     if tdc_e > 0:
                #         tdc = np.random.normal(tdc, tdc_e, size=1)[0]  # add noise to it from TDC method
                tot_interpolation = interp1d(
                    x=charge_calibration_values,
                    y=tot_calibration[actual_data['column'],
                                      actual_data['row']],
                    kind='slinear',
                    bounds_error=False,
                    fill_value=-1)
                tot = tot_interpolation(actual_data['charge'])[0][0]
                if math.isnan(
                        tdc
                ):  # do not add hits where tdc is nan, these pixel have a very high threshold or do not work
                    return table_index
                if tdc == -1:
                    if actual_data[
                            'charge'] > 5000:  # no calibration for TDC due to high charge, thus mark as TDC overflow event
                        hits[table_index]['event_status'] |= 0b0000010100000000
                        tdc = 4095
                    else:  # below threshold thus no calibration, do not add hit
                        return table_index
                if tot == -1:
                    if actual_data[
                            'charge'] > 5000:  # most likely no calibration for TOT due to high charge, thus set max tot
                        tot = 13
                    else:  # below threshold thus no calibration, do not add hit
                        print 'WARNING: Should never trigger!'
                        return table_index
                hits[table_index][
                    'event_status'] |= 0b0000000100000000  # set TDC and trigger word
                hits[table_index]['event_number'] = actual_data['event'][
                    0].astype(np.int64)
                # +1: shift from 0-based simulation indices to 1-based detector indices
                hits[table_index]['column'] = (actual_data['column'] +
                                               1).astype(np.uint8)
                hits[table_index]['row'] = (actual_data['row'] + 1).astype(
                    np.uint16)
                hits[table_index]['TDC'] = int(round(tdc, 0))
                hits[table_index]['tot'] = int(round(tot, 0))
                table_index += 1
            return table_index

        # Merge loop: always consume from the file holding the smallest event number.
        while True:
            actual_event_number = actual_data['event'][0]
            if (actual_event_number == expected_event_number
                    or actual_event_number == expected_event_number -
                    1):  # check if event number increases
                actual_index, actual_digits, actual_data = indices[
                    actual_file_index], pixel_digits[actual_file_index], data[
                        actual_file_index]
                table_index = add_actual_data(actual_data, table_index, hits)
            else:  # event number does not increase, thus the events are maybe in another file --> switch file or the event number is missing
                file_event_numbers = [
                    data[file_index]['event'][0]
                    for file_index in range(n_files)
                ]  # all files actual event number
                actual_file_index = np.where(
                    file_event_numbers == min(file_event_numbers))[0][0]
                actual_index, actual_digits, actual_data = indices[
                    actual_file_index], pixel_digits[actual_file_index], data[
                        actual_file_index]
                actual_event_number = actual_data['event'][0]
                table_index = add_actual_data(actual_data, table_index, hits)
            progress_bar.update(table_index)
            expected_event_number = actual_event_number + 1  # next expected event number does not have to exists, simulated events without hits are possible
            actual_digits.GetEntry(actual_index)
            if indices[actual_file_index] < n_hits[
                    actual_file_index]:  # simply stop when the first file is fully iterated, some data is lost of cause
                indices[actual_file_index] += 1
            else:
                break
        # Set missing data and store to file
        hits[:table_index]['LVL1ID'] = hits[:table_index]['event_number'] % 255
        hits[:table_index]['BCID'] = hits[:table_index]['LVL1ID']
        hits[:table_index]['relative_BCID'] = 6
        hit_table.append(hits[:table_index])
        progress_bar.finish()
    for input_file_root in input_files_root:
        input_file_root.Close()
    del hits
    # Set data that is common to all hits of one event
    with tb.open_file(input_file_name + '_interpreted.h5', 'a') as out_file_h5:
        hits = out_file_h5.root.Hits_0[:]
        progress_bar = progressbar.ProgressBar(widgets=[
            '',
            progressbar.Percentage(), ' ',
            progressbar.Bar(marker='*', left='|', right='|'), ' ',
            progressbar.AdaptiveETA()
        ],
                                               maxval=hits.shape[0],
                                               term_width=80)
        progress_bar.start()
        actual_event_number = -1
        # Propagate the per-event maximum TDC / event_status backwards to all
        # hits of the finished event once a new event number is seen.
        for ihit in range(hits.shape[0]):
            if hits[ihit][
                    'event_number'] != actual_event_number:  # Detect new event
                if actual_event_number != -1:
                    for jhit in xrange(ihit - 1, -1, -1):
                        if hits[jhit]['event_number'] != actual_event_number:
                            break
                        hits[jhit]['TDC'] = max_tdc
                        hits[jhit]['event_status'] = max_event_status
                actual_event_number, max_tdc, max_event_status = hits[ihit][
                    'event_number'], hits[ihit]['TDC'], hits[ihit][
                        'event_status']
            else:
                max_event_status |= hits[ihit]['event_status']
                if max_tdc < hits[ihit]['TDC']:
                    max_tdc = hits[ihit]['TDC']
            progress_bar.update(ihit)
        # out_file_h5.remove_node(out_file_h5.root.Hits)
        hit_table = out_file_h5.create_table(
            out_file_h5.root,
            name='Hits',
            description=data_struct.HitInfoTable,
            title='hit_data',
            filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
        hit_table.append(hits)
        progress_bar.finish()
''' All functions acting on the hits of one DUT are listed here'''
import numpy as np
import tables as tb

from testbeam_analysis import analysis_functions
from testbeam_analysis.cpp import data_struct

# Smoke test for the compiled map_cluster helper: empty clusters mapped onto
# ten events must run through without error.
events = np.arange(10, dtype=np.int64)
cluster = np.zeros((events.shape[0], ),
                   dtype=tb.dtype_from_descr(data_struct.ClusterInfoTable))
# The compiled helper requires C-contiguous inputs and output.
cluster = np.ascontiguousarray(cluster)
events = np.ascontiguousarray(events)
mapped_cluster = np.zeros(
    (events.shape[0], ),
    dtype=tb.dtype_from_descr(data_struct.ClusterInfoTable))
# Pre-fill with NaN so unmatched events are distinguishable from zeros.
mapped_cluster['mean_column'] = np.nan
mapped_cluster['mean_row'] = np.nan
mapped_cluster['charge'] = np.nan
mapped_cluster = np.ascontiguousarray(mapped_cluster)
analysis_functions.map_cluster(events, cluster, mapped_cluster)
# Parenthesized form is valid in both Python 2 and 3; the bare
# `print 'OK'` statement is a SyntaxError under Python 3.
print('OK')
def create_hit_table(input_file_name, tdc_calibation_file, plsr_dac_calibation_file, n_sub_files=8):  # loops over all root files and merges the data into a hdf5 file aligned at the event number
    """Merge the per-thread ROOT TTree hit data into one event-aligned HDF5 'Hits' table.

    Relies on module-level names: get_charge_calibration, interp1d,
    progressbar, r (ROOT), tb, np, math, os, data_struct.
    """
    print 'Converting data from CERN ROOT TTree to hdf5 table'
    charge_calibration_values, tdc_calibration, tdc_error, tot_calibration, tot_error = get_charge_calibration(tdc_calibation_file, plsr_dac_calibation_file)
    # add all files that have the input_file_name praefix and load their data
    input_file_names = [input_file_name + '_t%d.root' % index for index in range(n_sub_files) if os.path.isfile(input_file_name + '_t%d.root' % index)]
    n_files = len(input_file_names)
    input_files_root = [r.TFile(file_name, 'read') for file_name in input_file_names]
    pixel_digits = [input_file_root.Get('EventData').Get('Pixel Digits') for input_file_root in input_files_root]
    n_hits = [pixel_digit.GetEntries() for pixel_digit in pixel_digits]  # total pixel hits to analyze
    n_total_hits = sum(n_hits)
    with tb.open_file(input_file_name + '_interpreted.h5', 'w') as out_file_h5:
        hit_table = out_file_h5.create_table(out_file_h5.root, name='Hits', description=data_struct.HitInfoTable, title='hit_data', filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
        # tmp data structures to be filles by ROOT
        # data[file_index][branch_name] -> 1-element int32 buffer bound to the branch
        data = {}
        for index, pixel_digit in enumerate(pixel_digits):
            column_data = {}
            for branch in pixel_digit.GetListOfBranches():  # loop over the branches
                column_data[branch.GetName()] = np.zeros(shape=1, dtype=np.int32)
                branch.SetAddress(column_data[branch.GetName()].data)
            data[index] = column_data
        # result data structur to be filles in the following loop
        hits = np.zeros((n_total_hits,), dtype=tb.dtype_from_descr(data_struct.HitInfoTable))
        # get file index with lowest event number
        for pixel_digit in pixel_digits:
            pixel_digit.GetEntry(0)
        min_event_number = min([data[index]['event'][0] for index in range(n_files)])
        actual_file_index = np.where(np.array([data[index]['event'][0] for index in range(n_files)]) == min_event_number)[0][0]
        indices = [0] * n_files  # per-file cursor of the next TTree entry
        table_index = 0  # next free row in the hits array
        actual_data = data[actual_file_index]
        actual_event_number = actual_data['event'][0]
        last_valid_event_number = 0
        last_tdc = 0
        expected_event_number = actual_event_number
        indices[actual_file_index] = 1
        progress_bar = progressbar.ProgressBar(widgets=['', progressbar.Percentage(), ' ', progressbar.Bar(marker='*', left='|', right='|'), ' ', progressbar.AdaptiveETA()], maxval=n_total_hits, term_width=80)
        progress_bar.start()

        def add_actual_data(actual_data, table_index):
            """Calibrate one raw digit and, if in range, append it at hits[table_index].

            Returns the (possibly incremented) table_index.
            """
            # Hard-coded FE-I4 geometry bounds (80 columns x 336 rows).
            if actual_data['column'] >= 0 and actual_data['column'] < 80 and actual_data['row'] >= 0 and actual_data['row'] < 336:
                # Charge -> TDC / TOT via per-pixel calibration curves;
                # fill_value=0 marks out-of-range charges.
                tdc_interpolation = interp1d(x=charge_calibration_values, y=tdc_calibration[actual_data['column'], actual_data['row']], kind='slinear', bounds_error=False, fill_value=0)
                tdc = tdc_interpolation(actual_data['charge'])
                tot_interpolation = interp1d(x=charge_calibration_values, y=tot_calibration[actual_data['column'], actual_data['row']], kind='slinear', bounds_error=False, fill_value=0)
                tot = tot_interpolation(actual_data['charge'])
                if math.isnan(tdc):  # do not add hits where tdc is nan, these pixel have a very high threshold or do not work
                    return table_index
                if tdc == 0 and actual_data['charge'] > 10000:  # no calibration for TDC due to high charge, thus mark as TDC overflow event
                    hits[table_index]['event_status'] |= 0b0000010000000000
                    tdc = 4095
                if tot == 0 and actual_data['charge'] > 10000:  # no calibration for TOT due to high charge, thus set max tot
                    tot = 13
                hits[table_index]['event_status'] |= 0b0000000100000000
                hits[table_index]['event_number'] = actual_data['event'][0].astype(np.int64)
                # +1: shift from 0-based simulation indices to 1-based detector indices
                hits[table_index]['column'] = (actual_data['column'] + 1).astype(np.uint8)
                hits[table_index]['row'] = (actual_data['row'] + 1).astype(np.uint16)
                hits[table_index]['TDC'] = int(actual_data['charge'] / 300.)
                hits[table_index]['tot'] = int(tot)
                table_index += 1
            return table_index

        # Merge loop: always consume from the file holding the smallest event number.
        while True:
            actual_event_number = actual_data['event'][0]
            if (actual_event_number == expected_event_number or actual_event_number == expected_event_number - 1):  # check if event number increases
                actual_index, actual_digits, actual_data = indices[actual_file_index], pixel_digits[actual_file_index], data[actual_file_index]
                table_index = add_actual_data(actual_data, table_index)
            else:  # event number does not increase, thus the events are in another file --> switch file or the event number is missing
                file_event_numbers = [data[file_index]['event'][0] for file_index in range(n_files)]  # all files actual event number
                actual_file_index = np.where(file_event_numbers == min(file_event_numbers))[0][0]
                actual_index, actual_digits, actual_data = indices[actual_file_index], pixel_digits[actual_file_index], data[actual_file_index]
                actual_event_number = actual_data['event'][0]
                table_index = add_actual_data(actual_data, table_index)
            progress_bar.update(table_index)
            expected_event_number = actual_event_number + 1
            actual_digits.GetEntry(actual_index)
            if indices[actual_file_index] < n_hits[actual_file_index]:  # simply stop when the first file is fully iterated
                indices[actual_file_index] += 1
            else:
                break
        # Set missing data and store to file
        hits[:table_index]['LVL1ID'] = hits[:table_index]['event_number'] % 255
        hits[:table_index]['BCID'] = hits[:table_index]['LVL1ID']
        hits[:table_index]['relative_BCID'] = 6
        hit_table.append(hits[:table_index])
        progress_bar.finish()
    for input_file_root in input_files_root:
        input_file_root.Close()
def create_hit_table(input_file_name, tdc_calibation_file, plsr_dac_calibation_file, n_sub_files=8):  # loops over all root files and merges the data into a hdf5 file aligned at the event number
    """Merge per-thread ROOT TTree hit data into an event-aligned HDF5 hit table.

    Pass 1 writes the raw per-hit table ('Hits_0'); pass 2 propagates the
    per-event maximum TDC / event_status to every hit of the event ('Hits').
    Relies on module-level names: col_span, row_span, get_charge_calibration,
    interp1d, progressbar, r (ROOT), tb, np, math, os, data_struct.
    """
    print 'Converting data from CERN ROOT TTree to hdf5 table'
    charge_calibration_values, tdc_calibration, tdc_error, tot_calibration, tot_error = get_charge_calibration(tdc_calibation_file, plsr_dac_calibation_file)
    # add all files that have the input_file_name praefix and load their data
    input_file_names = [input_file_name + '_t%d.root' % index for index in range(n_sub_files) if os.path.isfile(input_file_name + '_t%d.root' % index)]
    n_files = len(input_file_names)
    input_files_root = [r.TFile(file_name, 'read') for file_name in input_file_names]
    pixel_digits = [input_file_root.Get('EventData').Get('Pixel Digits') for input_file_root in input_files_root]
    n_hits = [pixel_digit.GetEntries() for pixel_digit in pixel_digits]  # total pixel hits to analyze
    n_total_hits = sum(n_hits)
    with tb.open_file(input_file_name + '_interpreted.h5', 'w') as out_file_h5:
        hit_table = out_file_h5.create_table(out_file_h5.root, name='Hits_0', description=data_struct.HitInfoTable, title='hit_data', filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
        # tmp data structures to be filles by ROOT
        # data[file_index][branch_name] -> 1-element int32 buffer bound to the branch
        data = {}
        for index, pixel_digit in enumerate(pixel_digits):
            column_data = {}
            for branch in pixel_digit.GetListOfBranches():  # loop over the branches
                column_data[branch.GetName()] = np.zeros(shape=1, dtype=np.int32)
                branch.SetAddress(column_data[branch.GetName()].data)
            data[index] = column_data
        # result data structur to be filles in the following loop
        hits = np.zeros((n_total_hits,), dtype=tb.dtype_from_descr(data_struct.HitInfoTable))
        # get file index with lowest event number
        for pixel_digit in pixel_digits:
            pixel_digit.GetEntry(0)
        min_event_number = min([data[index]['event'][0] for index in range(n_files)])
        actual_file_index = np.where(np.array([data[index]['event'][0] for index in range(n_files)]) == min_event_number)[0][0]
        indices = [0] * n_files  # per-file cursor of the next TTree entry
        table_index = 0  # next free row in the hits array
        actual_data = data[actual_file_index]
        actual_event_number = actual_data['event'][0]
        last_valid_event_number = 0
        last_tdc = 0
        expected_event_number = actual_event_number
        indices[actual_file_index] = 1
        progress_bar = progressbar.ProgressBar(widgets=['', progressbar.Percentage(), ' ', progressbar.Bar(marker='*', left='|', right='|'), ' ', progressbar.AdaptiveETA()], maxval=n_total_hits, term_width=80)
        progress_bar.start()

        def add_actual_data(actual_data, table_index, hits):
            """Calibrate one raw digit and, if valid, append it at hits[table_index].

            Returns the (possibly incremented) table_index.
            """
            if actual_data['column'] >= col_span[0] and actual_data['column'] < col_span[1] and actual_data['row'] >= row_span[0] and actual_data['row'] < row_span[1]:  # sanity check
                # Charge -> TDC / TOT via per-pixel calibration curves;
                # fill_value=-1 marks out-of-range charges.
                tdc_interpolation = interp1d(x=charge_calibration_values, y=tdc_calibration[actual_data['column'], actual_data['row']], kind='slinear', bounds_error=False, fill_value=-1)
                tdc_error_interpolation = interp1d(x=charge_calibration_values, y=tdc_error[actual_data['column'], actual_data['row']], kind='slinear', bounds_error=False, fill_value=1)
                tdc = tdc_interpolation(actual_data['charge'])[0][0]
                # if tdc >= 0:
                #     tdc_e = np.abs(tdc_error_interpolation(actual_data['charge'])[0][0])
                #     if tdc_e > 0:
                #         tdc = np.random.normal(tdc, tdc_e, size=1)[0]  # add noise to it from TDC method
                tot_interpolation = interp1d(x=charge_calibration_values, y=tot_calibration[actual_data['column'], actual_data['row']], kind='slinear', bounds_error=False, fill_value=-1)
                tot = tot_interpolation(actual_data['charge'])[0][0]
                if math.isnan(tdc):  # do not add hits where tdc is nan, these pixel have a very high threshold or do not work
                    return table_index
                if tdc == -1:
                    if actual_data['charge'] > 5000:  # no calibration for TDC due to high charge, thus mark as TDC overflow event
                        hits[table_index]['event_status'] |= 0b0000010100000000
                        tdc = 4095
                    else:  # below threshold thus no calibration, do not add hit
                        return table_index
                if tot == -1:
                    if actual_data['charge'] > 5000:  # most likely no calibration for TOT due to high charge, thus set max tot
                        tot = 13
                    else:  # below threshold thus no calibration, do not add hit
                        print 'WARNING: Should never trigger!'
                        return table_index
                hits[table_index]['event_status'] |= 0b0000000100000000  # set TDC and trigger word
                hits[table_index]['event_number'] = actual_data['event'][0].astype(np.int64)
                # +1: shift from 0-based simulation indices to 1-based detector indices
                hits[table_index]['column'] = (actual_data['column'] + 1).astype(np.uint8)
                hits[table_index]['row'] = (actual_data['row'] + 1).astype(np.uint16)
                hits[table_index]['TDC'] = int(round(tdc, 0))
                hits[table_index]['tot'] = int(round(tot, 0))
                table_index += 1
            return table_index

        # Merge loop: always consume from the file holding the smallest event number.
        while True:
            actual_event_number = actual_data['event'][0]
            if (actual_event_number == expected_event_number or actual_event_number == expected_event_number - 1):  # check if event number increases
                actual_index, actual_digits, actual_data = indices[actual_file_index], pixel_digits[actual_file_index], data[actual_file_index]
                table_index = add_actual_data(actual_data, table_index, hits)
            else:  # event number does not increase, thus the events are maybe in another file --> switch file or the event number is missing
                file_event_numbers = [data[file_index]['event'][0] for file_index in range(n_files)]  # all files actual event number
                actual_file_index = np.where(file_event_numbers == min(file_event_numbers))[0][0]
                actual_index, actual_digits, actual_data = indices[actual_file_index], pixel_digits[actual_file_index], data[actual_file_index]
                actual_event_number = actual_data['event'][0]
                table_index = add_actual_data(actual_data, table_index, hits)
            progress_bar.update(table_index)
            expected_event_number = actual_event_number + 1  # next expected event number does not have to exists, simulated events without hits are possible
            actual_digits.GetEntry(actual_index)
            if indices[actual_file_index] < n_hits[actual_file_index]:  # simply stop when the first file is fully iterated, some data is lost of cause
                indices[actual_file_index] += 1
            else:
                break
        # Set missing data and store to file
        hits[:table_index]['LVL1ID'] = hits[:table_index]['event_number'] % 255
        hits[:table_index]['BCID'] = hits[:table_index]['LVL1ID']
        hits[:table_index]['relative_BCID'] = 6
        hit_table.append(hits[:table_index])
        progress_bar.finish()
    for input_file_root in input_files_root:
        input_file_root.Close()
    del hits
    # Set data that is common to all hits of one event
    with tb.open_file(input_file_name + '_interpreted.h5', 'a') as out_file_h5:
        hits = out_file_h5.root.Hits_0[:]
        progress_bar = progressbar.ProgressBar(widgets=['', progressbar.Percentage(), ' ', progressbar.Bar(marker='*', left='|', right='|'), ' ', progressbar.AdaptiveETA()], maxval=hits.shape[0], term_width=80)
        progress_bar.start()
        actual_event_number = -1
        # Propagate the per-event maximum TDC / event_status backwards to all
        # hits of the finished event once a new event number is seen.
        for ihit in range(hits.shape[0]):
            if hits[ihit]['event_number'] != actual_event_number:  # Detect new event
                if actual_event_number != -1:
                    for jhit in xrange(ihit - 1, -1, -1):
                        if hits[jhit]['event_number'] != actual_event_number:
                            break
                        hits[jhit]['TDC'] = max_tdc
                        hits[jhit]['event_status'] = max_event_status
                actual_event_number, max_tdc, max_event_status = hits[ihit]['event_number'], hits[ihit]['TDC'], hits[ihit]['event_status']
            else:
                max_event_status |= hits[ihit]['event_status']
                if max_tdc < hits[ihit]['TDC']:
                    max_tdc = hits[ihit]['TDC']
            progress_bar.update(ihit)
        # out_file_h5.remove_node(out_file_h5.root.Hits)
        hit_table = out_file_h5.create_table(out_file_h5.root, name='Hits', description=data_struct.HitInfoTable, title='hit_data', filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
        hit_table.append(hits)
        progress_bar.finish()
def create_hit_table( input_file_name, tdc_calibation_file, plsr_dac_calibation_file, n_sub_files=8 ): # loops over all root files and merges the data into a hdf5 file aligned at the event number print 'Converting data from CERN ROOT TTree to hdf5 table' charge_calibration_values, tdc_calibration, tdc_error, tot_calibration, tot_error = get_charge_calibration( tdc_calibation_file, plsr_dac_calibation_file) # add all files that have the input_file_name praefix and load their data input_file_names = [ input_file_name + '_t%d.root' % index for index in range(n_sub_files) if os.path.isfile(input_file_name + '_t%d.root' % index) ] n_files = len(input_file_names) input_files_root = [ r.TFile(file_name, 'read') for file_name in input_file_names ] pixel_digits = [ input_file_root.Get('EventData').Get('Pixel Digits') for input_file_root in input_files_root ] n_hits = [pixel_digit.GetEntries() for pixel_digit in pixel_digits] # total pixel hits to analyze n_total_hits = sum(n_hits) with tb.open_file(input_file_name + '_interpreted.h5', 'w') as out_file_h5: hit_table = out_file_h5.create_table( out_file_h5.root, name='Hits', description=data_struct.HitInfoTable, title='hit_data', filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False)) # tmp data structures to be filles by ROOT data = {} for index, pixel_digit in enumerate(pixel_digits): column_data = {} for branch in pixel_digit.GetListOfBranches( ): # loop over the branches column_data[branch.GetName()] = np.zeros(shape=1, dtype=np.int32) branch.SetAddress(column_data[branch.GetName()].data) data[index] = column_data # result data structur to be filles in the following loop hits = np.zeros((n_total_hits, ), dtype=tb.dtype_from_descr(data_struct.HitInfoTable)) # get file index with lowest event number for pixel_digit in pixel_digits: pixel_digit.GetEntry(0) min_event_number = min( [data[index]['event'][0] for index in range(n_files)]) actual_file_index = np.where( np.array([data[index]['event'][0] for index in 
range(n_files)]) == min_event_number)[0][0] indices = [0] * n_files table_index = 0 actual_data = data[actual_file_index] actual_event_number = actual_data['event'][0] last_valid_event_number = 0 last_tdc = 0 expected_event_number = actual_event_number indices[actual_file_index] = 1 progress_bar = progressbar.ProgressBar(widgets=[ '', progressbar.Percentage(), ' ', progressbar.Bar(marker='*', left='|', right='|'), ' ', progressbar.AdaptiveETA() ], maxval=n_total_hits, term_width=80) progress_bar.start() def add_actual_data(actual_data, table_index): if actual_data['column'] >= 0 and actual_data[ 'column'] < 80 and actual_data['row'] >= 0 and actual_data[ 'row'] < 336: tdc_interpolation = interp1d( x=charge_calibration_values, y=tdc_calibration[actual_data['column'], actual_data['row']], kind='slinear', bounds_error=False, fill_value=0) tdc = tdc_interpolation(actual_data['charge']) tot_interpolation = interp1d( x=charge_calibration_values, y=tot_calibration[actual_data['column'], actual_data['row']], kind='slinear', bounds_error=False, fill_value=0) tot = tot_interpolation(actual_data['charge']) if math.isnan( tdc ): # do not add hits where tdc is nan, these pixel have a very high threshold or do not work return table_index if tdc == 0 and actual_data[ 'charge'] > 10000: # no calibration for TDC due to high charge, thus mark as TDC overflow event hits[table_index]['event_status'] |= 0b0000010000000000 tdc = 4095 if tot == 0 and actual_data[ 'charge'] > 10000: # no calibration for TOT due to high charge, thus set max tot tot = 13 hits[table_index]['event_status'] |= 0b0000000100000000 hits[table_index]['event_number'] = actual_data['event'][ 0].astype(np.int64) hits[table_index]['column'] = (actual_data['column'] + 1).astype(np.uint8) hits[table_index]['row'] = (actual_data['row'] + 1).astype( np.uint16) hits[table_index]['TDC'] = int(actual_data['charge'] / 300.) 
hits[table_index]['tot'] = int(tot) table_index += 1 return table_index while True: actual_event_number = actual_data['event'][0] if (actual_event_number == expected_event_number or actual_event_number == expected_event_number - 1): # check if event number increases actual_index, actual_digits, actual_data = indices[ actual_file_index], pixel_digits[actual_file_index], data[ actual_file_index] table_index = add_actual_data(actual_data, table_index) else: # event number does not increase, thus the events are in another file --> switch file or the event number is missing file_event_numbers = [ data[file_index]['event'][0] for file_index in range(n_files) ] # all files actual event number actual_file_index = np.where( file_event_numbers == min(file_event_numbers))[0][0] actual_index, actual_digits, actual_data = indices[ actual_file_index], pixel_digits[actual_file_index], data[ actual_file_index] actual_event_number = actual_data['event'][0] table_index = add_actual_data(actual_data, table_index) progress_bar.update(table_index) expected_event_number = actual_event_number + 1 actual_digits.GetEntry(actual_index) if indices[actual_file_index] < n_hits[ actual_file_index]: # simply stop when the first file is fully iterated indices[actual_file_index] += 1 else: break # Set missing data and store to file hits[:table_index]['LVL1ID'] = hits[:table_index]['event_number'] % 255 hits[:table_index]['BCID'] = hits[:table_index]['LVL1ID'] hits[:table_index]['relative_BCID'] = 6 hit_table.append(hits[:table_index]) progress_bar.finish() for input_file_root in input_files_root: input_file_root.Close()
    # NOTE(review): this except clause belongs to a try block defined above
    # this chunk — kept verbatim. The file is re-read on a transient HDF5
    # consistency error rather than aborting.
    except tb.exceptions.HDF5ExtError:
        logging.info('File in inconsistent state, read again')


if __name__ == "__main__":
    # Wire up the C++-backed interpreter/histogramming objects from the
    # analysis_configuration dict (defined elsewhere in this file).
    interpreter = PyDataInterpreter()
    histograming = PyDataHistograming()
    interpreter.set_info_output(analysis_configuration['infos'])
    interpreter.set_warning_output(analysis_configuration['warnings'])
    # FE-I4B mode is enabled for every chip flavor except 'fei4a'
    interpreter.set_FEI4B(False if analysis_configuration['chip_flavor'] == 'fei4a' else True)
    interpreter.set_trig_count(analysis_configuration['trig_count'])
    histograming.set_no_scan_parameter()
    histograming.create_occupancy_hist(True)
    histograming.create_rel_bcid_hist(True)
    plt.ion()  # interactive plotting so figures update during the analysis
    hits = np.empty((analysis_configuration['chunk_size'],), dtype=tb.dtype_from_descr(data_struct.HitInfoTable))  # hold the hits per analyze_raw_data call
    start_time = datetime.now()
    # Input is '<folder><scan_name>.h5'; presumably analyze_raw_data_file
    # fills the module-level 'hits' buffer above — confirm against its def.
    analyze_raw_data_file(input_file=analysis_configuration['folder'] + analysis_configuration['scan_name'] + '.h5')
    logging.info('Script runtime %.1f seconds' % (datetime.now() - start_time).total_seconds())
    plt.ioff()
    plt.show()  # block until the plots are closed
def pprint_array(array):
    # Pretty-print a structured numpy array: a tab-separated header row of
    # field names, then one line per record with each value roughly centred
    # under its column name.
    offsets = []
    for column_name in array.dtype.names:
        print column_name, "\t",
        # count("") returns len(column_name) + 1; used as the column width.
        # NOTE(review): presumably len(column_name) was meant — the +1 only
        # shifts the padding by half a character, so output is near-identical.
        offsets.append(column_name.count(""))
    for row in array:
        print ""  # newline before each record
        for i, column in enumerate(row):
            # Python 2 integer division: indent by half the column width
            print " " * (offsets[i] / 2), column, "\t",
    print ""


if __name__ == "__main__":
    # create some fake data (fields addressed positionally: the first five
    # columns of HitInfoTable)
    hits = np.ones(shape=(10,), dtype=dtype_from_descr(data_struct.HitInfoTable))
    for i, hit in enumerate(hits):
        hit[0] = i / 2
        hit[1] = i / 2
        hit[2] = i + 2
        hit[3] = i % 2 + 10
        hit[4] = i % 3 + 1
    hits[8]["event_number"] = 3
    # create results arrays to be filled by the clusterizer
    cluster_hits = np.zeros_like(hits, dtype=dtype_from_descr(data_struct.ClusterHitInfoTable))
    cluster = np.zeros_like(hits, dtype=dtype_from_descr(data_struct.ClusterInfoTable))
    # create clusterizer object (usage continues beyond this chunk)
    clusterizer = HitClusterizer()