Example #1
    def test_create_ts(self):
        # Technically, there is a race condition here if you happen to run this at exactly midnight UTC!
        now = datetime.datetime.utcnow()
        self.h5_file.create_ts('/', 'EURUSD', description=Price)

        # Want to check that:
        # - the group exists
        # - it has a _TS_TABLES_CLASS attribute equal to "TIMESERIES"
        # - it has a table at yYYYY/mMM/dDD/ts_data, where YYYY-MM-DD is today (in UTC)
        # - the dtype is correct
        self.assertEqual(self.h5_file.root.EURUSD.__class__, tables.Group)
        self.assertEqual(self.h5_file.root.EURUSD._v_attrs._TS_TABLES_CLASS,
                         'TIMESERIES')

        path = tstables.TsTable._TsTable__partition_date_to_path_array(
            now.date())

        ts_data = self.h5_file.root.EURUSD._f_get_child(path[0])._f_get_child(
            path[1])._f_get_child(path[2])._f_get_child('ts_data')

        self.assertEqual(ts_data.attrs._TS_TABLES_EXPECTEDROWS_PER_PARTITION,
                         10000)

        self.assertEqual(ts_data._v_dtype[0],
                         tables.dtype_from_descr(Price)[0])
        self.assertEqual(ts_data._v_dtype[1],
                         tables.dtype_from_descr(Price)[1])
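An illustrative aside (not the library's actual code): the private tstables helper called above builds the yYYYY/mMM/dDD partition path described in the comment, and that convention can be mimicked like this:

import datetime

# Mimics the yYYYY/mMM/dDD partition layout the test comment describes;
# the real tstables helper is a private method of TsTable.
def partition_date_to_path_array(day):
    return ['y%04d' % day.year, 'm%02d' % day.month, 'd%02d' % day.day]

print(partition_date_to_path_array(datetime.date(2024, 1, 31)))
# -> ['y2024', 'm01', 'd31']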
Example #2
    def test_map_cluster(self):  # check the compiled function against result
        clusters = np.zeros(
            (20, ), dtype=tb.dtype_from_descr(data_struct.ClusterInfoTable))
        result = np.zeros(
            (20, ), dtype=tb.dtype_from_descr(data_struct.ClusterInfoTable))
        result["mean_column"] = np.nan
        result["mean_row"] = np.nan
        result["charge"] = np.nan
        result[1]["event_number"], result[3]["event_number"], result[7][
            "event_number"], result[8]["event_number"], result[9][
                "event_number"] = 1, 2, 4, 4, 19
        result[0]["mean_column"], result[1]["mean_column"], result[3][
            "mean_column"], result[7]["mean_column"], result[8][
                "mean_column"], result[9]["mean_column"] = 1, 2, 3, 5, 6, 20
        result[0]["mean_row"], result[1]["mean_row"], result[3][
            "mean_row"], result[7]["mean_row"], result[8]["mean_row"], result[
                9]["mean_row"] = 0, 0, 0, 0, 0, 0
        result[0]["charge"], result[1]["charge"], result[3]["charge"], result[
            7]["charge"], result[8]["charge"], result[9][
                "charge"] = 0, 0, 0, 0, 0, 0

        for index, cluster in enumerate(clusters):
            cluster['mean_column'] = index + 1
            cluster["event_number"] = index
        clusters[3]["event_number"] = 2
        clusters[5]["event_number"] = 4

        common_event_number = np.array([0, 1, 1, 2, 3, 3, 3, 4, 4],
                                       dtype=np.int64)

        data_equal = test_tools.nan_equal(
            first_array=analysis_utils.map_cluster(common_event_number,
                                                   clusters),
            second_array=result[:common_event_number.shape[0]])
        self.assertTrue(data_equal)
Example #3
    def setUp(self):
        self.test_dir = tempfile.mkdtemp()
        self.test_filename = os.path.join(self.test_dir, 'test.h5')
        test_file = tables.open_file(self.test_filename, 'w')

        self.test_array = np.arange(100*1000).reshape((1000, 10, 10))
        self.test_array_path = '/test_array'
        array = test_file.create_array(test_file.root, self.test_array_path[1:], self.test_array)

        self.test_table_ary = np.array([ (
            np.random.randint(256, size=np.prod(test_table_col_A_shape)).reshape(test_table_col_A_shape),
            np.random.rand(*test_table_col_B_shape)) for _ in range(100) ],
                                       dtype=tables.dtype_from_descr(TestTableRow))
        self.test_table_path = '/test_table'
        table = test_file.create_table(test_file.root, self.test_table_path[1:], TestTableRow)
        table.append(self.test_table_ary)

        self.test_uint64_array = np.arange(10).astype(np.uint64)
        self.test_uint64_array_path = '/test_uint64'
        uint64_array = test_file.create_array(test_file.root, self.test_uint64_array_path[1:], self.test_uint64_array)

        self.test_mock_data_ary = np.array([ (
            np.random.rand(*test_mock_data_shape),
            np.random.randint(10, size=1)[0] ) for _ in range(1000) ],
                                       dtype=tables.dtype_from_descr(TestMockDataRow))
        self.test_mock_data_path = '/mock_data'
        mock = test_file.create_table(test_file.root, self.test_mock_data_path[1:], TestMockDataRow)
        mock.append(self.test_mock_data_ary)

        test_file.close()
Example #4
    def test_map_cluster(self):  # Check the compiled function against result
        cluster = np.zeros((20, ), dtype=tb.dtype_from_descr(data_struct.ClusterInfoTable))
        result = np.zeros((20, ), dtype=tb.dtype_from_descr(data_struct.ClusterInfoTable))
        result[1]["event_number"], result[3]["event_number"], result[4]["event_number"], result[7]["event_number"] = 1, 2, 3, 4

        for index in range(cluster.shape[0]):
            cluster[index]["event_number"] = index

        common_event_number = np.array([0, 1, 1, 2, 3, 3, 3, 4, 4], dtype=np.int64)
        self.assertTrue(np.all(analysis_utils.map_cluster(common_event_number, cluster) == result[:common_event_number.shape[0]]))
Example #5
    def setUp(self):
        self.test_dir = tempfile.mkdtemp()
        self.test_filename = os.path.join(self.test_dir, 'test.h5')
        test_file = tables.open_file(self.test_filename, 'w')

        self.test_array = np.arange(100*1000).reshape((1000, 10, 10))
        self.test_array_path = '/test_array'
        array = test_file.create_array(test_file.root, self.test_array_path[1:], self.test_array)

        self.test_table_ary = np.array([ (
                np.random.randint(256, size=np.prod(test_table_col_A_shape)).reshape(test_table_col_A_shape),
                np.random.rand(*test_table_col_B_shape),
                np.random.rand()
            ) for _ in range(1000) ],
            dtype=tables.dtype_from_descr(TestTableRow))
        self.test_table_path = '/test_table'
        table = test_file.create_table(test_file.root, self.test_table_path[1:], TestTableRow)
        table.append(self.test_table_ary)
        table.cols.col_C.create_csindex()

        self.test_byte_ary = np.random.randint(256, size=1000*1000)
        self.test_byte_ary_path = '/test_byte_array'
        byte_array = test_file.create_array(test_file.root, self.test_byte_ary_path[1:], self.test_byte_ary)

        test_file.close()

        self._retry_delete = False
Example #6
def map_cluster(events, cluster):
    """
    Maps the cluster hits onto events. Events without an existing cluster have
    all values set to 0 and mean_column/mean_row/charge set to NaN. Excess
    clusters per event number are omitted and lost!

    Parameters
    ----------
    events : numpy array
        One dimensional event number array with increasing event numbers.
    cluster : np.recarray
        Record array with cluster info, sorted by increasing event number.

    Example
    -------
    event = [ 0  1  1  2  3  3 ]
    cluster.event_number = [ 0  1  2  2  3  4 ]

    gives mapped_cluster.event_number = [ 0  1  0  2  3  0 ]

    Returns
    -------
    Cluster array with the same length as the events array.

    """
    cluster = np.ascontiguousarray(cluster)
    events = np.ascontiguousarray(events)
    mapped_cluster = np.zeros(
        (events.shape[0], ),
        dtype=tb.dtype_from_descr(data_struct.ClusterInfoTable))
    mapped_cluster['mean_column'] = np.nan
    mapped_cluster['mean_row'] = np.nan
    mapped_cluster['charge'] = np.nan
    mapped_cluster = np.ascontiguousarray(mapped_cluster)
    analysis_functions.map_cluster(events, cluster, mapped_cluster)
    return mapped_cluster
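A usage sketch for the docstring example above; this is an illustration added here, not part of the original file, and it assumes the testbeam_analysis package (analysis_functions, data_struct) is importable and that the map_cluster function just defined is in scope, as in the standalone script example further down this page.

import numpy as np
import tables as tb
from testbeam_analysis import analysis_functions
from testbeam_analysis.cpp import data_struct

events = np.array([0, 1, 1, 2, 3, 3], dtype=np.int64)
cluster = np.zeros((6,), dtype=tb.dtype_from_descr(data_struct.ClusterInfoTable))
cluster['event_number'] = [0, 1, 2, 2, 3, 4]

# Only one cluster exists for events 1 and 3, so their repeated slots stay
# zero/NaN-filled; the second event-2 cluster and the event-4 cluster find
# no slot in events and are lost.
mapped = map_cluster(events, cluster)
print(mapped['event_number'])  # per the docstring: [0 1 0 2 3 0]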
Example #7
def map_cluster(events, cluster):
    """
    Maps the cluster hits onto events. Events without an existing cluster have all values set to 0.

    """
    cluster = np.ascontiguousarray(cluster)
    events = np.ascontiguousarray(events)
    mapped_cluster = np.zeros((events.shape[0], ), dtype=tb.dtype_from_descr(data_struct.ClusterInfoTable))
    mapped_cluster = np.ascontiguousarray(mapped_cluster)
    analysis_functions.map_cluster(events, cluster, mapped_cluster)
    return mapped_cluster
Example #9
def map_cluster(events, cluster):
    """
    Maps the cluster hits onto events. Events without an existing cluster have all values set to 0.

    """
    cluster = np.ascontiguousarray(cluster)
    events = np.ascontiguousarray(events)
    mapped_cluster = np.zeros(
        (events.shape[0], ),
        dtype=dtype_from_descr(data_struct.ClusterInfoTable))
    mapped_cluster = np.ascontiguousarray(mapped_cluster)
    analysis_functions.map_cluster(events, cluster, mapped_cluster)
    return mapped_cluster
Example #10
    def __init__(self):
        self.parser = Lark(grammar, start='locus')
        self.locus_transformer = LocusTransformer()
        self.dtype = dtype_from_descr(LocusTable)
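LocusTable is defined elsewhere in the project this snippet comes from; the following is a purely hypothetical stand-in, added only to illustrate why caching dtype_from_descr(LocusTable) in the constructor is convenient:

import numpy as np
import tables
from tables import dtype_from_descr

# Hypothetical stand-in for the project's real LocusTable description;
# the column names here are invented for illustration.
class LocusTable(tables.IsDescription):
    CHROMOSOME = tables.StringCol(32, pos=1)
    START = tables.Int64Col(pos=2)
    END = tables.Int64Col(pos=3)

# With the dtype converted once, parsed loci can be packed straight into records:
records = np.zeros((1,), dtype=dtype_from_descr(LocusTable))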
Example #11
        raise NotImplementedError


class IndexFormat(tables.IsDescription):
    DOMAIN_ID = tables.Int64Col(pos=1)
    POSITION = tables.Int64Col(pos=2)
    LENGTH = tables.Int64Col(pos=3)


class PrivateIndexFormat(tables.IsDescription):
    LOCATION = tables.Int64Col(pos=1)
    LENGTH = tables.Int64Col(pos=2)
    OFFSET = tables.Int64Col(pos=3)


private_index_format_dtype = tables.dtype_from_descr(PrivateIndexFormat)


class PrivateIndexDataFormat(tables.IsDescription):
    ID = tables.Int64Col(pos=1)


private_index_data_format_dtype = tables.dtype_from_descr(
    PrivateIndexDataFormat)

########################################################################################################################


def _validator(data):
    return data
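As a minimal illustration (not part of the original module) of what tables.dtype_from_descr returns for the descriptions above; the field order follows the pos= arguments:

import numpy as np

# Assuming the definitions above are in scope:
print(private_index_format_dtype.names)       # ('LOCATION', 'LENGTH', 'OFFSET')
print(private_index_data_format_dtype.names)  # ('ID',)

# The converted dtypes can back plain NumPy buffers that match the table rows:
rows = np.zeros((4,), dtype=private_index_format_dtype)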
Example #13
''' All functions acting on the hits of one DUT are listed here'''
import numpy as np
import tables as tb

from testbeam_analysis import analysis_functions
from testbeam_analysis.cpp import data_struct

events = np.arange(10, dtype=np.int64)
cluster = np.zeros((events.shape[0], ),
                   dtype=tb.dtype_from_descr(data_struct.ClusterInfoTable))

cluster = np.ascontiguousarray(cluster)
events = np.ascontiguousarray(events)

mapped_cluster = np.zeros(
    (events.shape[0], ),
    dtype=tb.dtype_from_descr(data_struct.ClusterInfoTable))
mapped_cluster['mean_column'] = np.nan
mapped_cluster['mean_row'] = np.nan
mapped_cluster['charge'] = np.nan
mapped_cluster = np.ascontiguousarray(mapped_cluster)

analysis_functions.map_cluster(events, cluster, mapped_cluster)

print 'OK'
Example #14
def create_hit_table(input_file_name, tdc_calibation_file, plsr_dac_calibation_file, n_sub_files=8):  # loops over all ROOT files and merges the data into an HDF5 file aligned at the event number
    print 'Converting data from CERN ROOT TTree to hdf5 table'
    charge_calibration_values, tdc_calibration, tdc_error, tot_calibration, tot_error = get_charge_calibration(tdc_calibation_file, plsr_dac_calibation_file)

    # add all files that have the input_file_name prefix and load their data
    input_file_names = [input_file_name + '_t%d.root' % index for index in range(n_sub_files) if os.path.isfile(input_file_name + '_t%d.root' % index)]
    n_files = len(input_file_names)
    input_files_root = [r.TFile(file_name, 'read') for file_name in input_file_names]
    pixel_digits = [input_file_root.Get('EventData').Get('Pixel Digits') for input_file_root in input_files_root]
    n_hits = [pixel_digit.GetEntries() for pixel_digit in pixel_digits]  # total pixel hits to analyze
    n_total_hits = sum(n_hits)

    with tb.open_file(input_file_name + '_interpreted.h5', 'w') as out_file_h5:
        hit_table = out_file_h5.create_table(out_file_h5.root, name='Hits', description=data_struct.HitInfoTable, title='hit_data', filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))

        # temporary data structures to be filled by ROOT
        data = {}
        for index, pixel_digit in enumerate(pixel_digits):
            column_data = {}
            for branch in pixel_digit.GetListOfBranches():  # loop over the branches
                column_data[branch.GetName()] = np.zeros(shape=1, dtype=np.int32)
                branch.SetAddress(column_data[branch.GetName()].data)
            data[index] = column_data

        # result data structure to be filled in the following loop
        hits = np.zeros((n_total_hits,), dtype=tb.dtype_from_descr(data_struct.HitInfoTable))

        # get file index with lowest event number
        for pixel_digit in pixel_digits:
            pixel_digit.GetEntry(0)
        min_event_number = min([data[index]['event'][0] for index in range(n_files)])
        actual_file_index = np.where(np.array([data[index]['event'][0] for index in range(n_files)]) == min_event_number)[0][0]

        indices = [0] * n_files

        table_index = 0

        actual_data = data[actual_file_index]
        actual_event_number = actual_data['event'][0]
        last_valid_event_number = 0
        last_tdc = 0
        expected_event_number = actual_event_number
        indices[actual_file_index] = 1

        progress_bar = progressbar.ProgressBar(widgets=['', progressbar.Percentage(), ' ', progressbar.Bar(marker='*', left='|', right='|'), ' ', progressbar.AdaptiveETA()], maxval=n_total_hits, term_width=80)
        progress_bar.start()

        def add_actual_data(actual_data, table_index):
            if actual_data['column'] >= 0 and actual_data['column'] < 80 and actual_data['row'] >= 0 and actual_data['row'] < 336:
                tdc_interpolation = interp1d(x=charge_calibration_values, y=tdc_calibration[actual_data['column'], actual_data['row']], kind='slinear', bounds_error=False, fill_value=0)
                tdc = tdc_interpolation(actual_data['charge'])
                tot_interpolation = interp1d(x=charge_calibration_values, y=tot_calibration[actual_data['column'], actual_data['row']], kind='slinear', bounds_error=False, fill_value=0)
                tot = tot_interpolation(actual_data['charge'])

                if math.isnan(tdc):  # do not add hits where tdc is NaN; these pixels have a very high threshold or do not work
                    return table_index

                if tdc == 0 and actual_data['charge'] > 10000:  # no calibration for TDC due to high charge, thus mark as TDC overflow event
                    hits[table_index]['event_status'] |= 0b0000010000000000
                    tdc = 4095

                if tot == 0 and actual_data['charge'] > 10000:  # no calibration for TOT due to high charge, thus set max tot
                    tot = 13

                hits[table_index]['event_status'] |= 0b0000000100000000
                hits[table_index]['event_number'] = actual_data['event'][0].astype(np.int64)
                hits[table_index]['column'] = (actual_data['column'] + 1).astype(np.uint8)
                hits[table_index]['row'] = (actual_data['row'] + 1).astype(np.uint16)
                hits[table_index]['TDC'] = int(actual_data['charge'] / 300.)
                hits[table_index]['tot'] = int(tot)

                table_index += 1
            return table_index

        while True:
            actual_event_number = actual_data['event'][0]
            if (actual_event_number == expected_event_number or actual_event_number == expected_event_number - 1):  # check if event number increases
                actual_index, actual_digits, actual_data = indices[actual_file_index], pixel_digits[actual_file_index], data[actual_file_index]
                table_index = add_actual_data(actual_data, table_index)
            else:  # event number does not increase, thus the events are in another file --> switch file or the event number is missing
                file_event_numbers = [data[file_index]['event'][0] for file_index in range(n_files)]  # all files actual event number
                actual_file_index = np.where(file_event_numbers == min(file_event_numbers))[0][0]
                actual_index, actual_digits, actual_data = indices[actual_file_index], pixel_digits[actual_file_index], data[actual_file_index]
                actual_event_number = actual_data['event'][0]
                table_index = add_actual_data(actual_data, table_index)

            progress_bar.update(table_index)
            expected_event_number = actual_event_number + 1
            actual_digits.GetEntry(actual_index)

            if indices[actual_file_index] < n_hits[actual_file_index]:  # simply stop when the first file is fully iterated
                indices[actual_file_index] += 1
            else:
                break

        # Set missing data and store to file
        hits[:table_index]['LVL1ID'] = hits[:table_index]['event_number'] % 255
        hits[:table_index]['BCID'] = hits[:table_index]['LVL1ID']
        hits[:table_index]['relative_BCID'] = 6
        hit_table.append(hits[:table_index])

        progress_bar.finish()

        for input_file_root in input_files_root:
            input_file_root.Close()
Example #15
def create_hit_table(input_file_name, tdc_calibation_file, plsr_dac_calibation_file, n_sub_files=8):  # loops over all ROOT files and merges the data into an HDF5 file aligned at the event number
    print 'Converting data from CERN ROOT TTree to hdf5 table'
    charge_calibration_values, tdc_calibration, tdc_error, tot_calibration, tot_error = get_charge_calibration(tdc_calibation_file, plsr_dac_calibation_file)

    # add all files that have the input_file_name prefix and load their data
    input_file_names = [input_file_name + '_t%d.root' % index for index in range(n_sub_files) if os.path.isfile(input_file_name + '_t%d.root' % index)]
    n_files = len(input_file_names)
    input_files_root = [r.TFile(file_name, 'read') for file_name in input_file_names]
    pixel_digits = [input_file_root.Get('EventData').Get('Pixel Digits') for input_file_root in input_files_root]
    n_hits = [pixel_digit.GetEntries() for pixel_digit in pixel_digits]  # total pixel hits to analyze
    n_total_hits = sum(n_hits)

    with tb.open_file(input_file_name + '_interpreted.h5', 'w') as out_file_h5:
        hit_table = out_file_h5.create_table(out_file_h5.root, name='Hits_0', description=data_struct.HitInfoTable, title='hit_data', filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))

        # temporary data structures to be filled by ROOT
        data = {}
        for index, pixel_digit in enumerate(pixel_digits):
            column_data = {}
            for branch in pixel_digit.GetListOfBranches():  # loop over the branches
                column_data[branch.GetName()] = np.zeros(shape=1, dtype=np.int32)
                branch.SetAddress(column_data[branch.GetName()].data)
            data[index] = column_data

        # result data structure to be filled in the following loop
        hits = np.zeros((n_total_hits,), dtype=tb.dtype_from_descr(data_struct.HitInfoTable))

        # get file index with lowest event number
        for pixel_digit in pixel_digits:
            pixel_digit.GetEntry(0)
        min_event_number = min([data[index]['event'][0] for index in range(n_files)])
        actual_file_index = np.where(np.array([data[index]['event'][0] for index in range(n_files)]) == min_event_number)[0][0]

        indices = [0] * n_files

        table_index = 0

        actual_data = data[actual_file_index]
        actual_event_number = actual_data['event'][0]
        last_valid_event_number = 0
        last_tdc = 0
        expected_event_number = actual_event_number
        indices[actual_file_index] = 1

        progress_bar = progressbar.ProgressBar(widgets=['', progressbar.Percentage(), ' ', progressbar.Bar(marker='*', left='|', right='|'), ' ', progressbar.AdaptiveETA()], maxval=n_total_hits, term_width=80)
        progress_bar.start()

        def add_actual_data(actual_data, table_index, hits):
            if actual_data['column'] >= col_span[0] and actual_data['column'] < col_span[1] and actual_data['row'] >= row_span[0] and actual_data['row'] < row_span[1]:  # sanity check
                tdc_interpolation = interp1d(x=charge_calibration_values, y=tdc_calibration[actual_data['column'], actual_data['row']], kind='slinear', bounds_error=False, fill_value=-1)
                tdc_error_interpolation = interp1d(x=charge_calibration_values, y=tdc_error[actual_data['column'], actual_data['row']], kind='slinear', bounds_error=False, fill_value=1)
                tdc = tdc_interpolation(actual_data['charge'])[0][0]
#                 if tdc >= 0:
#                     tdc_e = np.abs(tdc_error_interpolation(actual_data['charge'])[0][0])
#                     if tdc_e > 0:
#                         tdc = np.random.normal(tdc, tdc_e, size=1)[0]  # add noise to it from TDC method
                    
                tot_interpolation = interp1d(x=charge_calibration_values, y=tot_calibration[actual_data['column'], actual_data['row']], kind='slinear', bounds_error=False, fill_value=-1)
                tot = tot_interpolation(actual_data['charge'])[0][0]

                if math.isnan(tdc):  # do not add hits where tdc is NaN; these pixels have a very high threshold or do not work
                    return table_index

                if tdc == -1:
                    if actual_data['charge'] > 5000:  # no calibration for TDC due to high charge, thus mark as TDC overflow event
                        hits[table_index]['event_status'] |= 0b0000010100000000
                        tdc = 4095
                    else:  # below threshold thus no calibration, do not add hit
                        return table_index

                if tot == -1:
                    if actual_data['charge'] > 5000:  # most likely no calibration for TOT due to high charge, thus set max tot
                        tot = 13
                    else:  # below threshold thus no calibration, do not add hit
                        print 'WARNING: Should never trigger!'
                        return table_index

                hits[table_index]['event_status'] |= 0b0000000100000000  # set TDC and trigger word
                hits[table_index]['event_number'] = actual_data['event'][0].astype(np.int64)
                hits[table_index]['column'] = (actual_data['column'] + 1).astype(np.uint8)
                hits[table_index]['row'] = (actual_data['row'] + 1).astype(np.uint16)
                hits[table_index]['TDC'] = int(round(tdc, 0))
                hits[table_index]['tot'] = int(round(tot, 0))

                table_index += 1
            return table_index

        while True:
            actual_event_number = actual_data['event'][0]
            if (actual_event_number == expected_event_number or actual_event_number == expected_event_number - 1):  # check if event number increases
                actual_index, actual_digits, actual_data = indices[actual_file_index], pixel_digits[actual_file_index], data[actual_file_index]
                table_index = add_actual_data(actual_data, table_index, hits)
            else:  # event number does not increase, thus the events are maybe in another file --> switch file or the event number is missing
                file_event_numbers = [data[file_index]['event'][0] for file_index in range(n_files)]  # all files actual event number
                actual_file_index = np.where(file_event_numbers == min(file_event_numbers))[0][0]
                actual_index, actual_digits, actual_data = indices[actual_file_index], pixel_digits[actual_file_index], data[actual_file_index]
                actual_event_number = actual_data['event'][0]
                table_index = add_actual_data(actual_data, table_index, hits)

            progress_bar.update(table_index)
            expected_event_number = actual_event_number + 1  # the next expected event number does not have to exist; simulated events without hits are possible
            actual_digits.GetEntry(actual_index)

            if indices[actual_file_index] < n_hits[actual_file_index]:  # simply stop when the first file is fully iterated; some data is lost, of course
                indices[actual_file_index] += 1
            else:
                break

        # Set missing data and store to file
        hits[:table_index]['LVL1ID'] = hits[:table_index]['event_number'] % 255
        hits[:table_index]['BCID'] = hits[:table_index]['LVL1ID']
        hits[:table_index]['relative_BCID'] = 6
        hit_table.append(hits[:table_index])

        progress_bar.finish()

        for input_file_root in input_files_root:
            input_file_root.Close()
            
        del hits
            
    # Set data that is common to all hits of one event
    with tb.open_file(input_file_name + '_interpreted.h5', 'a') as out_file_h5:
        hits = out_file_h5.root.Hits_0[:]
        progress_bar = progressbar.ProgressBar(widgets=['', progressbar.Percentage(), ' ', progressbar.Bar(marker='*', left='|', right='|'), ' ', progressbar.AdaptiveETA()], maxval=hits.shape[0], term_width=80)
        progress_bar.start()
        actual_event_number = -1
        for ihit in range(hits.shape[0]):
            if hits[ihit]['event_number'] != actual_event_number:  # Detect new event
                if actual_event_number != -1:
                    for jhit in xrange(ihit - 1, -1, -1):
                        if hits[jhit]['event_number'] != actual_event_number:
                            break
                        hits[jhit]['TDC'] = max_tdc
                        hits[jhit]['event_status'] = max_event_status
                actual_event_number, max_tdc, max_event_status = hits[ihit]['event_number'], hits[ihit]['TDC'], hits[ihit]['event_status']
            else:
                max_event_status |= hits[ihit]['event_status']
                if max_tdc < hits[ihit]['TDC']:
                    max_tdc = hits[ihit]['TDC']
            progress_bar.update(ihit)
#             out_file_h5.remove_node(out_file_h5.root.Hits)
        hit_table = out_file_h5.create_table(out_file_h5.root, name='Hits', description=data_struct.HitInfoTable, title='hit_data', filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
        hit_table.append(hits)
        progress_bar.finish()
Example #17
        except tb.exceptions.HDF5ExtError:
            logging.info('File in inconsistent state, read again')


if __name__ == "__main__":
    interpreter = PyDataInterpreter()
    histograming = PyDataHistograming()

    interpreter.set_info_output(analysis_configuration['infos'])
    interpreter.set_warning_output(analysis_configuration['warnings'])
    interpreter.set_FEI4B(False if analysis_configuration['chip_flavor'] == 'fei4a' else True)
    interpreter.set_trig_count(analysis_configuration['trig_count'])
    histograming.set_no_scan_parameter()
    histograming.create_occupancy_hist(True)
    histograming.create_rel_bcid_hist(True)
    plt.ion()

    # holds the hits per analyze_raw_data call
    hits = np.empty((analysis_configuration['chunk_size'],),
                    dtype=tb.dtype_from_descr(data_struct.HitInfoTable))

    start_time = datetime.now()
    analyze_raw_data_file(input_file=analysis_configuration['folder'] +
                          analysis_configuration['scan_name'] + '.h5')
    logging.info('Script runtime %.1f seconds' %
                 (datetime.now() - start_time).total_seconds())

    plt.ioff()
    plt.show()
Example #18
def pprint_array(array):
    offsets = []
    for column_name in array.dtype.names:
        print column_name, "\t",
        offsets.append(column_name.count(""))
    for row in array:
        print ""
        for i, column in enumerate(row):
            print " " * (offsets[i] / 2), column, "\t",
    print ""


if __name__ == "__main__":
    # create some fake data
    hits = np.ones(shape=(10,), dtype=dtype_from_descr(data_struct.HitInfoTable))
    for i, hit in enumerate(hits):
        hit[0] = i / 2
        hit[1] = i / 2
        hit[2] = i + 2
        hit[3] = i % 2 + 10
        hit[4] = i % 3 + 1
    hits[8]["event_number"] = 3

    # create results arrays to be filled by the clusterizer
    cluster_hits = np.zeros_like(hits, dtype=dtype_from_descr(data_struct.ClusterHitInfoTable))
    cluster = np.zeros_like(hits, dtype=dtype_from_descr(data_struct.ClusterInfoTable))

    # create clusterizer object
    clusterizer = HitClusterizer()