Example #1
def get_time_walk_hist(hit_file, charge_calibration, event_status_select_mask,
                       event_status_condition, hit_selection_conditions,
                       max_timestamp, max_tdc, max_charge):
    with tb.open_file(hit_file, 'r') as in_file_h5:
        cluster_hit_table = in_file_h5.root.ClusterHits

        logging.info(
            'Select hits and create TDC histograms for %d cut conditions',
            len(hit_selection_conditions))
        progress_bar = progressbar.ProgressBar(
            widgets=[
                '',
                progressbar.Percentage(), ' ',
                progressbar.Bar(marker='*', left='|', right='|'), ' ',
                progressbar.AdaptiveETA()
            ],
            maxval=cluster_hit_table.shape[0],
            term_width=80)
        progress_bar.start()
        n_hits, n_selected_hits = 0, 0
        timewalk = np.zeros(shape=(200, max_timestamp), dtype=np.float32)
        for cluster_hits, _ in analysis_utils.data_aligned_at_events(
                cluster_hit_table, chunk_size=10000000):
            n_hits += cluster_hits.shape[0]
            selected_events_cluster_hits = cluster_hits[np.logical_and(
                cluster_hits['TDC'] < max_tdc,
                (cluster_hits['event_status']
                 & event_status_select_mask) == event_status_condition)]
            for condition in hit_selection_conditions:
                selected_cluster_hits = analysis_utils.select_hits(
                    selected_events_cluster_hits, condition)
                n_selected_hits += selected_cluster_hits.shape[0]
                column_index, row_index, tdc, tdc_timestamp = selected_cluster_hits[
                    'column'] - 1, selected_cluster_hits[
                        'row'] - 1, selected_cluster_hits[
                            'TDC'], selected_cluster_hits['TDC_time_stamp']

                # Charge values for each Col/Row/TDC tuple from per pixel charge calibration
                # and PlsrDAC calibration in electrons
                charge_values = plsr_dac_to_charge(
                    charge_calibration[column_index, row_index,
                                       tdc]).astype(np.float32)

                actual_timewalk, xedges, yedges = np.histogram2d(
                    charge_values,
                    tdc_timestamp,
                    bins=timewalk.shape,
                    range=((0, max_charge), (0, max_timestamp)))
                timewalk += actual_timewalk

            progress_bar.update(n_hits)
        progress_bar.finish()
        logging.info('Selected %d of %d hits = %1.1f percent', n_selected_hits,
                     n_hits,
                     float(n_selected_hits) / float(n_hits) * 100.0)
    return timewalk, xedges, yedges
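The chunked accumulation above works because every chunk is histogrammed with identical bins and ranges, so the per-chunk 2D histograms can simply be summed. A minimal, self-contained numpy sketch of that pattern on synthetic charge/timestamp data (bin counts and ranges are made up for illustration):

import numpy as np

n_charge_bins, max_charge = 200, 20000.
n_timestamp_bins, max_timestamp = 256, 256.
timewalk = np.zeros((n_charge_bins, n_timestamp_bins), dtype=np.float32)

np.random.seed(0)
for _ in range(5):  # stands in for the per-chunk loop over cluster hits
    charge = np.random.uniform(0, max_charge, size=10000)
    tdc_timestamp = np.random.uniform(0, max_timestamp, size=10000)
    chunk_hist, xedges, yedges = np.histogram2d(
        charge, tdc_timestamp,
        bins=timewalk.shape,
        range=((0, max_charge), (0, max_timestamp)))
    timewalk += chunk_hist  # identical bin edges in every chunk, so the sum is valid

print(timewalk.sum())  # total number of histogrammed entries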
Example #2
def select_hits_from_cluster_info(input_file_hits,
                                  output_file_hits,
                                  cluster_size_condition,
                                  n_cluster_condition,
                                  chunk_size=4000000):
    ''' Takes a hit table and stores only selected hits into a new table. The selection is done on an event basis: events are selected if they contain a certain number of clusters or clusters of a certain size.
    To speed up the analysis, an event-number index for the input hit file is created first. Since a cluster hit table can be created nowadays, this way of hit selection is not needed anymore.

    Parameters
    ----------
    input_file_hits: str
        the input file name with hits
    output_file_hits: str
        the output file name for the hits
    cluster_size_condition: str
        the cluster size condition to select events (e.g. 'cluster_size_condition <= 2')
    n_cluster_condition: str
        the condition on the number of clusters in an event (e.g. 'n_cluster_condition == 1')
    '''
    logging.info('Write hits of events from ' + str(input_file_hits) +
                 ' with ' + cluster_size_condition + ' and ' +
                 n_cluster_condition + ' into ' + str(output_file_hits))
    with tb.open_file(input_file_hits, mode="r+") as in_hit_file_h5:
        analysis_utils.index_event_number(in_hit_file_h5.root.Hits)
        analysis_utils.index_event_number(in_hit_file_h5.root.Cluster)
        with tb.open_file(output_file_hits, mode="w") as out_hit_file_h5:
            hit_table_out = out_hit_file_h5.create_table(
                out_hit_file_h5.root,
                name='Hits',
                description=data_struct.HitInfoTable,
                title='hit_data',
                filters=tb.Filters(complib='blosc',
                                   complevel=5,
                                   fletcher32=False))
            cluster_table = in_hit_file_h5.root.Cluster
            last_word_number = 0
            progress_bar = progressbar.ProgressBar(
                widgets=[
                    '',
                    progressbar.Percentage(), ' ',
                    progressbar.Bar(marker='*', left='|', right='|'), ' ',
                    progressbar.AdaptiveETA()
                ],
                maxval=cluster_table.shape[0],
                term_width=80)
            progress_bar.start()
            for data, index in analysis_utils.data_aligned_at_events(
                    cluster_table, chunk_size=chunk_size):
                selected_events_1 = analysis_utils.get_events_with_cluster_size(
                    event_number=data['event_number'],
                    cluster_size=data['size'],
                    condition=cluster_size_condition
                )  # select the events with clusters of a certain size
                selected_events_2 = analysis_utils.get_events_with_n_cluster(
                    event_number=data['event_number'],
                    condition=n_cluster_condition
                )  # select the events with a certain cluster number
                selected_events = analysis_utils.get_events_in_both_arrays(
                    selected_events_1, selected_events_2
                )  # select events with both conditions above
                logging.debug('Selected ' + str(len(selected_events)) +
                              ' events with ' + n_cluster_condition + ' and ' +
                              cluster_size_condition)
                last_word_number = analysis_utils.write_hits_in_events(
                    hit_table_in=in_hit_file_h5.root.Hits,
                    hit_table_out=hit_table_out,
                    events=selected_events,
                    start_hit_word=last_word_number
                )  # write the hits of the selected events into a new table
                progress_bar.update(index)
            progress_bar.finish()
            in_hit_file_h5.root.meta_data.copy(
                out_hit_file_h5.root)  # copy meta_data node to the new file
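analysis_utils.get_events_in_both_arrays above intersects the two per-condition event lists. On sorted, unique event numbers this is the same operation as a plain numpy set intersection; a small illustrative sketch with toy event numbers (not the actual analysis_utils implementation):

import numpy as np

# toy event numbers fulfilling each condition
events_with_small_clusters = np.array([1, 2, 4, 7, 9], dtype=np.int64)
events_with_one_cluster = np.array([2, 3, 4, 9, 10], dtype=np.int64)

# events fulfilling both conditions (assumes sorted, unique inputs)
selected_events = np.intersect1d(events_with_small_clusters,
                                 events_with_one_cluster,
                                 assume_unique=True)
print(selected_events)  # [2 4 9]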
Example #3
def analyse_n_cluster_per_event(scan_base,
                                include_no_cluster=False,
                                time_line_absolute=True,
                                combine_n_readouts=1000,
                                chunk_size=10000000,
                                plot_n_cluster_hists=False,
                                output_pdf=None,
                                output_file=None):
    ''' Determines the number of clusters per event as a function of time. For this, the data of a fixed number of read-outs are combined ('combine_n_readouts').

    Parameters
    ----------
    scan_base: list of str
        scan base names (e.g.: ['//data//SCC_50_fei4_self_trigger_scan_390', ])
    include_no_cluster: bool
        Set to True to also consider all events without any hit.
    combine_n_readouts: int
        the number of read-outs to combine (e.g. 1000)
    chunk_size: int
        the maximum chunk size used during reading; if too large a memory error can occur, if too small the analysis takes longer
    output_pdf: PdfPages
        PdfPages file object; if None the plot is shown on screen
    '''

    time_stamp = []
    n_cluster = []

    start_time_set = False

    for data_file in scan_base:
        with tb.open_file(data_file + '_interpreted.h5',
                          mode="r+") as in_cluster_file_h5:
            # get data and data pointer
            meta_data_array = in_cluster_file_h5.root.meta_data[:]
            cluster_table = in_cluster_file_h5.root.Cluster

            # determine the event ranges to analyze (timestamp_start, start_event_number, stop_event_number)
            parameter_ranges = np.column_stack(
                (analysis_utils.get_ranges_from_array(
                    meta_data_array['timestamp_start'][::combine_n_readouts]),
                 analysis_utils.get_ranges_from_array(
                     meta_data_array['event_number'][::combine_n_readouts])))

            # create an event_number index (important for speed)
            analysis_utils.index_event_number(cluster_table)

            # initialize the analysis and set settings
            analyze_data = AnalyzeRawData()
            analyze_data.create_tot_hist = False
            analyze_data.create_bcid_hist = False

            # variables for read speed up
            index = 0  # index where to start the read out, 0 at the beginning, increased during looping
            best_chunk_size = chunk_size

            total_cluster = cluster_table.shape[0]

            progress_bar = progressbar.ProgressBar(widgets=[
                '',
                progressbar.Percentage(), ' ',
                progressbar.Bar(marker='*', left='|', right='|'), ' ',
                progressbar.AdaptiveETA()
            ],
                                                   maxval=total_cluster,
                                                   term_width=80)
            progress_bar.start()

            # loop over the selected events
            for parameter_index, parameter_range in enumerate(
                    parameter_ranges):
                logging.debug('Analyze time stamp ' + str(parameter_range[0]) +
                              ' and data from events = [' +
                              str(parameter_range[2]) + ',' +
                              str(parameter_range[3]) + '[ ' +
                              str(int(100.0 * parameter_index /
                                      len(parameter_ranges))) + '%')
                analyze_data.reset()  # resets the data of the last analysis

                # loop over the cluster in the actual selected events with optimizations: determine best chunk size, start word index given
                readout_cluster_len = 0  # variable to calculate an optimal chunk size from the number of clusters read, for speed up
                hist = None
                for clusters, index in analysis_utils.data_aligned_at_events(
                        cluster_table,
                        start_event_number=parameter_range[2],
                        stop_event_number=parameter_range[3],
                        start_index=index,
                        chunk_size=best_chunk_size):
                    n_cluster_per_event = analysis_utils.get_n_cluster_in_events(
                        clusters['event_number']
                    )[:, 1]  # number of clusters per event; events without clusters do not appear, so values are >= 1
                    if hist is None:
                        hist = np.histogram(n_cluster_per_event,
                                            bins=10,
                                            range=(0, 10))[0]
                    else:
                        hist = np.add(
                            hist,
                            np.histogram(n_cluster_per_event,
                                         bins=10,
                                         range=(0, 10))[0])
                    if include_no_cluster and parameter_range[
                            3] is not None:  # parameter_range[3] is None only for the last read-out
                        hist[0] = (parameter_range[3] -
                                   parameter_range[2]) - len(
                                       n_cluster_per_event
                                   )  # add the events without any cluster
                    readout_cluster_len += clusters.shape[0]
                    total_cluster -= len(clusters)
                    progress_bar.update(index)
                best_chunk_size = int(1.5 * readout_cluster_len) if int(
                    1.05 * readout_cluster_len
                ) < chunk_size else chunk_size  # to increase the read-out speed, estimate the number of clusters for the next read instruction

                if plot_n_cluster_hists:
                    plotting.plot_1d_hist(
                        hist,
                        title='Number of cluster per event at ' +
                        str(parameter_range[0]),
                        x_axis_title='Number of cluster',
                        y_axis_title='#',
                        log_y=True,
                        filename=output_pdf)
                hist = hist.astype('f4') / np.sum(
                    hist)  # calculate fraction from total numbers

                if time_line_absolute:
                    time_stamp.append(parameter_range[0])
                else:
                    if not start_time_set:
                        start_time = parameter_ranges[0, 0]
                        start_time_set = True
                    time_stamp.append((parameter_range[0] - start_time) / 60.0)
                n_cluster.append(hist)
            progress_bar.finish()
            if total_cluster != 0:
                logging.warning(
                    'Not all clusters were selected during analysis. Analysis is therefore not exact'
                )

    if time_line_absolute:
        plotting.plot_scatter_time(
            time_stamp,
            n_cluster,
            title='Number of cluster per event as a function of time',
            marker_style='o',
            filename=output_pdf,
            legend=('0 cluster', '1 cluster', '2 cluster',
                    '3 cluster') if include_no_cluster else
            ('0 cluster not plotted', '1 cluster', '2 cluster', '3 cluster'))
    else:
        plotting.plot_scatter(
            time_stamp,
            n_cluster,
            title='Number of cluster per event as a function of time',
            x_label='time [min.]',
            marker_style='o',
            filename=output_pdf,
            legend=('0 cluster', '1 cluster', '2 cluster',
                    '3 cluster') if include_no_cluster else
            ('0 cluster not plotted', '1 cluster', '2 cluster', '3 cluster'))
    if output_file:
        with tb.open_file(output_file, mode="a") as out_file_h5:
            cluster_array = np.array(n_cluster)
            rec_array = np.array(zip(time_stamp, cluster_array[:, 0],
                                     cluster_array[:, 1], cluster_array[:, 2],
                                     cluster_array[:, 3], cluster_array[:, 4],
                                     cluster_array[:, 5]),
                                 dtype=[('time_stamp', float),
                                        ('cluster_0', float),
                                        ('cluster_1', float),
                                        ('cluster_2', float),
                                        ('cluster_3', float),
                                        ('cluster_4', float),
                                        ('cluster_5', float)
                                        ]).view(np.recarray)
            try:
                n_cluster_table = out_file_h5.create_table(
                    out_file_h5.root,
                    name='n_cluster',
                    description=rec_array,
                    title='Cluster per event',
                    filters=tb.Filters(complib='blosc',
                                       complevel=5,
                                       fletcher32=False))
                n_cluster_table[:] = rec_array
            except tb.exceptions.NodeError:
                logging.warning(
                    output_file +
                    ' already contains an n_cluster node, do not overwrite existing.')
    return time_stamp, n_cluster
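analysis_utils.get_n_cluster_in_events returns, for every event, how many clusters it contains. For event-sorted cluster data the same counting can be done with np.unique; a hedged stand-in showing that step plus the 10-bin histogram used above (toy event numbers, the actual helper may be implemented differently):

import numpy as np

# event number of each cluster, sorted by event (as data_aligned_at_events delivers it)
cluster_event_numbers = np.array([10, 10, 11, 13, 13, 13, 14], dtype=np.int64)

events, n_cluster_per_event = np.unique(cluster_event_numbers, return_counts=True)
# events:              [10 11 13 14]
# n_cluster_per_event: [ 2  1  3  1]   (>= 1, events without clusters do not appear here)

hist = np.histogram(n_cluster_per_event, bins=10, range=(0, 10))[0]
print(hist)  # bin 0 stays empty unless the events without any cluster are added separately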
Example #4
def analyze_beam_spot(scan_base,
                      combine_n_readouts=1000,
                      chunk_size=10000000,
                      plot_occupancy_hists=False,
                      output_pdf=None,
                      output_file=None):
    ''' Determines the mean x and y beam spot position as a function of time. For this, the data of a fixed number of read-outs are combined ('combine_n_readouts'). The occupancy is determined
    for the combined events and stored into a pdf file. At the end the beam x and y positions are plotted into a scatter plot with absolute positions in um.

    Parameters
    ----------
    scan_base: list of str
        scan base names (e.g.: ['//data//SCC_50_fei4_self_trigger_scan_390', ])
    combine_n_readouts: int
        the number of read-outs to combine (e.g. 1000)
    chunk_size: int
        the maximum chunk size used during reading; if too large a memory error can occur, if too small the analysis takes longer
    output_pdf: PdfPages
        PdfPages file object; if None the plot is shown on screen
    '''
    time_stamp = []
    x = []
    y = []

    for data_file in scan_base:
        with tb.open_file(data_file + '_interpreted.h5',
                          mode="r+") as in_hit_file_h5:
            # get data and data pointer
            meta_data_array = in_hit_file_h5.root.meta_data[:]
            hit_table = in_hit_file_h5.root.Hits

            # determine the event ranges to analyze (timestamp_start, start_event_number, stop_event_number)
            parameter_ranges = np.column_stack(
                (analysis_utils.get_ranges_from_array(
                    meta_data_array['timestamp_start'][::combine_n_readouts]),
                 analysis_utils.get_ranges_from_array(
                     meta_data_array['event_number'][::combine_n_readouts])))

            # create an event_number index (important)
            analysis_utils.index_event_number(hit_table)

            # initialize the analysis and set settings
            analyze_data = AnalyzeRawData()
            analyze_data.create_tot_hist = False
            analyze_data.create_bcid_hist = False
            analyze_data.histogram.set_no_scan_parameter()

            # variables for read speed up
            index = 0  # index where to start the read out, 0 at the beginning, increased during looping
            best_chunk_size = chunk_size

            progress_bar = progressbar.ProgressBar(widgets=[
                '',
                progressbar.Percentage(), ' ',
                progressbar.Bar(marker='*', left='|', right='|'), ' ',
                progressbar.AdaptiveETA()
            ],
                                                   maxval=hit_table.shape[0],
                                                   term_width=80)
            progress_bar.start()

            # loop over the selected events
            for parameter_index, parameter_range in enumerate(
                    parameter_ranges):
                logging.debug('Analyze time stamp ' + str(parameter_range[0]) +
                              ' and data from events = [' +
                              str(parameter_range[2]) + ',' +
                              str(parameter_range[3]) + '[ ' +
                              str(int(100.0 * parameter_index /
                                      len(parameter_ranges))) + '%')
                analyze_data.reset()  # resets the data of the last analysis

                # loop over the hits in the actual selected events with optimizations: determine best chunk size, start word index given
                readout_hit_len = 0  # variable to calculate an optimal chunk size from the number of hits read, for speed up
                for hits, index in analysis_utils.data_aligned_at_events(
                        hit_table,
                        start_event_number=parameter_range[2],
                        stop_event_number=parameter_range[3],
                        start_index=index,
                        chunk_size=best_chunk_size):
                    analyze_data.analyze_hits(
                        hits)  # analyze the selected hits in chunks
                    readout_hit_len += hits.shape[0]
                    progress_bar.update(index)
                best_chunk_size = int(1.5 * readout_hit_len) if int(
                    1.05 * readout_hit_len
                ) < chunk_size else chunk_size  # to increase the read-out speed, estimate the number of hits for the next read instruction

                # get and store results
                occupancy_array = analyze_data.histogram.get_occupancy()
                projection_x = np.sum(occupancy_array, axis=0).ravel()
                projection_y = np.sum(occupancy_array, axis=1).ravel()
                x.append(
                    analysis_utils.get_mean_from_histogram(projection_x,
                                                           bin_positions=range(
                                                               0, 80)))
                y.append(
                    analysis_utils.get_mean_from_histogram(projection_y,
                                                           bin_positions=range(
                                                               0, 336)))
                time_stamp.append(parameter_range[0])
                if plot_occupancy_hists:
                    plotting.plot_occupancy(
                        occupancy_array[:, :, 0],
                        title='Occupancy for events between ' + time.strftime(
                            '%H:%M:%S', time.localtime(parameter_range[0])) +
                        ' and ' + time.strftime(
                            '%H:%M:%S', time.localtime(parameter_range[1])),
                        filename=output_pdf)
            progress_bar.finish()
    plotting.plot_scatter([i * 250 for i in x], [i * 50 for i in y],
                          title='Mean beam position',
                          x_label='x [um]',
                          y_label='y [um]',
                          marker_style='-o',
                          filename=output_pdf)
    if output_file:
        with tb.open_file(output_file, mode="a") as out_file_h5:
            rec_array = np.array(zip(time_stamp, x, y),
                                 dtype=[('time_stamp', float), ('x', float),
                                        ('y', float)])
            try:
                beam_spot_table = out_file_h5.create_table(
                    out_file_h5.root,
                    name='Beamspot',
                    description=rec_array,
                    title='Beam spot position',
                    filters=tb.Filters(complib='blosc',
                                       complevel=5,
                                       fletcher32=False))
                beam_spot_table[:] = rec_array
            except tb.exceptions.NodeError:
                logging.warning(
                    output_file +
                    ' already contains a Beamspot node, do not overwrite existing.')
    return time_stamp, x, y
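The beam position is just the weighted mean of the occupancy projections, converted to um with the pixel pitch used in the plotting call above (x * 250, y * 50). A minimal numpy sketch of that step on a toy occupancy map; get_mean_from_histogram is assumed to compute such a weighted mean:

import numpy as np

occupancy = np.random.poisson(lam=5.0, size=(336, 80))  # toy (row, column) occupancy map

projection_x = occupancy.sum(axis=0)  # counts per column (80 bins)
projection_y = occupancy.sum(axis=1)  # counts per row (336 bins)

mean_col = np.average(np.arange(80), weights=projection_x)   # mean column position
mean_row = np.average(np.arange(336), weights=projection_y)  # mean row position

print('beam spot: x = %.1f um, y = %.1f um' % (mean_col * 250., mean_row * 50.))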
def histogram_tdc_hits(input_file_hits, hit_selection_conditions, event_status_select_mask, event_status_condition, calibation_file=None, max_tdc=2000):
    for condition in hit_selection_conditions:
        logging.info('Histogram tdc hits with %s' % condition)

    def get_charge(max_tdc, tdc_calibration_values, tdc_pixel_calibration):  # return the charge from calibration
        charge_calibration = np.zeros(shape=(80, 336, max_tdc))
        for column in range(80):
            for row in range(336):
                actual_pixel_calibration = tdc_pixel_calibration[column, row, :]
                if np.any(actual_pixel_calibration != 0):
                    interpolation = interp1d(x=actual_pixel_calibration, y=tdc_calibration_values, kind='slinear', bounds_error=False, fill_value=0)
                    charge_calibration[column, row, :] = interpolation(np.arange(max_tdc))
        return charge_calibration

    with tb.openFile(input_file_hits, mode="r") as in_hit_file_h5:
        cluster_hit_table = in_hit_file_h5.root.ClusterHits

        shape_tdc_hist, shape_mean_tdc_hist = (80, 336, max_tdc), (80, 336)
        shape_tdc_timestamp_hist, shape_mean_tdc_timestamp_hist = (80, 336, 256), (80, 336)
        tdc_hists_per_condition = [np.zeros(shape=shape_tdc_hist, dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        tdc_timestamp_hists_per_condition = [np.zeros(shape=shape_tdc_timestamp_hist, dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        mean_tdc_hists_per_condition = [np.zeros(shape=shape_mean_tdc_hist, dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        mean_tdc_timestamp_hists_per_condition = [np.zeros(shape=shape_mean_tdc_timestamp_hist, dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []

        n_hits_per_condition = [0 for _ in range(len(hit_selection_conditions) + 2)]  # conditions 1/2 are all hits / hits of good events

        for cluster_hits, _ in analysis_utils.data_aligned_at_events(cluster_hit_table, chunk_size=2e7):
            n_hits_per_condition[0] += cluster_hits.shape[0]
            selected_events_cluster_hits = cluster_hits[(cluster_hits['event_status'] & event_status_select_mask) == event_status_condition]
            n_hits_per_condition[1] += selected_events_cluster_hits.shape[0]
            for index, condition in enumerate(hit_selection_conditions):
                selected_cluster_hits = analysis_utils.select_hits(selected_events_cluster_hits, condition)
                n_hits_per_condition[2 + index] += selected_cluster_hits.shape[0]
                column, row, tdc = selected_cluster_hits['column'] - 1, selected_cluster_hits['row'] - 1, selected_cluster_hits['TDC']
                tdc_hists_per_condition[index] += analysis_utils.hist_3d_index(column, row, tdc, shape=shape_tdc_hist)
                mean_tdc_hists_per_condition[index] = np.average(tdc_hists_per_condition[index], axis=2, weights=range(0, max_tdc)) * np.sum(np.arange(0, max_tdc)) / tdc_hists_per_condition[index].sum(axis=2)
                tdc_timestamp = selected_cluster_hits['TDC_time_stamp']
                tdc_timestamp_hists_per_condition[index] += analysis_utils.hist_3d_index(column, row, tdc_timestamp, shape=shape_tdc_timestamp_hist)
                mean_tdc_timestamp_hists_per_condition[index] = np.average(tdc_timestamp_hists_per_condition[index], axis=2, weights=range(0, shape_tdc_timestamp_hist[2])) * np.sum(np.arange(0, shape_tdc_timestamp_hist[2])) / tdc_timestamp_hists_per_condition[index].sum(axis=2)

        plotThreeWay(mean_tdc_hists_per_condition[0].T * 1.5625, title='Mean TDC, condition 1', filename='test_tdc.pdf')  # , minimum=50, maximum=250)
        plotThreeWay(mean_tdc_timestamp_hists_per_condition[0].T * 1.5625, title='Mean TDC delay, condition 1', filename='test_tdc_ts.pdf', minimum=20, maximum=60)

        with tb.open_file(input_file_hits[:-3] + '_tdc_hists.h5', mode="w") as out_file_h5:
            for index, condition in enumerate(hit_selection_conditions):
                tdc_hist_result = np.swapaxes(tdc_hists_per_condition[index], 0, 1)
                tdc_timestamp_hist_result = np.swapaxes(tdc_timestamp_hists_per_condition[index], 0, 1)
                out = out_file_h5.createCArray(out_file_h5.root, name='HistPixelTdcCondition_%d' % index, title='Hist PixelTdc with %s' % condition, atom=tb.Atom.from_dtype(tdc_hist_result.dtype), shape=tdc_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_2 = out_file_h5.createCArray(out_file_h5.root, name='HistPixelTdcTimestampCondition_%d' % index, title='Hist PixelTdcTimestamp with %s' % condition, atom=tb.Atom.from_dtype(tdc_timestamp_hist_result.dtype), shape=tdc_timestamp_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out.attrs.dimensions = 'column, row, TDC value'
                out.attrs.condition = condition
                out.attrs.tdc_values = range(max_tdc)
                out_2.attrs.dimensions = 'column, row, TDC time stamp value'
                out_2.attrs.condition = condition
                out_2.attrs.tdc_values = range(shape_tdc_timestamp_hist[2])
                out[:] = tdc_hist_result
                out_2[:] = tdc_timestamp_hist_result

    with PdfPages(input_file_hits[:-3] + '_calibrated_tdc_hists.pdf') as output_pdf:
        logging.info('Create hits selection efficiency histogram for %d conditions' % (len(hit_selection_conditions) + 2))
        labels = ['All Hits', 'Hits of\ngood events']
        for condition in hit_selection_conditions:
            condition = re.sub('[&]', '\n', condition)
            condition = re.sub('[()]', '', condition)
            labels.append(condition)
        plt.bar(range(len(n_hits_per_condition)), n_hits_per_condition, align='center')
        plt.xticks(range(len(n_hits_per_condition)), labels, size=8)
        plt.title('Number of hits for different cuts')
        plt.ylabel('#')
        plt.grid()
        for x, y in zip(np.arange(len(n_hits_per_condition)), n_hits_per_condition):
            plt.annotate('%d' % (float(y) / float(n_hits_per_condition[0]) * 100.) + r'%', xy=(x, y / 2.), xycoords='data', color='grey', size=15)
        output_pdf.savefig()

        if calibation_file is not None:
            with tb.openFile(calibation_file, mode="r") as in_file_h5:
                tdc_calibration = in_file_h5.root.HitOrCalibration[:, :, 1:, 1]
                tdc_calibration_values = in_file_h5.root.HitOrCalibration.attrs.scan_parameter_values[1:]

            charge = get_charge(max_tdc, tdc_calibration_values, tdc_calibration)
            plt.clf()

            with tb.openFile(input_file_hits[:-3] + '_calibrated_tdc_hists.h5', mode="w") as out_file_h5:
                logging.info('Create corrected TDC histogram for %d conditions' % len(hit_selection_conditions))
                for index, condition in enumerate(hit_selection_conditions):
                    c_str = re.sub('[&]', '\n', condition)
                    x, y = [], []
                    for column in range(0, 80, 1):
                        for row in range(0, 336, 1):
                            if tdc_hists_per_condition[0][column, row, :].sum() < analysis_configuration['min_pixel_hits']:
                                continue
                            x.extend(charge[column, row, :].ravel())
                            y.extend(tdc_hists_per_condition[index][column, row, :].ravel())
                    x, y, _ = analysis_utils.get_profile_histogram(np.array(x) * 55., np.array(y), n_bins=120)
                    result = np.zeros(shape=(x.shape[0], ), dtype=[("x", np.float), ("y", np.float)])
                    result['x'], result['y'] = x, y
                    actual_tdc_hist_table = out_file_h5.create_table(out_file_h5.root, name='TdcHistTableCondition%d' % index, description=result.dtype, title='TDC histogram', filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                    actual_tdc_hist_table.append(result)
                    actual_tdc_hist_table.attrs.condition = condition
                    if index == 0:
                        normalization = 100. / np.amax(y)
                    plt.plot(x, y * normalization, '.', label=c_str)
                # Plot hists into one plot
                plt.plot([27.82 * 55., 27.82 * 55.], [0, 100], label='Threshold %d e' % (27.82 * 55.), linewidth=2)
                plt.ylim((0, 100))
                plt.legend(loc=0, prop={'size': 12})
                plt.xlabel('Charge [e]')
                plt.ylabel('#')
                plt.grid()
                output_pdf.savefig()
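get_charge above inverts a per-pixel PlsrDAC-vs-TDC calibration by interpolating the charge as a function of the measured TDC value. A self-contained sketch of that inversion for a single pixel with made-up calibration points (requires scipy):

import numpy as np
from scipy.interpolate import interp1d

max_tdc = 2000
# made-up calibration: scanned PlsrDAC values and the mean TDC response measured for each
tdc_calibration_values = np.array([50., 100., 200., 400., 800.])     # PlsrDAC
actual_pixel_calibration = np.array([80., 180., 400., 850., 1750.])  # mean TDC

# charge(TDC): interpolate PlsrDAC as a function of TDC, zero outside the calibrated range
interpolation = interp1d(x=actual_pixel_calibration, y=tdc_calibration_values,
                         kind='slinear', bounds_error=False, fill_value=0)
charge_calibration = interpolation(np.arange(max_tdc))

print(charge_calibration[400])  # PlsrDAC value corresponding to a TDC count of 400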
def align_events(input_file, output_file, fix_event_number=True, fix_trigger_number=True, chunk_size=20000000):
    ''' Selects only hits from good events and checks the distance between event number and trigger number for each hit.
    If the FE data allowed a successful event recognition, the distance is always constant (apart from the trigger number overflow).
    Otherwise the event number is corrected using the trigger number. The number of inconsistencies found is counted, as well as the number of hits that had to be corrected.
    Remark: A single wrongly analyzed event shifts all following event numbers and destroys the correlation! But usually the data does not have to be corrected.

    Parameters
    ----------
    input_file : str
        file name of the input pytables hit file
    output_file : str
        file name of the output pytables hit file
    chunk_size : int
        how many hits are read at once into RAM for correction
    '''
    logging.info('Align events to trigger number in %s' % input_file)

    with tb.open_file(input_file, 'r') as in_file_h5:
        hit_table = in_file_h5.root.Hits
        jumps = []  # variable to determine the jumps in the event-number to trigger-number offset
        n_fixed_hits = 0  # events that were fixed

        with tb.open_file(output_file, 'w') as out_file_h5:
            hit_table_description = data_struct.HitInfoTable().columns.copy()
            hit_table_out = out_file_h5.createTable(out_file_h5.root, name='Hits', description=hit_table_description, title='Selected hits for test beam analysis', filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False), chunkshape=(chunk_size,))
            # Correct hit event number
            for hits, _ in analysis_utils.data_aligned_at_events(hit_table, chunk_size=chunk_size):
                if fix_trigger_number is True:
                    selection = np.logical_or((hits['trigger_status'] & 0b00000001) == 0b00000001,
                                              (hits['event_status'] & 0b0000000000000010) == 0b0000000000000010)
                    selected_te_hits = np.where(selection)[0]  # select both events with and without hit that have trigger error flag set

                    assert selected_te_hits[0] > 0
                    tmp_trigger_number = hits['trigger_number'].astype(np.int32)

                    # save trigger and event number for plotting correlation between trigger number and event number
                    event_number, trigger_number = hits['event_number'].copy(), hits['trigger_number'].copy()

                    offset = (hits['trigger_number'][selected_te_hits] - hits['trigger_number'][selected_te_hits - 1] - hits['event_number'][selected_te_hits] + hits['event_number'][selected_te_hits - 1]).astype(np.int32)  # save jumps in trigger number
                    offset_tot = np.cumsum(offset)

                    offset_tot[offset_tot > 32768] = np.mod(offset_tot[offset_tot > 32768], 32768)
                    offset_tot[offset_tot < -32768] = np.mod(offset_tot[offset_tot < -32768], 32768)

                    for start_hit_index in range(len(selected_te_hits)):
                        start_hit = selected_te_hits[start_hit_index]
                        stop_hit = selected_te_hits[start_hit_index + 1] if start_hit_index < (len(selected_te_hits) - 1) else None
                        tmp_trigger_number[start_hit:stop_hit] -= offset_tot[start_hit_index]

                    tmp_trigger_number[tmp_trigger_number >= 32768] = np.mod(tmp_trigger_number[tmp_trigger_number >= 32768], 32768)
                    tmp_trigger_number[tmp_trigger_number < 0] = 32768 - np.mod(np.abs(tmp_trigger_number[tmp_trigger_number < 0]), 32768)

                    hits['trigger_number'] = tmp_trigger_number

                selected_hits = hits[(hits['event_status'] & 0b0000111111111111) == 0b0000000000000000]  # no error at all

                if fix_event_number is True:
                    selector = (selected_hits['event_number'] != (np.divide(selected_hits['event_number'] + 1, 32768) * 32768 + selected_hits['trigger_number'] - 1))
                    n_fixed_hits += np.count_nonzero(selector)
                    selected_hits['event_number'] = np.divide(selected_hits['event_number'] + 1, 32768) * 32768 + selected_hits['trigger_number'] - 1

                hit_table_out.append(selected_hits)

        jumps = np.unique(np.array(jumps))
        logging.info('Found %d inconsistencies in the event number. %d hits had to be corrected.' % (jumps[jumps != 0].shape[0], n_fixed_hits))

        if fix_trigger_number is True:
            return (output_file, event_number, trigger_number, hits['trigger_number'])
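The event-number fix above exploits that the trigger number wraps around at 32768: the corrected event number keeps the current 32768-block of the event counter and replaces the low part with trigger_number - 1. A small numpy sketch of just that formula on toy values (mirroring the expression used in align_events, not the full alignment logic):

import numpy as np

event_number = np.array([32769, 32770, 32771, 32772], dtype=np.int64)
trigger_number = np.array([2, 3, 5, 6], dtype=np.int64)  # one trigger was skipped

fixed_event_number = (event_number + 1) // 32768 * 32768 + trigger_number - 1
print(fixed_event_number)  # [32769 32770 32772 32773]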
Example #7
def histogram_tdc_hits(input_file_hits, hit_selection_conditions, event_status_select_mask, event_status_condition, calibation_file=None, max_tdc=analysis_configuration['max_tdc'], n_bins=analysis_configuration['n_bins']):
    for condition in hit_selection_conditions:
        logging.info('Histogram tdc hits with %s', condition)

    def get_charge(max_tdc, tdc_calibration_values, tdc_pixel_calibration):  # return the charge from calibration
        charge_calibration = np.zeros(shape=(80, 336, max_tdc))
        for column in range(80):
            for row in range(336):
                actual_pixel_calibration = tdc_pixel_calibration[column, row, :]
                if np.any(actual_pixel_calibration != 0) and np.all(np.isfinite(actual_pixel_calibration)):
                    interpolation = interp1d(x=actual_pixel_calibration, y=tdc_calibration_values, kind='slinear', bounds_error=False, fill_value=0)
                    charge_calibration[column, row, :] = interpolation(np.arange(max_tdc))
        return charge_calibration

    def plot_tdc_tot_correlation(data, condition, output_pdf):
        logging.info('Plot correlation histogram for %s', condition)
        plt.clf()
        data = np.ma.array(data, mask=(data <= 0))
        if np.ma.any(data > 0):
            cmap = cm.get_cmap('jet', 200)
            cmap.set_bad('w')
            plt.title('Correlation with %s' % condition)
            norm = colors.LogNorm()
            z_max = data.max(fill_value=0)
            plt.xlabel('TDC')
            plt.ylabel('TOT')
            im = plt.imshow(data, cmap=cmap, norm=norm, aspect='auto', interpolation='nearest')  # , norm=norm)
            divider = make_axes_locatable(plt.gca())
            plt.gca().invert_yaxis()
            cax = divider.append_axes("right", size="5%", pad=0.1)
            plt.colorbar(im, cax=cax, ticks=np.linspace(start=0, stop=z_max, num=9, endpoint=True))
            output_pdf.savefig()
        else:
            logging.warning('No data for correlation plotting for %s', condition)

    def plot_hits_per_condition(output_pdf):
        logging.info('Plot hits selection efficiency histogram for %d conditions', len(hit_selection_conditions) + 2)
        labels = ['All Hits', 'Hits of\ngood events']
        for condition in hit_selection_conditions:
            condition = re.sub('[&]', '\n', condition)
            condition = re.sub('[()]', '', condition)
            labels.append(condition)
        plt.bar(range(len(n_hits_per_condition)), n_hits_per_condition, align='center')
        plt.xticks(range(len(n_hits_per_condition)), labels, size=8)
        plt.title('Number of hits for different cuts')
        plt.yscale('log')
        plt.ylabel('#')
        plt.grid()
        for x, y in zip(np.arange(len(n_hits_per_condition)), n_hits_per_condition):
            plt.annotate('%d' % (float(y) / float(n_hits_per_condition[0]) * 100.) + r'%', xy=(x, y / 2.), xycoords='data', color='grey', size=15)
        output_pdf.savefig()

    def plot_corrected_tdc_hist(x, y, title, output_pdf, point_style='-'):
        logging.info('Plot TDC hist with TDC calibration')
        plt.clf()
        y /= np.amax(y) if y.shape[0] > 0 else y
        plt.plot(x, y, point_style)
        plt.title(title, size=10)
        plt.xlabel('Charge [PlsrDAC]')
        plt.ylabel('Count [a.u.]')
        plt.grid()
        output_pdf.savefig()

    # Create data
    with tb.openFile(input_file_hits, mode="r") as in_hit_file_h5:
        cluster_hit_table = in_hit_file_h5.root.ClusterHits

        # Result hists, initialized per condition
        pixel_tdc_hists_per_condition = [np.zeros(shape=(80, 336, max_tdc), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        pixel_tdc_timestamp_hists_per_condition = [np.zeros(shape=(80, 336, 256), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        mean_pixel_tdc_hists_per_condition = [np.zeros(shape=(80, 336), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        mean_pixel_tdc_timestamp_hists_per_condition = [np.zeros(shape=(80, 336), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        tdc_hists_per_condition = [np.zeros(shape=(max_tdc), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        tdc_corr_hists_per_condition = [np.zeros(shape=(max_tdc, 16), dtype=np.uint32) for _ in hit_selection_conditions] if hit_selection_conditions else []

        n_hits_per_condition = [0 for _ in range(len(hit_selection_conditions) + 2)]  # conditions 1, 2 are all hits, hits of good events

        logging.info('Select hits and create TDC histograms for %d cut conditions', len(hit_selection_conditions))
        progress_bar = progressbar.ProgressBar(widgets=['', progressbar.Percentage(), ' ', progressbar.Bar(marker='*', left='|', right='|'), ' ', progressbar.AdaptiveETA()], maxval=cluster_hit_table.shape[0], term_width=80)
        progress_bar.start()
        for cluster_hits, _ in analysis_utils.data_aligned_at_events(cluster_hit_table, chunk_size=1e8):
            n_hits_per_condition[0] += cluster_hits.shape[0]
            selected_events_cluster_hits = cluster_hits[np.logical_and(cluster_hits['TDC'] < max_tdc, (cluster_hits['event_status'] & event_status_select_mask) == event_status_condition)]
            n_hits_per_condition[1] += selected_events_cluster_hits.shape[0]
            for index, condition in enumerate(hit_selection_conditions):
                selected_cluster_hits = analysis_utils.select_hits(selected_events_cluster_hits, condition)
                n_hits_per_condition[2 + index] += selected_cluster_hits.shape[0]
                column, row, tdc = selected_cluster_hits['column'] - 1, selected_cluster_hits['row'] - 1, selected_cluster_hits['TDC']
                pixel_tdc_hists_per_condition[index] += analysis_utils.hist_3d_index(column, row, tdc, shape=(80, 336, max_tdc))
                mean_pixel_tdc_hists_per_condition[index] = np.average(pixel_tdc_hists_per_condition[index], axis=2, weights=range(0, max_tdc)) * np.sum(np.arange(0, max_tdc)) / pixel_tdc_hists_per_condition[index].sum(axis=2)
                tdc_timestamp = selected_cluster_hits['TDC_time_stamp']
                pixel_tdc_timestamp_hists_per_condition[index] += analysis_utils.hist_3d_index(column, row, tdc_timestamp, shape=(80, 336, 256))
                mean_pixel_tdc_timestamp_hists_per_condition[index] = np.average(pixel_tdc_timestamp_hists_per_condition[index], axis=2, weights=range(0, 256)) * np.sum(np.arange(0, 256)) / pixel_tdc_timestamp_hists_per_condition[index].sum(axis=2)
                tdc_hists_per_condition[index] = pixel_tdc_hists_per_condition[index].sum(axis=(0, 1))
                tdc_corr_hists_per_condition[index] += analysis_utils.hist_2d_index(tdc, selected_cluster_hits['tot'], shape=(max_tdc, 16))
            progress_bar.update(n_hits_per_condition[0])
        progress_bar.finish()

        # Take TDC calibration if available and calculate charge for each TDC value and pixel
        if calibation_file is not None:
            with tb.openFile(calibation_file, mode="r") as in_file_calibration_h5:
                tdc_calibration = in_file_calibration_h5.root.HitOrCalibration[:, :, :, 1]
                tdc_calibration_values = in_file_calibration_h5.root.HitOrCalibration.attrs.scan_parameter_values[:]
            charge_calibration = get_charge(max_tdc, tdc_calibration_values, tdc_calibration)
        else:
            charge_calibration = None

        # Store data of result histograms
        with tb.open_file(input_file_hits[:-3] + '_tdc_hists.h5', mode="w") as out_file_h5:
            for index, condition in enumerate(hit_selection_conditions):
                pixel_tdc_hist_result = np.swapaxes(pixel_tdc_hists_per_condition[index], 0, 1)
                pixel_tdc_timestamp_hist_result = np.swapaxes(pixel_tdc_timestamp_hists_per_condition[index], 0, 1)
                mean_pixel_tdc_hist_result = np.swapaxes(mean_pixel_tdc_hists_per_condition[index], 0, 1)
                mean_pixel_tdc_timestamp_hist_result = np.swapaxes(mean_pixel_tdc_timestamp_hists_per_condition[index], 0, 1)
                tdc_hists_per_condition_result = tdc_hists_per_condition[index]
                tdc_corr_hist_result = np.swapaxes(tdc_corr_hists_per_condition[index], 0, 1)
                # Create result hists
                out_1 = out_file_h5.createCArray(out_file_h5.root, name='HistPixelTdcCondition_%d' % index, title='Hist Pixel Tdc with %s' % condition, atom=tb.Atom.from_dtype(pixel_tdc_hist_result.dtype), shape=pixel_tdc_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_2 = out_file_h5.createCArray(out_file_h5.root, name='HistPixelTdcTimestampCondition_%d' % index, title='Hist Pixel Tdc Timestamp with %s' % condition, atom=tb.Atom.from_dtype(pixel_tdc_timestamp_hist_result.dtype), shape=pixel_tdc_timestamp_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_3 = out_file_h5.createCArray(out_file_h5.root, name='HistMeanPixelTdcCondition_%d' % index, title='Hist Mean Pixel Tdc with %s' % condition, atom=tb.Atom.from_dtype(mean_pixel_tdc_hist_result.dtype), shape=mean_pixel_tdc_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_4 = out_file_h5.createCArray(out_file_h5.root, name='HistMeanPixelTdcTimestampCondition_%d' % index, title='Hist Mean Pixel Tdc Timestamp with %s' % condition, atom=tb.Atom.from_dtype(mean_pixel_tdc_timestamp_hist_result.dtype), shape=mean_pixel_tdc_timestamp_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_5 = out_file_h5.createCArray(out_file_h5.root, name='HistTdcCondition_%d' % index, title='Hist Tdc with %s' % condition, atom=tb.Atom.from_dtype(tdc_hists_per_condition_result.dtype), shape=tdc_hists_per_condition_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_6 = out_file_h5.createCArray(out_file_h5.root, name='HistTdcCorrCondition_%d' % index, title='Hist Correlation Tdc/Tot with %s' % condition, atom=tb.Atom.from_dtype(tdc_corr_hist_result.dtype), shape=tdc_corr_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                # Add result hists information
                out_1.attrs.dimensions, out_1.attrs.condition, out_1.attrs.tdc_values = 'column, row, TDC value', condition, range(max_tdc)
                out_2.attrs.dimensions, out_2.attrs.condition, out_2.attrs.tdc_values = 'column, row, TDC time stamp value', condition, range(256)
                out_3.attrs.dimensions, out_3.attrs.condition = 'column, row, mean TDC value', condition
                out_4.attrs.dimensions, out_4.attrs.condition = 'column, row, mean TDC time stamp value', condition
                out_5.attrs.dimensions, out_5.attrs.condition = 'PlsrDAC', condition
                out_6.attrs.dimensions, out_6.attrs.condition = 'TDC, TOT', condition
                out_1[:], out_2[:], out_3[:], out_4[:], out_5[:], out_6[:] = pixel_tdc_hist_result, pixel_tdc_timestamp_hist_result, mean_pixel_tdc_hist_result, mean_pixel_tdc_timestamp_hist_result, tdc_hists_per_condition_result, tdc_corr_hist_result

                if charge_calibration is not None:
                    # Select only valid pixels for histogramming: they have data and a calibration (i.e. any charge(TDC) calibration value != 0)
                    valid_pixel = np.where(np.logical_and(charge_calibration[:, :, :max_tdc].sum(axis=2) > 0, pixel_tdc_hist_result[:, :, :max_tdc].swapaxes(0, 1).sum(axis=2) > 0))

                    mean_charge_calibration = charge_calibration[valid_pixel][:, :max_tdc].mean(axis=0)
                    mean_tdc_hist = pixel_tdc_hist_result.swapaxes(0, 1)[valid_pixel][:, :max_tdc].mean(axis=0)
                    result_array = np.rec.array(np.column_stack((mean_charge_calibration, mean_tdc_hist)), dtype=[('charge', float), ('count', float)])
                    out_6 = out_file_h5.create_table(out_file_h5.root, name='HistMeanTdcCalibratedCondition_%d' % index, description=result_array.dtype, title='Hist Tdc with mean charge calibration and %s' % condition, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                    out_6.attrs.condition = condition
                    out_6.attrs.n_pixel = valid_pixel[0].shape[0]
                    out_6.append(result_array)
                    # Create charge histogram with per pixel TDC(charge) calibration
                    x, y = charge_calibration[valid_pixel][:, :max_tdc].ravel(), np.ravel(pixel_tdc_hist_result.swapaxes(0, 1)[valid_pixel][:, :max_tdc].ravel())
                    y, x = y[x > 0], x[x > 0]  # remove hit TDC values without a proper PlsrDAC(TDC) calibration
                    x, y, yerr = analysis_utils.get_profile_histogram(x, y, n_bins=n_bins)
                    result_array = np.rec.array(np.column_stack((x, y, yerr)), dtype=[('charge', float), ('count', float), ('count_error', float)])
                    out_7 = out_file_h5.create_table(out_file_h5.root, name='HistTdcCalibratedCondition_%d' % index, description=result_array.dtype, title='Hist Tdc with per pixel charge calibration and %s' % condition, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                    out_7.attrs.condition = condition
                    out_7.attrs.n_pixel = valid_pixel[0].shape[0]
                    out_7.append(result_array)

    # Plot Data
    with PdfPages(input_file_hits[:-3] + '_calibrated_tdc_hists.pdf') as output_pdf:
        plot_hits_per_condition(output_pdf)
        with tb.open_file(input_file_hits[:-3] + '_tdc_hists.h5', mode="r") as in_file_h5:
            for node in in_file_h5.root:  # go through the data and plot them
                if 'MeanPixel' in node.name:
                    try:
                        plotThreeWay(np.ma.masked_invalid(node[:]) * 1.5625, title='Mean TDC delay, hits with\n%s' % node._v_attrs.condition if 'Timestamp' in node.name else 'Mean TDC, hits with\n%s' % node._v_attrs.condition, filename=output_pdf)
                    except ValueError:
                        logging.warning('Cannot plot TDC delay')
                elif 'HistTdcCondition' in node.name:
                    hist_1d = node[:]
                    entry_index = np.where(hist_1d != 0)
                    if entry_index[0].shape[0] != 0:
                        max_index = np.amax(entry_index)
                    else:
                        max_index = max_tdc
                    plot_1d_hist(hist_1d[:max_index + 10], title='TDC histogram, hits with\n%s' % node._v_attrs.condition if 'Timestamp' not in node.name else 'TDC time stamp histogram, hits with\n%s' % node._v_attrs.condition, x_axis_title='TDC' if 'Timestamp' not in node.name else 'TDC time stamp', filename=output_pdf)
                elif 'HistPixelTdc' in node.name:
                    hist_3d = node[:]
                    entry_index = np.where(hist_3d.sum(axis=(0, 1)) != 0)
                    if entry_index[0].shape[0] != 0:
                        max_index = np.amax(entry_index)
                    else:
                        max_index = max_tdc
                    best_pixel_index = np.where(hist_3d.sum(axis=2) == np.amax(node[:].sum(axis=2)))
                    if best_pixel_index[0].shape[0] == 1:  # there could be more than one pixel with most hits
                        plot_1d_hist(hist_3d[best_pixel_index][0, :max_index], title='TDC histogram of pixel %d, %d\n%s' % (best_pixel_index[1] + 1, best_pixel_index[0] + 1, node._v_attrs.condition) if 'Timestamp' not in node.name else 'TDC time stamp histogram, hits of pixel %d, %d' % (best_pixel_index[1] + 1, best_pixel_index[0] + 1), x_axis_title='TDC' if 'Timestamp' not in node.name else 'TDC time stamp', filename=output_pdf)
                elif 'HistTdcCalibratedCondition' in node.name:
                    plot_corrected_tdc_hist(node[:]['charge'], node[:]['count'], title='TDC histogram, %d pixel, per pixel TDC calib.\n%s' % (node._v_attrs.n_pixel, node._v_attrs.condition), output_pdf=output_pdf)
                elif 'HistMeanTdcCalibratedCondition' in node.name:
                    plot_corrected_tdc_hist(node[:]['charge'], node[:]['count'], title='TDC histogram, %d pixel, mean TDC calib.\n%s' % (node._v_attrs.n_pixel, node._v_attrs.condition), output_pdf=output_pdf)
                elif 'HistTdcCorr' in node.name:
                    plot_tdc_tot_correlation(node[:], node._v_attrs.condition, output_pdf)
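analysis_utils.get_profile_histogram condenses the per-pixel calibrated data above into mean count vs. charge bins. A hedged numpy stand-in for such a profile histogram, with toy data (the actual helper's binning and error definition may differ):

import numpy as np

def profile_histogram(x, y, n_bins=100):
    '''Mean and standard error of y in equal-width bins of x.'''
    bin_edges = np.linspace(x.min(), x.max(), n_bins + 1)
    bin_index = np.clip(np.digitize(x, bin_edges) - 1, 0, n_bins - 1)
    bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])
    means, errors = np.full(n_bins, np.nan), np.full(n_bins, np.nan)
    for i in range(n_bins):
        values = y[bin_index == i]
        if values.size:
            means[i] = values.mean()
            errors[i] = values.std() / np.sqrt(values.size)
    return bin_centers, means, errors

charge = np.random.uniform(0., 10000., 5000)
counts = 0.01 * charge + np.random.normal(0., 5., 5000)
x, y, yerr = profile_histogram(charge, counts, n_bins=120)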
Example #8
def histogram_cluster_table(analyzed_data_file,
                            output_file,
                            chunk_size=10000000):
    '''Reads in the cluster info table in chunks and histograms the seed pixels into one occupancy array.
    The 3rd dimension of the occupancy array is the number of different scan parameters used

    Parameters
    ----------
    analyzed_data_file : string
        HDF5 filename of the file containing the cluster table. If a scan parameter is given in the meta data, the occupancy histogramming is done per scan parameter step.

    Returns
    -------
    occupancy_array: numpy.array with dimensions (col, row, #scan_parameter)
    '''

    with tb.open_file(analyzed_data_file, mode="r") as in_file_h5:
        with tb.open_file(output_file, mode="w") as out_file_h5:
            histogram = PyDataHistograming()
            histogram.create_occupancy_hist(True)
            scan_parameters = None
            event_number_indices = None
            scan_parameter_indices = None
            try:
                meta_data = in_file_h5.root.meta_data[:]
                scan_parameters = analysis_utils.get_unique_scan_parameter_combinations(
                    meta_data)
                if scan_parameters is not None:
                    scan_parameter_indices = np.array(range(
                        0, len(scan_parameters)),
                                                      dtype='u4')
                    event_number_indices = np.ascontiguousarray(
                        scan_parameters['event_number']).astype(np.uint64)
                    histogram.add_meta_event_index(
                        event_number_indices,
                        array_length=len(scan_parameters['event_number']))
                    histogram.add_scan_parameter(scan_parameter_indices)
                    logging.info(
                        "Add %d different scan parameter(s) for analysis",
                        len(scan_parameters))
                else:
                    logging.info("No scan parameter data provided")
                    histogram.set_no_scan_parameter()
            except tb.exceptions.NoSuchNodeError:
                logging.info("No meta data provided, use no scan parameter")
                histogram.set_no_scan_parameter()

            logging.info('Histogram cluster seeds...')
            progress_bar = progressbar.ProgressBar(
                widgets=[
                    '',
                    progressbar.Percentage(), ' ',
                    progressbar.Bar(marker='*', left='|', right='|'), ' ',
                    progressbar.AdaptiveETA()
                ],
                maxval=in_file_h5.root.Cluster.shape[0],
                term_width=80)
            progress_bar.start()
            total_cluster = 0  # to check analysis
            for cluster, index in analysis_utils.data_aligned_at_events(
                    in_file_h5.root.Cluster, chunk_size=chunk_size):
                total_cluster += len(cluster)
                histogram.add_cluster_seed_hits(cluster, len(cluster))
                progress_bar.update(index)
            progress_bar.finish()

            filter_table = tb.Filters(
                complib='blosc', complevel=5,
                fletcher32=False)  # compression of the written data
            occupancy_array = histogram.get_occupancy().T
            occupancy_array_table = out_file_h5.create_carray(
                out_file_h5.root,
                name='HistOcc',
                title='Occupancy Histogram',
                atom=tb.Atom.from_dtype(occupancy_array.dtype),
                shape=occupancy_array.shape,
                filters=filter_table)
            occupancy_array_table[:] = occupancy_array

            if total_cluster != np.sum(occupancy_array):
                logging.warning(
                    'Analysis shows inconsistent number of clusters used. Check needed!'
                )
            in_file_h5.root.meta_data.copy(
                out_file_h5.root)  # copy meta_data node to new file
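A minimal usage sketch (the file names are hypothetical; it assumes an interpreted file that already contains a Cluster table and a meta_data node):

# Histogram the cluster seed pixels of an interpreted scan file into a new 'HistOcc' array
histogram_cluster_table(analyzed_data_file='scan_interpreted.h5',
                        output_file='scan_cluster_occupancy.h5')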
Ejemplo n.º 9
0
def histogram_cluster_table(analyzed_data_file, output_file, chunk_size=10000000):
    """Reads in the cluster info table in chunks and histograms the seed pixels into one occupancy array.
    The 3rd dimension of the occupancy array is the number of different scan parameters used

    Parameters
    ----------
    analyzed_data_file : hdf5 file containing the cluster table. If a scan parameter is given in the meta data the occupancy
                        histograming is done per scan parameter.
    Returns
    -------
    occupancy_array: numpy.array with dimensions (col, row, #scan_parameter)
    """

    with tb.openFile(analyzed_data_file, mode="r") as in_file_h5:
        with tb.openFile(output_file, mode="w") as out_file_h5:
            histograming = PyDataHistograming()
            histograming.create_occupancy_hist(True)
            scan_parameters = None
            event_number_indices = None
            scan_parameter_indices = None
            try:
                meta_data = in_file_h5.root.meta_data[:]
                scan_parameters = analysis_utils.get_unique_scan_parameter_combinations(meta_data)
                if scan_parameters is not None:
                    scan_parameter_indices = np.array(range(0, len(scan_parameters)), dtype="u4")
                    event_number_indices = np.ascontiguousarray(scan_parameters["event_number"]).astype(np.uint64)
                    histograming.add_meta_event_index(
                        event_number_indices, array_length=len(scan_parameters["event_number"])
                    )
                    histograming.add_scan_parameter(scan_parameter_indices)
                    logging.info("Add %d different scan parameter(s) for analysis", len(scan_parameters))
                else:
                    logging.info("No scan parameter data provided")
                    histograming.set_no_scan_parameter()
            except tb.exceptions.NoSuchNodeError:
                logging.info("No meta data provided, use no scan parameter")
                histograming.set_no_scan_parameter()

            logging.info("Histogram cluster seeds...")
            progress_bar = progressbar.ProgressBar(
                widgets=[
                    "",
                    progressbar.Percentage(),
                    " ",
                    progressbar.Bar(marker="*", left="|", right="|"),
                    " ",
                    analysis_utils.ETA(),
                ],
                maxval=in_file_h5.root.Cluster.shape[0],
                term_width=80,
            )
            progress_bar.start()
            total_cluster = 0  # to check analysis
            for cluster, index in analysis_utils.data_aligned_at_events(in_file_h5.root.Cluster, chunk_size=chunk_size):
                total_cluster += len(cluster)
                histograming.add_cluster_seed_hits(cluster, len(cluster))
                progress_bar.update(index)
            progress_bar.finish()

            filter_table = tb.Filters(complib="blosc", complevel=5, fletcher32=False)  # compression of the written data
            occupancy_array = histograming.get_occupancy().T
            occupancy_array_table = out_file_h5.createCArray(
                out_file_h5.root,
                name="HistOcc",
                title="Occupancy Histogram",
                atom=tb.Atom.from_dtype(occupancy_array.dtype),
                shape=occupancy_array.shape,
                filters=filter_table,
            )
            occupancy_array_table[:] = occupancy_array

            if total_cluster != np.sum(occupancy_array):
                logging.warning("Analysis shows inconsistent number of cluster used. Check needed!")
            in_file_h5.root.meta_data.copy(out_file_h5.root)  # copy meta_data note to new file
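A usage sketch that also reads back the written histogram (hypothetical file names; tb refers to the imported PyTables module and the old camelCase API used in this example):

histogram_cluster_table(analyzed_data_file='scan_interpreted.h5',
                        output_file='scan_cluster_occupancy.h5')
with tb.openFile('scan_cluster_occupancy.h5', mode='r') as occ_file_h5:
    occupancy = occ_file_h5.root.HistOcc[:]  # numpy array with dimensions (col, row, #scan_parameter)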
Ejemplo n.º 10
0
def analyze_cluster_size_per_scan_parameter(
    input_file_hits,
    output_file_cluster_size,
    parameter="GDAC",
    max_chunk_size=10000000,
    overwrite_output_files=False,
    output_pdf=None,
):
    """ This method takes multiple hit files and determines the cluster size for different scan parameter values of

     Parameters
    ----------
    input_files_hits: string
    output_file_cluster_size: string
        The data file with the results
    parameter: string
        The name of the parameter to separate the data into (e.g.: PlsrDAC)
    max_chunk_size: int
        the maximum chunk size used during read, if too big memory error occurs, if too small analysis takes longer
    overwrite_output_files: bool
        Set to true to overwrite the output file if it already exists
    output_pdf: PdfPages
        PdfPages file object, if none the plot is printed to screen, if False nothing is printed
    """
    logging.info("Analyze the cluster sizes for different " + parameter + " settings for " + input_file_hits)
    if os.path.isfile(output_file_cluster_size) and not overwrite_output_files:  # skip analysis if already done
        logging.info(
            "Analyzed cluster size file " + output_file_cluster_size + " already exists. Skip cluster size analysis."
        )
    else:
        with tb.openFile(output_file_cluster_size, mode="w") as out_file_h5:  # file to write the data into
            filter_table = tb.Filters(complib="blosc", complevel=5, fletcher32=False)  # compression of the written data
            parameter_goup = out_file_h5.createGroup(
                out_file_h5.root, parameter, title=parameter
            )  # node to store the data
            cluster_size_total = None  # final array for the cluster size per GDAC
            with tb.openFile(input_file_hits, mode="r+") as in_hit_file_h5:  # open the actual hit file
                meta_data_array = in_hit_file_h5.root.meta_data[:]
                scan_parameter = analysis_utils.get_scan_parameter(meta_data_array)  # get the scan parameters
                if scan_parameter:  # if a GDAC scan parameter was used analyze the cluster size per GDAC setting
                    scan_parameter_values = scan_parameter[parameter]  # scan parameter settings used
                    if (
                        len(scan_parameter_values) == 1
                    ):  # only analyze per scan step if there are more than one scan step
                        logging.warning(
                            "The file "
                            + str(input_file_hits)
                            + " has no different "
                            + str(parameter)
                            + " parameter values. Omit analysis."
                        )
                    else:
                        logging.info(
                            "Analyze "
                            + input_file_hits
                            + " per scan parameter "
                            + parameter
                            + " for "
                            + str(len(scan_parameter_values))
                            + " values from "
                            + str(np.amin(scan_parameter_values))
                            + " to "
                            + str(np.amax(scan_parameter_values))
                        )
                        event_numbers = analysis_utils.get_meta_data_at_scan_parameter(meta_data_array, parameter)[
                            "event_number"
                        ]  # get the event numbers in meta_data where the scan parameter changes
                        parameter_ranges = np.column_stack(
                            (scan_parameter_values, analysis_utils.get_ranges_from_array(event_numbers))
                        )
                        hit_table = in_hit_file_h5.root.Hits
                        analysis_utils.index_event_number(hit_table)
                        total_hits, total_hits_2, index = 0, 0, 0
                        chunk_size = max_chunk_size
                        # initialize the analysis and set settings
                        analyze_data = AnalyzeRawData()
                        analyze_data.create_cluster_size_hist = True
                        analyze_data.create_cluster_tot_hist = True
                        analyze_data.histograming.set_no_scan_parameter()  # one has to tell the histogramer the # of scan parameters for correct occupancy hist allocation
                        progress_bar = progressbar.ProgressBar(
                            widgets=[
                                "",
                                progressbar.Percentage(),
                                " ",
                                progressbar.Bar(marker="*", left="|", right="|"),
                                " ",
                                analysis_utils.ETA(),
                            ],
                            maxval=hit_table.shape[0],
                            term_width=80,
                        )
                        progress_bar.start()
                        for parameter_index, parameter_range in enumerate(
                            parameter_ranges
                        ):  # loop over the selected events
                            analyze_data.reset()  # resets the data of the last analysis
                            logging.debug(
                                "Analyze GDAC = "
                                + str(parameter_range[0])
                                + " "
                                + str(int(float(float(parameter_index) / float(len(parameter_ranges)) * 100.0)))
                                + "%"
                            )
                            start_event_number = parameter_range[1]
                            stop_event_number = parameter_range[2]
                            logging.debug(
                                "Data from events = [" + str(start_event_number) + "," + str(stop_event_number) + "["
                            )
                            actual_parameter_group = out_file_h5.createGroup(
                                parameter_goup,
                                name=parameter + "_" + str(parameter_range[0]),
                                title=parameter + "_" + str(parameter_range[0]),
                            )
                            # loop over the hits in the actual selected events with optimizations: variable chunk size, start word index given
                            readout_hit_len = (
                                0
                            )  # variable to calculate an optimal chunk size value from the number of hits for speed up
                            for hits, index in analysis_utils.data_aligned_at_events(
                                hit_table,
                                start_event_number=start_event_number,
                                stop_event_number=stop_event_number,
                                start=index,
                                chunk_size=chunk_size,
                            ):
                                total_hits += hits.shape[0]
                                analyze_data.analyze_hits(hits)  # analyze the selected hits in chunks
                                readout_hit_len += hits.shape[0]
                                progress_bar.update(index)
                            chunk_size = (
                                int(1.05 * readout_hit_len)
                                if int(1.05 * readout_hit_len) < max_chunk_size
                                else max_chunk_size
                            )  # to increase the readout speed, estimated the number of hits for one read instruction
                            if (
                                chunk_size < 50
                            ):  # limit the lower chunk size, there can always be a crazy event with more than 20 hits
                                chunk_size = 50
                            # get occupancy hist
                            occupancy = (
                                analyze_data.histograming.get_occupancy()
                            )  # just here to check that the histogramming is consistent

                            # store and plot cluster size hist
                            cluster_size_hist = analyze_data.clusterizer.get_cluster_size_hist()
                            cluster_size_hist_table = out_file_h5.createCArray(
                                actual_parameter_group,
                                name="HistClusterSize",
                                title="Cluster Size Histogram",
                                atom=tb.Atom.from_dtype(cluster_size_hist.dtype),
                                shape=cluster_size_hist.shape,
                                filters=filter_table,
                            )
                            cluster_size_hist_table[:] = cluster_size_hist
                            if output_pdf is not False:
                                plotting.plot_cluster_size(
                                    hist=cluster_size_hist,
                                    title="Cluster size ("
                                    + str(np.sum(cluster_size_hist))
                                    + " entries) for "
                                    + parameter
                                    + " = "
                                    + str(scan_parameter_values[parameter_index]),
                                    filename=output_pdf,
                                )
                            if cluster_size_total is None:  # true if no data was appended to the array yet
                                cluster_size_total = cluster_size_hist
                            else:
                                cluster_size_total = np.vstack([cluster_size_total, cluster_size_hist])

                            total_hits_2 += np.sum(occupancy)
                        progress_bar.finish()
                        if total_hits != total_hits_2:
                            logging.warning("Analysis shows inconsistent number of hits. Check needed!")
                        logging.info("Analyzed %d hits!", total_hits)
            cluster_size_total_out = out_file_h5.createCArray(
                out_file_h5.root,
                name="AllHistClusterSize",
                title="All Cluster Size Histograms",
                atom=tb.Atom.from_dtype(cluster_size_total.dtype),
                shape=cluster_size_total.shape,
                filters=filter_table,
            )
            cluster_size_total_out[:] = cluster_size_total
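A minimal usage sketch (hypothetical file names; the hit file is expected to contain a Hits table and GDAC values in its meta data):

analyze_cluster_size_per_scan_parameter(input_file_hits='gdac_scan_interpreted.h5',
                                        output_file_cluster_size='gdac_scan_cluster_size.h5',
                                        parameter='GDAC',
                                        overwrite_output_files=True,
                                        output_pdf=False)  # False: do not plot anything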
Ejemplo n.º 11
0
def select_hits(
    input_file_hits,
    output_file_hits,
    condition=None,
    cluster_size_condition=None,
    n_cluster_condition=None,
    chunk_size=5000000,
):
    """ Takes a hit table and stores only selected hits into a new table. The selection of hits is done with a numexp string. Only if
    this expression evaluates to true the hit is taken. One can also select hits from cluster conditions. This selection is done
    on an event basis, meaning events are selected where the cluster condition is true and then hits of these events are taken.

     Parameters
    ----------
    input_file_hits: str
        the input file name with hits
    output_file_hits: str
        the output file name for the hits
    condition: str
        Numexpr string to select hits (e.g.: '(relative_BCID == 6) & (column == row)')
        All hit infos can be used (column, row, ...)
    cluster_size_condition: int
        Hit of events with the given cluster size are selected.
    n_cluster_condition: int
        Hit of events with the given cluster number are selected.
    """
    logging.info("Write hits with " + condition + " into " + str(output_file_hits))
    if cluster_size_condition is None and n_cluster_condition is None:  # no cluster cuts are done
        with tb.openFile(input_file_hits, mode="r+") as in_hit_file_h5:
            analysis_utils.index_event_number(in_hit_file_h5.root.Hits)  # create event index for faster selection
            with tb.openFile(output_file_hits, mode="w") as out_hit_file_h5:
                hit_table_out = out_hit_file_h5.createTable(
                    out_hit_file_h5.root,
                    name="Hits",
                    description=data_struct.HitInfoTable,
                    title="hit_data",
                    filters=tb.Filters(complib="blosc", complevel=5, fletcher32=False),
                )
                analysis_utils.write_hits_in_event_range(
                    hit_table_in=in_hit_file_h5.root.Hits, hit_table_out=hit_table_out, condition=condition
                )  # write the hits of the selected events into a new table
                in_hit_file_h5.root.meta_data.copy(out_hit_file_h5.root)  # copy meta_data node to new file
    else:
        with tb.openFile(
            input_file_hits, mode="r+"
        ) as in_hit_file_h5:  # open file with hit/cluster data with r+ to be able to create index
            analysis_utils.index_event_number(in_hit_file_h5.root.Hits)  # create event index for faster selection
            analysis_utils.index_event_number(in_hit_file_h5.root.Cluster)  # create event index for faster selection
            with tb.openFile(output_file_hits, mode="w") as out_hit_file_h5:
                hit_table_out = out_hit_file_h5.createTable(
                    out_hit_file_h5.root,
                    name="Hits",
                    description=data_struct.HitInfoTable,
                    title="hit_data",
                    filters=tb.Filters(complib="blosc", complevel=5, fletcher32=False),
                )
                cluster_table = in_hit_file_h5.root.Cluster
                last_word_number = 0
                progress_bar = progressbar.ProgressBar(
                    widgets=[
                        "",
                        progressbar.Percentage(),
                        " ",
                        progressbar.Bar(marker="*", left="|", right="|"),
                        " ",
                        analysis_utils.ETA(),
                    ],
                    maxval=cluster_table.shape[0],
                    term_width=80,
                )
                progress_bar.start()
                for data, index in analysis_utils.data_aligned_at_events(cluster_table, chunk_size=chunk_size):
                    if cluster_size_condition is not None:
                        selected_events = analysis_utils.get_events_with_cluster_size(
                            event_number=data["event_number"],
                            cluster_size=data["size"],
                            condition="cluster_size == " + str(cluster_size_condition),
                        )  # select the events with clusters of the given size
                        if n_cluster_condition is not None:
                            selected_events_2 = analysis_utils.get_events_with_n_cluster(
                                event_number=data["event_number"], condition="n_cluster == " + str(n_cluster_condition)
                            )  # select the events with the given number of clusters
                            selected_events = selected_events[
                                analysis_utils.in1d_events(selected_events, selected_events_2)
                            ]  # select events with the first two conditions above
                    elif n_cluster_condition is not None:
                        selected_events = analysis_utils.get_events_with_n_cluster(
                            event_number=data["event_number"], condition="n_cluster == " + str(n_cluster_condition)
                        )
                    else:
                        raise RuntimeError("Cannot understand cluster selection criterion")
                    last_word_number = analysis_utils.write_hits_in_events(
                        hit_table_in=in_hit_file_h5.root.Hits,
                        hit_table_out=hit_table_out,
                        events=selected_events,
                        start_hit_word=last_word_number,
                        condition=condition,
                        chunk_size=chunk_size,
                    )  # write the hits of the selected events into a new table
                    progress_bar.update(index)
                progress_bar.finish()
                in_hit_file_h5.root.meta_data.copy(out_hit_file_h5.root)  # copy meta_data node to new file
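A minimal usage sketch (hypothetical file names; the condition string follows the numexpr syntax described in the docstring):

select_hits(input_file_hits='scan_interpreted.h5',
            output_file_hits='scan_selected_hits.h5',
            condition='relative_BCID == 6',  # keep only hits with this relative BCID
            cluster_size_condition=1,        # events with cluster size 1
            n_cluster_condition=1)           # events with exactly 1 cluster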
Ejemplo n.º 12
0
def select_hits_from_cluster_info(
    input_file_hits, output_file_hits, cluster_size_condition, n_cluster_condition, chunk_size=4000000
):
    """ Takes a hit table and stores only selected hits into a new table. The selection is done on an event base and events are selected if they have a certain number of cluster or cluster size.
    To increase the analysis speed a event index for the input hit file is created first. Since a cluster hit table can be created to this way of hit selection is
    not needed anymore.

     Parameters
    ----------
    input_file_hits: str
        the input file name with hits
    output_file_hits: str
        the output file name for the hits
    cluster_size_condition: str
        the cluster size condition to select events (e.g.: 'cluster_size_condition <= 2')
    n_cluster_condition: str
        the number of cluster in a event ((e.g.: 'n_cluster_condition == 1')
    """
    logging.info(
        "Write hits of events from "
        + str(input_file_hits)
        + " with "
        + cluster_size_condition
        + " and "
        + n_cluster_condition
        + " into "
        + str(output_file_hits)
    )
    with tb.openFile(input_file_hits, mode="r+") as in_hit_file_h5:
        analysis_utils.index_event_number(in_hit_file_h5.root.Hits)
        analysis_utils.index_event_number(in_hit_file_h5.root.Cluster)
        with tb.openFile(output_file_hits, mode="w") as out_hit_file_h5:
            hit_table_out = out_hit_file_h5.createTable(
                out_hit_file_h5.root,
                name="Hits",
                description=data_struct.HitInfoTable,
                title="hit_data",
                filters=tb.Filters(complib="blosc", complevel=5, fletcher32=False),
            )
            cluster_table = in_hit_file_h5.root.Cluster
            last_word_number = 0
            progress_bar = progressbar.ProgressBar(
                widgets=[
                    "",
                    progressbar.Percentage(),
                    " ",
                    progressbar.Bar(marker="*", left="|", right="|"),
                    " ",
                    analysis_utils.ETA(),
                ],
                maxval=cluster_table.shape[0],
                term_width=80,
            )
            progress_bar.start()
            for data, index in analysis_utils.data_aligned_at_events(cluster_table, chunk_size=chunk_size):
                selected_events_1 = analysis_utils.get_events_with_cluster_size(
                    event_number=data["event_number"], cluster_size=data["size"], condition=cluster_size_condition
                )  # select the events with clusters of a certain size
                selected_events_2 = analysis_utils.get_events_with_n_cluster(
                    event_number=data["event_number"], condition=n_cluster_condition
                )  # select the events with a certain cluster number
                selected_events = analysis_utils.get_events_in_both_arrays(
                    selected_events_1, selected_events_2
                )  # select events with both conditions above
                logging.debug(
                    "Selected "
                    + str(len(selected_events))
                    + " events with "
                    + n_cluster_condition
                    + " and "
                    + cluster_size_condition
                )
                last_word_number = analysis_utils.write_hits_in_events(
                    hit_table_in=in_hit_file_h5.root.Hits,
                    hit_table_out=hit_table_out,
                    events=selected_events,
                    start_hit_word=last_word_number,
                )  # write the hits of the selected events into a new table
                progress_bar.update(index)
            progress_bar.finish()
            in_hit_file_h5.root.meta_data.copy(out_hit_file_h5.root)  # copy meta_data node to new file
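A minimal usage sketch (hypothetical file names; following the select_hits example above, the condition strings are assumed to use the 'cluster_size'/'n_cluster' variable names that analysis_utils evaluates):

select_hits_from_cluster_info(input_file_hits='scan_interpreted.h5',
                              output_file_hits='scan_single_hit_cluster.h5',
                              cluster_size_condition='cluster_size == 1',  # events with single-hit clusters
                              n_cluster_condition='n_cluster == 1')        # events with exactly one cluster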
Ejemplo n.º 13
0
def analyse_n_cluster_per_event(
    scan_base,
    include_no_cluster=False,
    time_line_absolute=True,
    combine_n_readouts=1000,
    chunk_size=10000000,
    plot_n_cluster_hists=False,
    output_pdf=None,
    output_file=None,
):
    """ Determines the number of cluster per event as a function of time. Therefore the data of a fixed number of read outs are combined ('combine_n_readouts').

    Parameters
    ----------
    scan_base: list of str
        scan base names (e.g.:  ['//data//SCC_50_fei4_self_trigger_scan_390', ]
    include_no_cluster: bool
        Set to true to also consider all events without any hit.
    combine_n_readouts: int
        the number of read outs to combine (e.g. 1000)
    max_chunk_size: int
        the maximum chunk size used during read, if too big memory error occurs, if too small analysis takes longer
    output_pdf: PdfPages
        PdfPages file object, if none the plot is printed to screen
    """

    time_stamp = []
    n_cluster = []

    start_time_set = False

    for data_file in scan_base:
        with tb.openFile(data_file + "_interpreted.h5", mode="r+") as in_cluster_file_h5:
            # get data and data pointer
            meta_data_array = in_cluster_file_h5.root.meta_data[:]
            cluster_table = in_cluster_file_h5.root.Cluster

            # determine the event ranges to analyze (timestamp_start, start_event_number, stop_event_number)
            parameter_ranges = np.column_stack(
                (
                    analysis_utils.get_ranges_from_array(meta_data_array["timestamp_start"][::combine_n_readouts]),
                    analysis_utils.get_ranges_from_array(meta_data_array["event_number"][::combine_n_readouts]),
                )
            )

            # create an event_number index (important for speed)
            analysis_utils.index_event_number(cluster_table)

            # initialize the analysis and set settings
            analyze_data = AnalyzeRawData()
            analyze_data.create_tot_hist = False
            analyze_data.create_bcid_hist = False

            # variables for read speed up
            index = 0  # index where to start the read out, 0 at the beginning, increased during looping
            best_chunk_size = chunk_size

            total_cluster = cluster_table.shape[0]

            progress_bar = progressbar.ProgressBar(
                widgets=[
                    "",
                    progressbar.Percentage(),
                    " ",
                    progressbar.Bar(marker="*", left="|", right="|"),
                    " ",
                    analysis_utils.ETA(),
                ],
                maxval=total_cluster,
                term_width=80,
            )
            progress_bar.start()

            # loop over the selected events
            for parameter_index, parameter_range in enumerate(parameter_ranges):
                logging.debug(
                    "Analyze time stamp "
                    + str(parameter_range[0])
                    + " and data from events = ["
                    + str(parameter_range[2])
                    + ","
                    + str(parameter_range[3])
                    + "[ "
                    + str(int(float(float(parameter_index) / float(len(parameter_ranges)) * 100.0)))
                    + "%"
                )
                analyze_data.reset()  # resets the data of the last analysis

                # loop over the cluster in the actual selected events with optimizations: determine best chunk size, start word index given
                readout_cluster_len = (
                    0
                )  # variable to calculate an optimal chunk size value from the number of clusters for speed up
                hist = None
                for clusters, index in analysis_utils.data_aligned_at_events(
                    cluster_table,
                    start_event_number=parameter_range[2],
                    stop_event_number=parameter_range[3],
                    start=index,
                    chunk_size=best_chunk_size,
                ):
                    n_cluster_per_event = analysis_utils.get_n_cluster_in_events(clusters["event_number"])[
                        :, 1
                    ]  # array with the number of cluster per event, cluster per event are at least 1
                    if hist is None:
                        hist = np.histogram(n_cluster_per_event, bins=10, range=(0, 10))[0]
                    else:
                        hist = np.add(hist, np.histogram(n_cluster_per_event, bins=10, range=(0, 10))[0])
                    if include_no_cluster and parameter_range[3] is not None:  # parameter_range[3] is None for the last readout
                        hist[0] = (parameter_range[3] - parameter_range[2]) - len(
                            n_cluster_per_event
                        )  # add the events without any cluster
                    readout_cluster_len += clusters.shape[0]
                    total_cluster -= len(clusters)
                    progress_bar.update(index)
                best_chunk_size = (
                    int(1.05 * readout_cluster_len) if int(1.05 * readout_cluster_len) < chunk_size else chunk_size
                )  # to increase the readout speed, estimate the number of clusters for one read instruction

                if plot_n_cluster_hists:
                    plotting.plot_1d_hist(
                        hist,
                        title="Number of cluster per event at " + str(parameter_range[0]),
                        x_axis_title="Number of cluster",
                        y_axis_title="#",
                        log_y=True,
                        filename=output_pdf,
                    )
                hist = hist.astype("f4") / np.sum(hist)  # calculate fraction from total numbers

                if time_line_absolute:
                    time_stamp.append(parameter_range[0])
                else:
                    if not start_time_set:
                        start_time = parameter_ranges[0, 0]
                        start_time_set = True
                    time_stamp.append((parameter_range[0] - start_time) / 60.0)
                n_cluster.append(hist)
            progress_bar.finish()
            if total_cluster != 0:
                logging.warning("Not all clusters were selected during analysis. Analysis is therefore not exact")

    if time_line_absolute:
        plotting.plot_scatter_time(
            time_stamp,
            n_cluster,
            title="Number of cluster per event as a function of time",
            marker_style="o",
            filename=output_pdf,
            legend=("0 cluster", "1 cluster", "2 cluster", "3 cluster")
            if include_no_cluster
            else ("0 cluster not plotted", "1 cluster", "2 cluster", "3 cluster"),
        )
    else:
        plotting.plot_scatter(
            time_stamp,
            n_cluster,
            title="Number of cluster per event as a function of time",
            x_label="time [min.]",
            marker_style="o",
            filename=output_pdf,
            legend=("0 cluster", "1 cluster", "2 cluster", "3 cluster")
            if include_no_cluster
            else ("0 cluster not plotted", "1 cluster", "2 cluster", "3 cluster"),
        )
    if output_file:
        with tb.openFile(output_file, mode="a") as out_file_h5:
            cluster_array = np.array(n_cluster)
            rec_array = np.array(
                list(zip(
                    time_stamp,
                    cluster_array[:, 0],
                    cluster_array[:, 1],
                    cluster_array[:, 2],
                    cluster_array[:, 3],
                    cluster_array[:, 4],
                    cluster_array[:, 5],
                )),  # list() is needed so the structured array is built correctly under Python 3
                dtype=[
                    ("time_stamp", float),
                    ("cluster_0", float),
                    ("cluster_1", float),
                    ("cluster_2", float),
                    ("cluster_3", float),
                    ("cluster_4", float),
                    ("cluster_5", float),
                ],
            ).view(np.recarray)
            try:
                n_cluster_table = out_file_h5.createTable(
                    out_file_h5.root,
                    name="n_cluster",
                    description=rec_array,
                    title="Cluster per event",
                    filters=tb.Filters(complib="blosc", complevel=5, fletcher32=False),
                )
                n_cluster_table[:] = rec_array
            except tb.exceptions.NodeError:
                logging.warning(output_file + " already has a n_cluster node, the existing one is not overwritten.")
    return time_stamp, n_cluster
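A minimal usage sketch (hypothetical scan base name; the interpreted file 'data/SCC_50_fei4_self_trigger_scan_390_interpreted.h5' is expected to exist, and PdfPages comes from matplotlib):

from matplotlib.backends.backend_pdf import PdfPages

with PdfPages('n_cluster_per_event.pdf') as output_pdf:
    time_stamp, n_cluster = analyse_n_cluster_per_event(scan_base=['data/SCC_50_fei4_self_trigger_scan_390'],
                                                        combine_n_readouts=1000,
                                                        plot_n_cluster_hists=True,
                                                        output_pdf=output_pdf,
                                                        output_file='n_cluster_per_event.h5')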
Ejemplo n.º 14
0
def analyze_beam_spot(
    scan_base,
    combine_n_readouts=1000,
    chunk_size=10000000,
    plot_occupancy_hists=False,
    output_pdf=None,
    output_file=None,
):
    """ Determines the mean x and y beam spot position as a function of time. Therefore the data of a fixed number of read outs are combined ('combine_n_readouts'). The occupancy is determined
    for the given combined events and stored into a pdf file. At the end the beam x and y is plotted into a scatter plot with absolute positions in um.

     Parameters
    ----------
    scan_base: list of str
        scan base names (e.g.:  ['//data//SCC_50_fei4_self_trigger_scan_390', ]
    combine_n_readouts: int
        the number of read outs to combine (e.g. 1000)
    max_chunk_size: int
        the maximum chunk size used during read, if too big memory error occurs, if too small analysis takes longer
    output_pdf: PdfPages
        PdfPages file object, if none the plot is printed to screen
    """
    time_stamp = []
    x = []
    y = []

    for data_file in scan_base:
        with tb.openFile(data_file + "_interpreted.h5", mode="r+") as in_hit_file_h5:
            # get data and data pointer
            meta_data_array = in_hit_file_h5.root.meta_data[:]
            hit_table = in_hit_file_h5.root.Hits

            # determine the event ranges to analyze (timestamp_start, start_event_number, stop_event_number)
            parameter_ranges = np.column_stack(
                (
                    analysis_utils.get_ranges_from_array(meta_data_array["timestamp_start"][::combine_n_readouts]),
                    analysis_utils.get_ranges_from_array(meta_data_array["event_number"][::combine_n_readouts]),
                )
            )

            # create an event_number index (important)
            analysis_utils.index_event_number(hit_table)

            # initialize the analysis and set settings
            analyze_data = AnalyzeRawData()
            analyze_data.create_tot_hist = False
            analyze_data.create_bcid_hist = False
            analyze_data.histograming.set_no_scan_parameter()

            # variables for read speed up
            index = 0  # index where to start the read out, 0 at the beginning, increased during looping
            best_chunk_size = chunk_size

            progress_bar = progressbar.ProgressBar(
                widgets=[
                    "",
                    progressbar.Percentage(),
                    " ",
                    progressbar.Bar(marker="*", left="|", right="|"),
                    " ",
                    analysis_utils.ETA(),
                ],
                maxval=hit_table.shape[0],
                term_width=80,
            )
            progress_bar.start()

            # loop over the selected events
            for parameter_index, parameter_range in enumerate(parameter_ranges):
                logging.debug(
                    "Analyze time stamp "
                    + str(parameter_range[0])
                    + " and data from events = ["
                    + str(parameter_range[2])
                    + ","
                    + str(parameter_range[3])
                    + "[ "
                    + str(int(float(float(parameter_index) / float(len(parameter_ranges)) * 100.0)))
                    + "%"
                )
                analyze_data.reset()  # resets the data of the last analysis

                # loop over the hits in the actual selected events with optimizations: determine best chunk size, start word index given
                readout_hit_len = (
                    0
                )  # variable to calculate an optimal chunk size value from the number of hits for speed up
                for hits, index in analysis_utils.data_aligned_at_events(
                    hit_table,
                    start_event_number=parameter_range[2],
                    stop_event_number=parameter_range[3],
                    start=index,
                    chunk_size=best_chunk_size,
                ):
                    analyze_data.analyze_hits(hits)  # analyze the selected hits in chunks
                    readout_hit_len += hits.shape[0]
                    progress_bar.update(index)
                best_chunk_size = (
                    int(1.05 * readout_hit_len) if int(1.05 * readout_hit_len) < chunk_size else chunk_size
                )  # to increase the readout speed, estimate the number of hits for one read instruction

                # get and store results
                occupancy_array = analyze_data.histograming.get_occupancy()
                projection_x = np.sum(occupancy_array, axis=0).ravel()
                projection_y = np.sum(occupancy_array, axis=1).ravel()
                x.append(analysis_utils.get_mean_from_histogram(projection_x, bin_positions=range(0, 80)))
                y.append(analysis_utils.get_mean_from_histogram(projection_y, bin_positions=range(0, 336)))
                time_stamp.append(parameter_range[0])
                if plot_occupancy_hists:
                    plotting.plot_occupancy(
                        occupancy_array[:, :, 0],
                        title="Occupancy for events between "
                        + time.strftime("%H:%M:%S", time.localtime(parameter_range[0]))
                        + " and "
                        + time.strftime("%H:%M:%S", time.localtime(parameter_range[1])),
                        filename=output_pdf,
                    )
            progress_bar.finish()
    plotting.plot_scatter(
        [i * 250 for i in x],
        [i * 50 for i in y],
        title="Mean beam position",
        x_label="x [um]",
        y_label="y [um]",
        marker_style="-o",
        filename=output_pdf,
    )
    if output_file:
        with tb.openFile(output_file, mode="a") as out_file_h5:
            rec_array = np.array(list(zip(time_stamp, x, y)), dtype=[("time_stamp", float), ("x", float), ("y", float)])  # list() is needed under Python 3
            try:
                beam_spot_table = out_file_h5.createTable(
                    out_file_h5.root,
                    name="Beamspot",
                    description=rec_array,
                    title="Beam spot position",
                    filters=tb.Filters(complib="blosc", complevel=5, fletcher32=False),
                )
                beam_spot_table[:] = rec_array
            except tb.exceptions.NodeError:
                logging.warning(output_file + " already has a Beamspot node, the existing one is not overwritten.")
    return time_stamp, x, y
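A minimal usage sketch (hypothetical scan base name; the beam positions returned are already the histogram means, the plot converts them to um with the pixel pitch):

from matplotlib.backends.backend_pdf import PdfPages

with PdfPages('beam_spot.pdf') as output_pdf:
    time_stamp, x, y = analyze_beam_spot(scan_base=['data/SCC_50_fei4_self_trigger_scan_390'],
                                         combine_n_readouts=1000,
                                         plot_occupancy_hists=True,
                                         output_pdf=output_pdf,
                                         output_file='beam_spot.h5')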
Ejemplo n.º 15
0
def select_hits(input_file_hits,
                output_file_hits,
                condition=None,
                cluster_size_condition=None,
                n_cluster_condition=None,
                chunk_size=5000000):
    ''' Takes a hit table and stores only selected hits into a new table. The selection of hits is done with a numexpr string; a hit
    is only taken if this expression evaluates to true. One can also select hits from cluster conditions. This selection is done
    on an event basis, meaning events are selected where the cluster condition is true and then the hits of these events are taken.

    Parameters
    ----------
    input_file_hits: str
        the input file name with hits
    output_file_hits: str
        the output file name for the hits
    condition: str
        Numexpr string to select hits (e.g.: '(relative_BCID == 6) & (column == row)')
        All hit infos can be used (column, row, ...)
    cluster_size_condition: int
        Hits of events with the given cluster size are selected.
    n_cluster_condition: int
        Hits of events with the given number of clusters are selected.
    '''
    logging.info('Write hits with ' + str(condition) + ' into ' +
                 str(output_file_hits))
    if cluster_size_condition is None and n_cluster_condition is None:  # no cluster cuts are done
        with tb.open_file(input_file_hits, mode="r+") as in_hit_file_h5:
            analysis_utils.index_event_number(
                in_hit_file_h5.root.Hits
            )  # create event index for faster selection
            with tb.open_file(output_file_hits, mode="w") as out_hit_file_h5:
                hit_table_out = out_hit_file_h5.create_table(
                    out_hit_file_h5.root,
                    name='Hits',
                    description=data_struct.HitInfoTable,
                    title='hit_data',
                    filters=tb.Filters(complib='blosc',
                                       complevel=5,
                                       fletcher32=False))
                analysis_utils.write_hits_in_event_range(
                    hit_table_in=in_hit_file_h5.root.Hits,
                    hit_table_out=hit_table_out,
                    condition=condition
                )  # write the hits of the selected events into a new table
                in_hit_file_h5.root.meta_data.copy(
                    out_hit_file_h5.root)  # copy meta_data node to new file
    else:
        with tb.open_file(
                input_file_hits, mode="r+"
        ) as in_hit_file_h5:  # open file with hit/cluster data with r+ to be able to create index
            analysis_utils.index_event_number(
                in_hit_file_h5.root.Hits
            )  # create event index for faster selection
            analysis_utils.index_event_number(
                in_hit_file_h5.root.Cluster
            )  # create event index for faster selection
            with tb.open_file(output_file_hits, mode="w") as out_hit_file_h5:
                hit_table_out = out_hit_file_h5.create_table(
                    out_hit_file_h5.root,
                    name='Hits',
                    description=data_struct.HitInfoTable,
                    title='hit_data',
                    filters=tb.Filters(complib='blosc',
                                       complevel=5,
                                       fletcher32=False))
                cluster_table = in_hit_file_h5.root.Cluster
                last_word_number = 0
                progress_bar = progressbar.ProgressBar(
                    widgets=[
                        '',
                        progressbar.Percentage(), ' ',
                        progressbar.Bar(marker='*', left='|', right='|'), ' ',
                        progressbar.AdaptiveETA()
                    ],
                    maxval=cluster_table.shape[0],
                    term_width=80)
                progress_bar.start()
                for data, index in analysis_utils.data_aligned_at_events(
                        cluster_table, chunk_size=chunk_size):
                    if cluster_size_condition is not None:
                        selected_events = analysis_utils.get_events_with_cluster_size(
                            event_number=data['event_number'],
                            cluster_size=data['size'],
                            condition='cluster_size == ' +
                            str(cluster_size_condition)
                        )  # select the events with clusters of the given size
                        if n_cluster_condition is not None:
                            selected_events_2 = analysis_utils.get_events_with_n_cluster(
                                event_number=data['event_number'],
                                condition='n_cluster == ' +
                                str(n_cluster_condition)
                            )  # select the events with the given number of clusters
                            selected_events = selected_events[
                                analysis_utils.in1d_events(
                                    selected_events, selected_events_2
                                )]  # select events with the first two conditions above
                    elif n_cluster_condition is not None:
                        selected_events = analysis_utils.get_events_with_n_cluster(
                            event_number=data['event_number'],
                            condition='n_cluster == ' +
                            str(n_cluster_condition))
                    else:
                        raise RuntimeError(
                            'Cannot understand cluster selection criterion')
                    last_word_number = analysis_utils.write_hits_in_events(
                        hit_table_in=in_hit_file_h5.root.Hits,
                        hit_table_out=hit_table_out,
                        events=selected_events,
                        start_hit_word=last_word_number,
                        condition=condition,
                        chunk_size=chunk_size
                    )  # write the hits of the selected events into a new table
                    progress_bar.update(index)
                progress_bar.finish()
                in_hit_file_h5.root.meta_data.copy(
                    out_hit_file_h5.root)  # copy meta_data node to new file
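A usage sketch for the pure numexpr selection path (hypothetical file names; no cluster cuts are given, so only the Hits table is needed):

select_hits(input_file_hits='scan_interpreted.h5',
            output_file_hits='scan_intime_hits.h5',
            condition='(relative_BCID > 1) & (relative_BCID < 5)')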
Ejemplo n.º 16
0
def analyze_cluster_size_per_scan_parameter(input_file_hits,
                                            output_file_cluster_size,
                                            parameter='GDAC',
                                            max_chunk_size=10000000,
                                            overwrite_output_files=False,
                                            output_pdf=None):
    ''' This method takes a hit file and determines the cluster size for the different values of a scan parameter (e.g. GDAC).

    Parameters
    ----------
    input_file_hits: string
        The data file with the hits
    output_file_cluster_size: string
        The data file with the results
    parameter: string
        The name of the parameter to separate the data into (e.g.: PlsrDAC)
    max_chunk_size: int
        The maximum chunk size used during reading; if too big, memory errors occur, if too small, the analysis takes longer
    overwrite_output_files: bool
        Set to true to overwrite the output file if it already exists
    output_pdf: PdfPages
        PdfPages file object; if None the plot is shown on screen, if False nothing is plotted
    '''
    logging.info('Analyze the cluster sizes for different ' + parameter +
                 ' settings for ' + input_file_hits)
    if os.path.isfile(
            output_file_cluster_size
    ) and not overwrite_output_files:  # skip analysis if already done
        logging.info('Analyzed cluster size file ' + output_file_cluster_size +
                     ' already exists. Skip cluster size analysis.')
    else:
        with tb.open_file(
                output_file_cluster_size,
                mode="w") as out_file_h5:  # file to write the data into
            filter_table = tb.Filters(
                complib='blosc', complevel=5,
                fletcher32=False)  # compression of the written data
            parameter_goup = out_file_h5.create_group(
                out_file_h5.root, parameter,
                title=parameter)  # node to store the data
            cluster_size_total = None  # final array for the cluster size per GDAC
            with tb.open_file(
                    input_file_hits,
                    mode="r+") as in_hit_file_h5:  # open the actual hit file
                meta_data_array = in_hit_file_h5.root.meta_data[:]
                scan_parameter = analysis_utils.get_scan_parameter(
                    meta_data_array)  # get the scan parameters
                if scan_parameter:  # if a GDAC scan parameter was used analyze the cluster size per GDAC setting
                    scan_parameter_values = scan_parameter[
                        parameter]  # scan parameter settings used
                    if len(
                            scan_parameter_values
                    ) == 1:  # only analyze per scan step if there are more than one scan step
                        logging.warning('The file ' + str(input_file_hits) +
                                        ' has no different ' + str(parameter) +
                                        ' parameter values. Omit analysis.')
                    else:
                        logging.info('Analyze ' + input_file_hits +
                                     ' per scan parameter ' + parameter +
                                     ' for ' +
                                     str(len(scan_parameter_values)) +
                                     ' values from ' +
                                     str(np.amin(scan_parameter_values)) +
                                     ' to ' +
                                     str(np.amax(scan_parameter_values)))
                        event_numbers = analysis_utils.get_meta_data_at_scan_parameter(
                            meta_data_array, parameter
                        )['event_number']  # get the event numbers in meta_data where the scan parameter changes
                        parameter_ranges = np.column_stack(
                            (scan_parameter_values,
                             analysis_utils.get_ranges_from_array(
                                 event_numbers)))
                        hit_table = in_hit_file_h5.root.Hits
                        analysis_utils.index_event_number(hit_table)
                        total_hits, total_hits_2, index = 0, 0, 0
                        chunk_size = max_chunk_size
                        # initialize the analysis and set settings
                        analyze_data = AnalyzeRawData()
                        analyze_data.create_cluster_size_hist = True
                        analyze_data.create_cluster_tot_hist = True
                        analyze_data.histogram.set_no_scan_parameter(
                        )  # one has to tell histogram the # of scan parameters for correct occupancy hist allocation
                        progress_bar = progressbar.ProgressBar(
                            widgets=[
                                '',
                                progressbar.Percentage(), ' ',
                                progressbar.Bar(marker='*',
                                                left='|',
                                                right='|'), ' ',
                                progressbar.AdaptiveETA()
                            ],
                            maxval=hit_table.shape[0],
                            term_width=80)
                        progress_bar.start()
                        for parameter_index, parameter_range in enumerate(
                                parameter_ranges
                        ):  # loop over the selected events
                            analyze_data.reset(
                            )  # resets the data of the last analysis
                            logging.debug(
                                'Analyze GDAC = ' + str(parameter_range[0]) +
                                ' ' + str(
                                    int(
                                        float(
                                            float(parameter_index) /
                                            float(len(parameter_ranges)) *
                                            100.0))) + '%')
                            start_event_number = parameter_range[1]
                            stop_event_number = parameter_range[2]
                            logging.debug('Data from events = [' +
                                          str(start_event_number) + ',' +
                                          str(stop_event_number) + '[')
                            actual_parameter_group = out_file_h5.create_group(
                                parameter_goup,
                                name=parameter + '_' + str(parameter_range[0]),
                                title=parameter + '_' +
                                str(parameter_range[0]))
                            # loop over the hits in the actual selected events with optimizations: variable chunk size, start word index given
                            readout_hit_len = 0  # variable to calculate an optimal chunk size value from the number of hits for speed up
                            for hits, index in analysis_utils.data_aligned_at_events(
                                    hit_table,
                                    start_event_number=start_event_number,
                                    stop_event_number=stop_event_number,
                                    start_index=index,
                                    chunk_size=chunk_size):
                                total_hits += hits.shape[0]
                                analyze_data.analyze_hits(
                                    hits
                                )  # analyze the selected hits in chunks
                                readout_hit_len += hits.shape[0]
                                progress_bar.update(index)
                            chunk_size = int(1.05 * readout_hit_len) if int(
                                1.05 * readout_hit_len
                            ) < max_chunk_size else max_chunk_size  # to increase the readout speed, estimated the number of hits for one read instruction
                            if chunk_size < 50:  # limit the lower chunk size, there can always be a crazy event with more than 20 hits
                                chunk_size = 50
                            # get occupancy hist
                            occupancy = analyze_data.histogram.get_occupancy(
                            )  # just check here if histogram is consistent

                            # store and plot cluster size hist
                            cluster_size_hist = analyze_data.clusterizer.get_cluster_size_hist(
                            )
                            cluster_size_hist_table = out_file_h5.create_carray(
                                actual_parameter_group,
                                name='HistClusterSize',
                                title='Cluster Size Histogram',
                                atom=tb.Atom.from_dtype(
                                    cluster_size_hist.dtype),
                                shape=cluster_size_hist.shape,
                                filters=filter_table)
                            cluster_size_hist_table[:] = cluster_size_hist
                            if output_pdf is not False:
                                plotting.plot_cluster_size(
                                    hist=cluster_size_hist,
                                    title='Cluster size (' +
                                    str(np.sum(cluster_size_hist)) +
                                    ' entries) for ' + parameter + ' = ' +
                                    str(scan_parameter_values[parameter_index]
                                        ),
                                    filename=output_pdf)
                            if cluster_size_total is None:  # true if no data was appended to the array yet
                                cluster_size_total = cluster_size_hist
                            else:
                                cluster_size_total = np.vstack(
                                    [cluster_size_total, cluster_size_hist])

                            total_hits_2 += np.sum(occupancy)
                        progress_bar.finish()
                        if total_hits != total_hits_2:
                            logging.warning(
                                'Analysis shows inconsistent number of hits. Check needed!'
                            )
                        logging.info('Analyzed %d hits!', total_hits)
            cluster_size_total_out = out_file_h5.create_carray(
                out_file_h5.root,
                name='AllHistClusterSize',
                title='All Cluster Size Histograms',
                atom=tb.Atom.from_dtype(cluster_size_total.dtype),
                shape=cluster_size_total.shape,
                filters=filter_table)
            cluster_size_total_out[:] = cluster_size_total
Ejemplo n.º 17
0
def analyze_hits_per_scan_parameter(analyze_data, scan_parameters=None, chunk_size=50000):
    """Takes the hit table and analyzes the hits per scan parameter

    Parameters
    ----------
    analyze_data : analysis.analyze_raw_data.AnalyzeRawData
        Object with an opened hit file (AnalyzeRawData.out_file_h5) or with a hit data file name set
        (AnalyzeRawData._analyzed_data_file).
    scan_parameters : list of strings
        The names of the scan parameters to use.
    chunk_size : int
        Number of hits per hit table read. Bigger chunks are faster, but too big chunks cause memory errors.
    Returns
    -------
    Yields the analysis.analyze_raw_data.AnalyzeRawData object for each scan parameter value.
    """

    if analyze_data.out_file_h5 is None or analyze_data.out_file_h5.isopen == 0:
        in_hit_file_h5 = tb.open_file(analyze_data._analyzed_data_file, "r+")
        opened_file = True
    else:
        in_hit_file_h5 = analyze_data.out_file_h5
        opened_file = False

    meta_data = in_hit_file_h5.root.meta_data[:]  # get the meta data table
    try:
        hit_table = in_hit_file_h5.root.Hits  # get the hit table
    except tb.NoSuchNodeError:
        logging.error("analyze_hits_per_scan_parameter needs a hit table, but no hit table found.")
        return

    meta_data_table_at_scan_parameter = analysis_utils.get_unique_scan_parameter_combinations(
        meta_data, scan_parameters=scan_parameters
    )
    parameter_values = analysis_utils.get_scan_parameters_table_from_meta_data(
        meta_data_table_at_scan_parameter, scan_parameters
    )
    event_number_ranges = analysis_utils.get_ranges_from_array(
        meta_data_table_at_scan_parameter["event_number"]
    )  # get the event number ranges for the different scan parameter settings

    analysis_utils.index_event_number(hit_table)  # create an event_number index to select hits by their event number quickly; not required, but important for speed

    # variables for read speed-up
    index = 0  # index where to start reading the hit table; 0 at the beginning, increased while looping
    best_chunk_size = chunk_size  # number of hits to copy to RAM per read; the optimal chunk size is determined while looping

    # loop over the selected events
    for parameter_index, (start_event_number, stop_event_number) in enumerate(event_number_ranges):
        logging.info("Analyze hits for " + str(scan_parameters) + " = " + str(parameter_values[parameter_index]))
        analyze_data.reset()  # resets the front end data of the last analysis step but not the options
        readout_hit_len = 0  # variable to calculate an optimal chunk size value from the number of hits, for speed-up
        # loop over the hits in the actual selected events with optimizations: determine best chunk size, start word index given
        for hits, index in analysis_utils.data_aligned_at_events(
            hit_table,
            start_event_number=start_event_number,
            stop_event_number=stop_event_number,
            start=index,
            chunk_size=best_chunk_size,
        ):
            analyze_data.analyze_hits(hits, scan_parameter=False)  # analyze the selected hits in chunks
            readout_hit_len += hits.shape[0]
        # Estimate the number of hits of the next read instruction to increase the readout speed
        best_chunk_size = (
            int(1.5 * readout_hit_len)
            if 1e3 < int(1.05 * readout_hit_len) < chunk_size
            else chunk_size
        )
        file_name = (
            " ".join(re.findall("[a-zA-Z0-9]+", str(scan_parameters)))
            + "_"
            + " ".join(re.findall("[a-zA-Z0-9]+", str(parameter_values[parameter_index])))
        )
        analyze_data._create_additional_hit_data(safe_to_file=False)
        analyze_data._create_additional_cluster_data(safe_to_file=False)
        yield analyze_data, file_name

    if opened_file:
        in_hit_file_h5.close()
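
A minimal usage sketch for the generator above; the AnalyzeRawData setup, the file name and the scan parameter name are illustrative assumptions, not part of this module:

# Hypothetical setup: an AnalyzeRawData object whose analyzed hit file is already set,
# e.g. analyze_data = AnalyzeRawData(analyzed_data_file='scan_analyzed.h5')
for analyze_data_per_parameter, file_name in analyze_hits_per_scan_parameter(analyze_data, scan_parameters=['PlsrDAC']):
    occupancy = analyze_data_per_parameter.histogram.get_occupancy()  # histograms now contain only hits of this scan parameter value
    logging.info('Scan point %s: %d entries in the occupancy histogram', file_name, int(occupancy.sum()))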
Ejemplo n.º 18
0
def analyze_hits_per_scan_parameter(analyze_data,
                                    scan_parameters=None,
                                    chunk_size=50000):
    '''Takes the hit table and analyzes the hits per scan parameter

    Parameters
    ----------
    analyze_data : analysis.analyze_raw_data.AnalyzeRawData
        Object with an opened hit file (AnalyzeRawData.out_file_h5) or with a hit data file name set
        (AnalyzeRawData._analyzed_data_file).
    scan_parameters : list of strings
        The names of the scan parameters to use.
    chunk_size : int
        Number of hits per hit table read. Bigger chunks are faster, but too big chunks cause memory errors.
    Returns
    -------
    Yields the analysis.analyze_raw_data.AnalyzeRawData object for each scan parameter value.
    '''

    if analyze_data.out_file_h5 is None or analyze_data.out_file_h5.isopen == 0:
        in_hit_file_h5 = tb.open_file(analyze_data._analyzed_data_file, 'r+')
        close_file = True
    else:
        in_hit_file_h5 = analyze_data.out_file_h5
        close_file = False

    meta_data = in_hit_file_h5.root.meta_data[:]  # get the meta data table
    try:
        hit_table = in_hit_file_h5.root.Hits  # get the hit table
    except tb.NoSuchNodeError:
        logging.error(
            'analyze_hits_per_scan_parameter needs a hit table, but no hit table found.'
        )
        return

    meta_data_table_at_scan_parameter = analysis_utils.get_unique_scan_parameter_combinations(
        meta_data, scan_parameters=scan_parameters)
    parameter_values = analysis_utils.get_scan_parameters_table_from_meta_data(
        meta_data_table_at_scan_parameter, scan_parameters)
    event_number_ranges = analysis_utils.get_ranges_from_array(
        meta_data_table_at_scan_parameter['event_number']
    )  # get the event number ranges for the different scan parameter settings

    analysis_utils.index_event_number(hit_table)  # create an event_number index to select hits by their event number quickly; not required, but important for speed

    # variables for read speed up
    index = 0  # index where to start reading the hit table; 0 at the beginning, increased while looping
    best_chunk_size = chunk_size  # number of hits to copy to RAM during looping, the optimal chunk size is determined during looping

    # loop over the selected events
    for parameter_index, (start_event_number,
                          stop_event_number) in enumerate(event_number_ranges):
        logging.info('Analyze hits for ' + str(scan_parameters) + ' = ' +
                     str(parameter_values[parameter_index]))
        analyze_data.reset()  # resets the front end data of the last analysis step but not the options
        readout_hit_len = 0  # variable to calculate an optimal chunk size value from the number of hits, for speed-up
        # loop over the hits in the actual selected events with optimizations: determine best chunk size, start word index given
        for hits, index in analysis_utils.data_aligned_at_events(
                hit_table,
                start_event_number=start_event_number,
                stop_event_number=stop_event_number,
                start_index=index,
                chunk_size=best_chunk_size):
            analyze_data.analyze_hits(
                hits,
                scan_parameter=False)  # analyze the selected hits in chunks
            readout_hit_len += hits.shape[0]
        # Estimate the number of hits of the next read instruction to increase the readout speed
        best_chunk_size = int(1.5 * readout_hit_len) if 1e3 < int(1.05 * readout_hit_len) < chunk_size else chunk_size
        file_name = " ".join(re.findall(
            "[a-zA-Z0-9]+", str(scan_parameters))) + '_' + " ".join(
                re.findall("[a-zA-Z0-9]+",
                           str(parameter_values[parameter_index])))
        analyze_data._create_additional_hit_data(safe_to_file=False)
        analyze_data._create_additional_cluster_data(safe_to_file=False)
        yield analyze_data, file_name

    if close_file:
        in_hit_file_h5.close()
Ejemplo n.º 19
0
def histogram_tdc_hits(input_file_hits, hit_selection_conditions, event_status_select_mask, event_status_condition, calibration_file=None, correct_calibration=None, max_tdc=1000, ignore_disabled_regions=True, n_bins=200, plot_data=True):
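    '''Histograms the TDC values of selected cluster hits for several hit selection conditions and stores the
    result histograms in input_file_hits[:-3] + '_tdc_hists.h5' (and optionally plots them).

    Parameters
    ----------
    input_file_hits : string
        File name of the file containing the ClusterHits table.
    hit_selection_conditions : list of strings
        One selection condition per result histogram; passed to analysis_utils.select_hits.
    event_status_select_mask : int
        Bit mask applied to the event_status word to select hits of good events.
    event_status_condition : int
        Expected value of the masked event_status bits.
    calibration_file : string
        HitOr (TDC) calibration file; if given, TDC values are additionally translated into charge.
    correct_calibration : string
        Second calibration file used to shift the calibration (see get_calibration_correction).
    max_tdc : int
        Hits with TDC >= max_tdc are discarded.
    ignore_disabled_regions : bool
        If True, hits on disabled pixels and their direct neighbours are removed.
    n_bins : int
        Number of bins of the calibrated charge histograms.
    plot_data : bool
        If True, the result histograms are plotted into input_file_hits[:-3] + '_calibrated_tdc_hists.pdf'.
    '''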
    for condition in hit_selection_conditions:
        logging.info('Histogram TDC hits with %s', condition)

    def get_charge(max_tdc, tdc_calibration_values, tdc_pixel_calibration):  # return the charge from calibration
        charge_calibration = np.zeros(shape=(80, 336, max_tdc))
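        # Per pixel: invert the TDC(PlsrDAC) calibration by interpolating PlsrDAC as a function of TDC
        # and evaluate it at every integer TDC value in [0, max_tdc)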
        for column in range(80):
            for row in range(336):
                actual_pixel_calibration = tdc_pixel_calibration[column, row, :]
                # Only take pixels with at least 3 valid calibration points
                if np.count_nonzero(actual_pixel_calibration != 0) > 2 and np.count_nonzero(np.isfinite(actual_pixel_calibration)) > 2:
                    selected_measurements = np.isfinite(actual_pixel_calibration)  # Select valid calibration steps
                    selected_actual_pixel_calibration = actual_pixel_calibration[selected_measurements]
                    selected_tdc_calibration_values = tdc_calibration_values[selected_measurements]
                    interpolation = interp1d(x=selected_actual_pixel_calibration, y=selected_tdc_calibration_values, kind='slinear', bounds_error=False, fill_value=0)
                    charge_calibration[column, row, :] = interpolation(np.arange(max_tdc))
        return charge_calibration

    def plot_tdc_tot_correlation(data, condition, output_pdf):
        logging.info('Plot correlation histogram for %s', condition)
        plt.clf()
        data = np.ma.array(data, mask=(data <= 0))
        if np.ma.any(data > 0):
            cmap = cm.get_cmap('jet', 200)
            cmap.set_bad('w')
            plt.title('Correlation with %s' % condition)
            norm = colors.LogNorm()
            z_max = data.max(fill_value=0)
            plt.xlabel('TDC')
            plt.ylabel('TOT')
            im = plt.imshow(data, cmap=cmap, norm=norm, aspect='auto', interpolation='nearest')  # , norm=norm)
            divider = make_axes_locatable(plt.gca())
            plt.gca().invert_yaxis()
            cax = divider.append_axes("right", size="5%", pad=0.1)
            plt.colorbar(im, cax=cax, ticks=np.linspace(start=0, stop=z_max, num=9, endpoint=True))
            output_pdf.savefig()
        else:
            logging.warning('No data for correlation plotting for %s', condition)

    def plot_hits_per_condition(output_pdf):
        logging.info('Plot hit selection efficiency histogram for %d conditions', len(hit_selection_conditions) + 2)
        labels = ['All Hits', 'Hits of\ngood events']
        for condition in hit_selection_conditions:
            condition = re.sub('[&]', '\n', condition)
            condition = re.sub('[()]', '', condition)
            labels.append(condition)
        plt.clf()
        plt.bar(range(len(n_hits_per_condition)), n_hits_per_condition, align='center')
        plt.xticks(range(len(n_hits_per_condition)), labels, size=8)
        plt.title('Number of hits for different cuts')
        plt.yscale('log')
        plt.ylabel('#')
        plt.grid()
        for x, y in zip(np.arange(len(n_hits_per_condition)), n_hits_per_condition):
            plt.annotate('%d' % (float(y) / float(n_hits_per_condition[0]) * 100.) + r'%', xy=(x, y / 2.), xycoords='data', color='grey', size=15)
        output_pdf.savefig()

    def plot_corrected_tdc_hist(x, y, title, output_pdf, point_style='-'):
        logging.info('Plot TDC hist with TDC calibration')
        plt.clf()
        y /= np.amax(y) if y.shape[0] > 0 else y
        plt.plot(x, y, point_style)
        plt.title(title, size=10)
        plt.xlabel('Charge [PlsrDAC]')
        plt.ylabel('Count [a.u.]')
        plt.grid()
        output_pdf.savefig()

    def get_calibration_correction(tdc_calibration, tdc_calibration_values, filename_new_calibration):  # correct the TDC calibration with the TDC calib in filename_new_calibration by shifting the means
        with tb.open_file(filename_new_calibration, 'r') as in_file_2:
            charge_calibration_1, charge_calibration_2 = tdc_calibration, in_file_2.root.HitOrCalibration[:, :, :, 1]

            plsr_dacs = tdc_calibration_values
            if not np.all(plsr_dacs == in_file_2.root.HitOrCalibration._v_attrs.scan_parameter_values):
                raise NotImplementedError('The check calibration file has to have the same PlsrDAC values')

            # Valid pixel have a calibration in the new and the old calibration
            valid_pixel = np.where(~np.all((charge_calibration_1 == 0), axis=2) & ~np.all(np.isnan(charge_calibration_1), axis=2) & ~np.all((charge_calibration_2 == 0), axis=2) & ~np.all(np.isnan(charge_calibration_2), axis=2))
            mean_charge_calibration = np.nanmean(charge_calibration_2[valid_pixel], axis=0)
            offset_mean = np.nanmean((charge_calibration_2[valid_pixel] - charge_calibration_1[valid_pixel]), axis=0)

            dPlsrDAC_dTDC = analysis_utils.smooth_differentiation(plsr_dacs, mean_charge_calibration, order=3, smoothness=0, derivation=1)
            plt.clf()
            plt.plot(plsr_dacs, offset_mean / dPlsrDAC_dTDC, '.-', label='PlsrDAC')
            plt.plot(plsr_dacs, offset_mean, '.-', label='TDC')
            plt.grid()
            plt.xlabel('PlsrDAC')
            plt.ylabel('Mean calibration offset')
            plt.legend(loc=0)
            plt.title('Mean offset between TDC calibration data, new - old ')
            plt.savefig(filename_new_calibration[:-3] + '.pdf')
            plt.show()

            return offset_mean

    def delete_disabled_regions(hits, enable_mask):
        n_hits = hits.shape[0]

        # Treat the case of no hits
        if n_hits == 0:
            return hits

        # Column, row array with True for disabled pixels
        disabled_region = ~enable_mask.astype(bool).T.copy()
        n_disabled_pixels = np.count_nonzero(disabled_region)

        # Extend disabled pixel mask by the neighbouring pixels
        neighbour_pixels = [(-1, 0), (1, 0), (0, -1), (0, 1)]  # Disable direct neighbouring pixels
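        # shift() is assumed to be scipy.ndimage.shift here (the import is not part of this snippet)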
        for neighbour_pixel in neighbour_pixels:
            disabled_region = np.logical_or(disabled_region, shift(disabled_region, shift=neighbour_pixel, cval=0))

        logging.info('Masking %d additional pixels neighbouring %d disabled pixels', np.count_nonzero(disabled_region) - n_disabled_pixels, n_disabled_pixels)

        # Make 1D selection array with disabled pixels
        disabled_pixels = np.where(disabled_region)
        disabled_pixels_1d = (disabled_pixels[0] + 1) * disabled_region.shape[1] + (disabled_pixels[1] + 1)  # + 1 because pixel index 0,0 has column/row = 1

        hits_1d = hits['column'].astype(np.uint32) * disabled_region.shape[1] + hits['row']  # promote the dtype so the 1D pixel index does not overflow
        hits = hits[np.in1d(hits_1d, disabled_pixels_1d, invert=True)]

        logging.info('Lost %d hits (%d percent) due to disabling neighbours', n_hits - hits.shape[0], (1. - float(hits.shape[0]) / n_hits) * 100)

        return hits

    # Create data
    with tb.open_file(input_file_hits, mode="r") as in_hit_file_h5:
        cluster_hit_table = in_hit_file_h5.root.ClusterHits
        try:
            enabled_pixels = in_hit_file_h5.root.ClusterHits._v_attrs.enabled_pixels[:]
        except AttributeError:  # Old and simulated data do not have this info
            logging.warning('No enabled pixel mask found in data! Assuming all pixels are enabled.')
            enabled_pixels = np.ones(shape=(336, 80))

        # Result hists, initialized per condition
        pixel_tdc_hists_per_condition = [np.zeros(shape=(80, 336, max_tdc), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        pixel_tdc_timestamp_hists_per_condition = [np.zeros(shape=(80, 336, 256), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        mean_pixel_tdc_hists_per_condition = [np.zeros(shape=(80, 336), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        mean_pixel_tdc_timestamp_hists_per_condition = [np.zeros(shape=(80, 336), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        tdc_hists_per_condition = [np.zeros(shape=(max_tdc), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        tdc_corr_hists_per_condition = [np.zeros(shape=(max_tdc, 16), dtype=np.uint32) for _ in hit_selection_conditions] if hit_selection_conditions else []

        n_hits_per_condition = [0 for _ in range(len(hit_selection_conditions) + 2)]  # the first two entries count all hits and hits of good events

        logging.info('Select hits and create TDC histograms for %d cut conditions', len(hit_selection_conditions))
        progress_bar = progressbar.ProgressBar(widgets=['', progressbar.Percentage(), ' ', progressbar.Bar(marker='*', left='|', right='|'), ' ', progressbar.AdaptiveETA()], maxval=cluster_hit_table.shape[0], term_width=80)
        progress_bar.start()
        for cluster_hits, _ in analysis_utils.data_aligned_at_events(cluster_hit_table, chunk_size=10000000):
            n_hits_per_condition[0] += cluster_hits.shape[0]
            selected_events_cluster_hits = cluster_hits[np.logical_and(cluster_hits['TDC'] < max_tdc, (cluster_hits['event_status'] & event_status_select_mask) == event_status_condition)]
            n_hits_per_condition[1] += selected_events_cluster_hits.shape[0]
            for index, condition in enumerate(hit_selection_conditions):
                selected_cluster_hits = analysis_utils.select_hits(selected_events_cluster_hits, condition)
                if ignore_disabled_regions:
                    selected_cluster_hits = delete_disabled_regions(hits=selected_cluster_hits, enable_mask=enabled_pixels)

                n_hits_per_condition[2 + index] += selected_cluster_hits.shape[0]
                column, row, tdc = selected_cluster_hits['column'] - 1, selected_cluster_hits['row'] - 1, selected_cluster_hits['TDC']
                pixel_tdc_hists_per_condition[index] += fast_analysis_utils.hist_3d_index(column, row, tdc, shape=(80, 336, max_tdc))
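                # Per-pixel mean TDC: np.average(hist, weights=0..max_tdc-1) gives sum(t * h[t]) / sum(t);
                # multiplying by sum(t) and dividing by the per-pixel hit count sum(h[t]) yields the
                # hit-weighted mean TDC value sum(t * h[t]) / sum(h[t])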
                mean_pixel_tdc_hists_per_condition[index] = np.average(pixel_tdc_hists_per_condition[index], axis=2, weights=range(0, max_tdc)) * np.sum(np.arange(0, max_tdc)) / pixel_tdc_hists_per_condition[index].sum(axis=2)
                tdc_timestamp = selected_cluster_hits['TDC_time_stamp']
                pixel_tdc_timestamp_hists_per_condition[index] += fast_analysis_utils.hist_3d_index(column, row, tdc_timestamp, shape=(80, 336, 256))
                mean_pixel_tdc_timestamp_hists_per_condition[index] = np.average(pixel_tdc_timestamp_hists_per_condition[index], axis=2, weights=range(0, 256)) * np.sum(np.arange(0, 256)) / pixel_tdc_timestamp_hists_per_condition[index].sum(axis=2)
                tdc_hists_per_condition[index] = pixel_tdc_hists_per_condition[index].sum(axis=(0, 1))
                tdc_corr_hists_per_condition[index] += fast_analysis_utils.hist_2d_index(tdc, selected_cluster_hits['tot'], shape=(max_tdc, 16))
            progress_bar.update(n_hits_per_condition[0])
        progress_bar.finish()

        # Take TDC calibration if available and calculate charge for each TDC value and pixel
        if calibration_file is not None:
            with tb.open_file(calibration_file, mode="r") as in_file_calibration_h5:
                tdc_calibration = in_file_calibration_h5.root.HitOrCalibration[:, :, :, 1]
                tdc_calibration_values = in_file_calibration_h5.root.HitOrCalibration.attrs.scan_parameter_values[:]
                if correct_calibration is not None:
                    tdc_calibration += get_calibration_correction(tdc_calibration, tdc_calibration_values, correct_calibration)
            charge_calibration = get_charge(max_tdc, tdc_calibration_values, tdc_calibration)
        else:
            charge_calibration = None

        # Store data of result histograms
        with tb.open_file(input_file_hits[:-3] + '_tdc_hists.h5', mode="w") as out_file_h5:
            for index, condition in enumerate(hit_selection_conditions):
                pixel_tdc_hist_result = np.swapaxes(pixel_tdc_hists_per_condition[index], 0, 1)
                pixel_tdc_timestamp_hist_result = np.swapaxes(pixel_tdc_timestamp_hists_per_condition[index], 0, 1)
                mean_pixel_tdc_hist_result = np.swapaxes(mean_pixel_tdc_hists_per_condition[index], 0, 1)
                mean_pixel_tdc_timestamp_hist_result = np.swapaxes(mean_pixel_tdc_timestamp_hists_per_condition[index], 0, 1)
                tdc_hists_per_condition_result = tdc_hists_per_condition[index]
                tdc_corr_hist_result = np.swapaxes(tdc_corr_hists_per_condition[index], 0, 1)
                # Create result hists
                out_1 = out_file_h5.create_carray(out_file_h5.root, name='HistPixelTdcCondition_%d' % index, title='Hist Pixel Tdc with %s' % condition, atom=tb.Atom.from_dtype(pixel_tdc_hist_result.dtype), shape=pixel_tdc_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_2 = out_file_h5.create_carray(out_file_h5.root, name='HistPixelTdcTimestampCondition_%d' % index, title='Hist Pixel Tdc Timestamp with %s' % condition, atom=tb.Atom.from_dtype(pixel_tdc_timestamp_hist_result.dtype), shape=pixel_tdc_timestamp_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_3 = out_file_h5.create_carray(out_file_h5.root, name='HistMeanPixelTdcCondition_%d' % index, title='Hist Mean Pixel Tdc with %s' % condition, atom=tb.Atom.from_dtype(mean_pixel_tdc_hist_result.dtype), shape=mean_pixel_tdc_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_4 = out_file_h5.create_carray(out_file_h5.root, name='HistMeanPixelTdcTimestampCondition_%d' % index, title='Hist Mean Pixel Tdc Timestamp with %s' % condition, atom=tb.Atom.from_dtype(mean_pixel_tdc_timestamp_hist_result.dtype), shape=mean_pixel_tdc_timestamp_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_5 = out_file_h5.create_carray(out_file_h5.root, name='HistTdcCondition_%d' % index, title='Hist Tdc with %s' % condition, atom=tb.Atom.from_dtype(tdc_hists_per_condition_result.dtype), shape=tdc_hists_per_condition_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_6 = out_file_h5.create_carray(out_file_h5.root, name='HistTdcCorrCondition_%d' % index, title='Hist Correlation Tdc/Tot with %s' % condition, atom=tb.Atom.from_dtype(tdc_corr_hist_result.dtype), shape=tdc_corr_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                # Add result hists information
                out_1.attrs.dimensions, out_1.attrs.condition, out_1.attrs.tdc_values = 'column, row, TDC value', condition, range(max_tdc)
                out_2.attrs.dimensions, out_2.attrs.condition, out_2.attrs.tdc_values = 'column, row, TDC time stamp value', condition, range(256)
                out_3.attrs.dimensions, out_3.attrs.condition = 'column, row, mean TDC value', condition
                out_4.attrs.dimensions, out_4.attrs.condition = 'column, row, mean TDC time stamp value', condition
                out_5.attrs.dimensions, out_5.attrs.condition = 'PlsrDAC', condition
                out_6.attrs.dimensions, out_6.attrs.condition = 'TDC, TOT', condition
                out_1[:], out_2[:], out_3[:], out_4[:], out_5[:], out_6[:] = pixel_tdc_hist_result, pixel_tdc_timestamp_hist_result, mean_pixel_tdc_hist_result, mean_pixel_tdc_timestamp_hist_result, tdc_hists_per_condition_result, tdc_corr_hist_result

                if charge_calibration is not None:
                    # Select only valid pixels for histogramming: they have data and a calibration (i.e. any charge(TDC) calibration value != 0)
                    valid_pixel = np.where(np.logical_and(charge_calibration[:, :, :max_tdc].sum(axis=2) > 0, pixel_tdc_hist_result[:, :, :max_tdc].swapaxes(0, 1).sum(axis=2) > 0))
                    # Create charge histogram with mean TDC calibration
                    mean_charge_calibration = charge_calibration[valid_pixel][:, :max_tdc].mean(axis=0)
                    mean_tdc_hist = pixel_tdc_hist_result.swapaxes(0, 1)[valid_pixel][:, :max_tdc].mean(axis=0)
                    result_array = np.rec.array(np.column_stack((mean_charge_calibration, mean_tdc_hist)), dtype=[('charge', float), ('count', float)])
                    out_7 = out_file_h5.create_table(out_file_h5.root, name='HistMeanTdcCalibratedCondition_%d' % index, description=result_array.dtype, title='Hist Tdc with mean charge calibration and %s' % condition, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                    out_7.attrs.condition = condition
                    out_7.attrs.n_pixel = valid_pixel[0].shape[0]
                    out_7.attrs.n_hits = pixel_tdc_hist_result.swapaxes(0, 1)[valid_pixel][:, :max_tdc].sum()
                    out_7.append(result_array)
                    # Create charge histogram with per pixel TDC calibration
                    x, y = charge_calibration[valid_pixel][:, :max_tdc].ravel(), np.ravel(pixel_tdc_hist_result.swapaxes(0, 1)[valid_pixel][:, :max_tdc].ravel())
                    y_hist, x_hist = y[x > 0], x[x > 0]  # remove hit TDC values without a proper PlsrDAC(TDC) calibration
                    x, y, yerr = analysis_utils.get_profile_histogram(x_hist, y_hist, n_bins=n_bins)
                    result_array = np.rec.array(np.column_stack((x, y, yerr)), dtype=[('charge', float), ('count', float), ('count_error', float)])
                    out_8 = out_file_h5.create_table(out_file_h5.root, name='HistTdcCalibratedCondition_%d' % index, description=result_array.dtype, title='Hist Tdc with per pixel charge calibration and %s' % condition, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                    out_8.attrs.condition = condition
                    out_8.attrs.n_pixel = valid_pixel[0].shape[0]
                    out_8.attrs.n_hits = y_hist.sum()
                    out_8.append(result_array)

    # Plot Data
    if plot_data:
        with PdfPages(input_file_hits[:-3] + '_calibrated_tdc_hists.pdf') as output_pdf:
            plot_hits_per_condition(output_pdf)
            with tb.open_file(input_file_hits[:-3] + '_tdc_hists.h5', mode="r") as in_file_h5:
                for node in in_file_h5.root:  # go through the data and plot them
                    if 'MeanPixel' in node.name:
                        try:
                            plot_three_way(np.ma.masked_invalid(node[:]) * 1.5625, title='Mean TDC delay, hits with\n%s' % node._v_attrs.condition[:80] if 'Timestamp' in node.name else 'Mean TDC, hits with\n%s' % node._v_attrs.condition[:80], filename=output_pdf)
                        except ValueError:
                            logging.warning('Cannot plot TDC delay')
                    elif 'HistTdcCondition' in node.name:
                        hist_1d = node[:]
                        entry_index = np.where(hist_1d != 0)
                        if entry_index[0].shape[0] != 0:
                            max_index = np.amax(entry_index)
                        else:
                            max_index = max_tdc
                        plot_1d_hist(hist_1d[:max_index + 10], title='TDC histogram, hits with\n%s' % node._v_attrs.condition[:80] if 'Timestamp' not in node.name else 'TDC time stamp histogram, hits with\n%s' % node._v_attrs.condition[:80], x_axis_title='TDC' if 'Timestamp' not in node.name else 'TDC time stamp', filename=output_pdf)
                    elif 'HistPixelTdc' in node.name:
                        hist_3d = node[:]
                        entry_index = np.where(hist_3d.sum(axis=(0, 1)) != 0)
                        if entry_index[0].shape[0] != 0:
                            max_index = np.amax(entry_index)
                        else:
                            max_index = max_tdc
                        best_pixel_index = np.where(hist_3d.sum(axis=2) == np.amax(node[:].sum(axis=2)))
                        if best_pixel_index[0].shape[0] == 1:  # there could be more than one pixel with most hits
                            try:
                                plot_1d_hist(hist_3d[best_pixel_index][0, :max_index], title='TDC histogram of pixel %d, %d\n%s' % (best_pixel_index[1] + 1, best_pixel_index[0] + 1, node._v_attrs.condition[:80]) if 'Timestamp' not in node.name else 'TDC time stamp histogram, hits of pixel %d, %d' % (best_pixel_index[1] + 1, best_pixel_index[0] + 1), x_axis_title='TDC' if 'Timestamp' not in node.name[:80] else 'TDC time stamp', filename=output_pdf)
                            except IndexError:
                                logging.warning('Cannot plot pixel TDC histogram')
                    elif 'HistTdcCalibratedCondition' in node.name:
                        plot_corrected_tdc_hist(node[:]['charge'], node[:]['count'], title='TDC histogram, %d pixel, per pixel TDC calib.\n%s' % (node._v_attrs.n_pixel, node._v_attrs.condition[:80]), output_pdf=output_pdf)
                    elif 'HistMeanTdcCalibratedCondition' in node.name:
                        plot_corrected_tdc_hist(node[:]['charge'], node[:]['count'], title='TDC histogram, %d pixel, mean TDC calib.\n%s' % (node._v_attrs.n_pixel, node._v_attrs.condition[:80]), output_pdf=output_pdf)
                    elif 'HistTdcCorr' in node.name:
                        plot_tdc_tot_correlation(node[:], node._v_attrs.condition, output_pdf)
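
A minimal call sketch for the function above; the file name, the event status bits and the cut strings are placeholders, not values taken from this module:

histogram_tdc_hits(input_file_hits='run_42_cluster_hits.h5',  # placeholder: file with a ClusterHits table
                   hit_selection_conditions=['(n_cluster == 1)',  # placeholder numexpr-style cuts on ClusterHits columns
                                             '(n_cluster == 1) & (cluster_size == 1)'],
                   event_status_select_mask=0b0000111111011111,  # placeholder bit mask applied to event_status
                   event_status_condition=0b0000000100000000,  # placeholder expected value of the masked bits
                   calibration_file=None,  # without a calibration file only raw TDC histograms are created
                   max_tdc=1000,
                   plot_data=True)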
Ejemplo n.º 20
0
def histogram_tdc_hits(input_file_hits, hit_selection_conditions, event_status_select_mask, event_status_condition, calibation_file=None, max_tdc=analysis_configuration['max_tdc'], n_bins=analysis_configuration['n_bins']):
    for condition in hit_selection_conditions:
        logging.info('Histogram tdc hits with %s', condition)

    def get_charge(max_tdc, tdc_calibration_values, tdc_pixel_calibration):  # return the charge from calibration
        charge_calibration = np.zeros(shape=(80, 336, max_tdc))
        for column in range(80):
            for row in range(336):
                actual_pixel_calibration = tdc_pixel_calibration[column, row, :]
                if np.any(actual_pixel_calibration != 0) and np.all(np.isfinite(actual_pixel_calibration)):
                    interpolation = interp1d(x=actual_pixel_calibration, y=tdc_calibration_values, kind='slinear', bounds_error=False, fill_value=0)
                    charge_calibration[column, row, :] = interpolation(np.arange(max_tdc))
        return charge_calibration

    def plot_tdc_tot_correlation(data, condition, output_pdf):
        logging.info('Plot correlation histogram for %s', condition)
        plt.clf()
        data = np.ma.array(data, mask=(data <= 0))
        if np.ma.any(data > 0):
            cmap = cm.get_cmap('jet', 200)
            cmap.set_bad('w')
            plt.title('Correlation with %s' % condition)
            norm = colors.LogNorm()
            z_max = data.max(fill_value=0)
            plt.xlabel('TDC')
            plt.ylabel('TOT')
            im = plt.imshow(data, cmap=cmap, norm=norm, aspect='auto', interpolation='nearest')  # , norm=norm)
            divider = make_axes_locatable(plt.gca())
            plt.gca().invert_yaxis()
            cax = divider.append_axes("right", size="5%", pad=0.1)
            plt.colorbar(im, cax=cax, ticks=np.linspace(start=0, stop=z_max, num=9, endpoint=True))
            output_pdf.savefig()
        else:
            logging.warning('No data for correlation plotting for %s', condition)

    def plot_hits_per_condition(output_pdf):
        logging.info('Plot hit selection efficiency histogram for %d conditions', len(hit_selection_conditions) + 2)
        labels = ['All Hits', 'Hits of\ngood events']
        for condition in hit_selection_conditions:
            condition = re.sub('[&]', '\n', condition)
            condition = re.sub('[()]', '', condition)
            labels.append(condition)
        plt.bar(range(len(n_hits_per_condition)), n_hits_per_condition, align='center')
        plt.xticks(range(len(n_hits_per_condition)), labels, size=8)
        plt.title('Number of hits for different cuts')
        plt.yscale('log')
        plt.ylabel('#')
        plt.grid()
        for x, y in zip(np.arange(len(n_hits_per_condition)), n_hits_per_condition):
            plt.annotate('%d' % (float(y) / float(n_hits_per_condition[0]) * 100.) + r'%', xy=(x, y / 2.), xycoords='data', color='grey', size=15)
        output_pdf.savefig()

    def plot_corrected_tdc_hist(x, y, title, output_pdf, point_style='-'):
        logging.info('Plot TDC hist with TDC calibration')
        plt.clf()
        y /= np.amax(y) if y.shape[0] > 0 else y
        plt.plot(x, y, point_style)
        plt.title(title, size=10)
        plt.xlabel('Charge [PlsrDAC]')
        plt.ylabel('Count [a.u.]')
        plt.grid()
        output_pdf.savefig()

    # Create data
    with tb.openFile(input_file_hits, mode="r") as in_hit_file_h5:
        cluster_hit_table = in_hit_file_h5.root.ClusterHits

        # Result hists, initialized per condition
        pixel_tdc_hists_per_condition = [np.zeros(shape=(80, 336, max_tdc), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        pixel_tdc_timestamp_hists_per_condition = [np.zeros(shape=(80, 336, 256), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        mean_pixel_tdc_hists_per_condition = [np.zeros(shape=(80, 336), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        mean_pixel_tdc_timestamp_hists_per_condition = [np.zeros(shape=(80, 336), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        tdc_hists_per_condition = [np.zeros(shape=(max_tdc), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        tdc_corr_hists_per_condition = [np.zeros(shape=(max_tdc, 16), dtype=np.uint32) for _ in hit_selection_conditions] if hit_selection_conditions else []

        n_hits_per_condition = [0 for _ in range(len(hit_selection_conditions) + 2)]  # the first two entries count all hits and hits of good events

        logging.info('Select hits and create TDC histograms for %d cut conditions', len(hit_selection_conditions))
        progress_bar = progressbar.ProgressBar(widgets=['', progressbar.Percentage(), ' ', progressbar.Bar(marker='*', left='|', right='|'), ' ', progressbar.AdaptiveETA()], maxval=cluster_hit_table.shape[0], term_width=80)
        progress_bar.start()
        for cluster_hits, _ in analysis_utils.data_aligned_at_events(cluster_hit_table, chunk_size=1e8):
            n_hits_per_condition[0] += cluster_hits.shape[0]
            selected_events_cluster_hits = cluster_hits[np.logical_and(cluster_hits['TDC'] < max_tdc, (cluster_hits['event_status'] & event_status_select_mask) == event_status_condition)]
            n_hits_per_condition[1] += selected_events_cluster_hits.shape[0]
            for index, condition in enumerate(hit_selection_conditions):
                selected_cluster_hits = analysis_utils.select_hits(selected_events_cluster_hits, condition)
                n_hits_per_condition[2 + index] += selected_cluster_hits.shape[0]
                column, row, tdc = selected_cluster_hits['column'] - 1, selected_cluster_hits['row'] - 1, selected_cluster_hits['TDC']
                pixel_tdc_hists_per_condition[index] += analysis_utils.hist_3d_index(column, row, tdc, shape=(80, 336, max_tdc))
                mean_pixel_tdc_hists_per_condition[index] = np.average(pixel_tdc_hists_per_condition[index], axis=2, weights=range(0, max_tdc)) * np.sum(np.arange(0, max_tdc)) / pixel_tdc_hists_per_condition[index].sum(axis=2)
                tdc_timestamp = selected_cluster_hits['TDC_time_stamp']
                pixel_tdc_timestamp_hists_per_condition[index] += analysis_utils.hist_3d_index(column, row, tdc_timestamp, shape=(80, 336, 256))
                mean_pixel_tdc_timestamp_hists_per_condition[index] = np.average(pixel_tdc_timestamp_hists_per_condition[index], axis=2, weights=range(0, 256)) * np.sum(np.arange(0, 256)) / pixel_tdc_timestamp_hists_per_condition[index].sum(axis=2)
                tdc_hists_per_condition[index] = pixel_tdc_hists_per_condition[index].sum(axis=(0, 1))
                tdc_corr_hists_per_condition[index] += analysis_utils.hist_2d_index(tdc, selected_cluster_hits['tot'], shape=(max_tdc, 16))
            progress_bar.update(n_hits_per_condition[0])
        progress_bar.finish()

        # Take TDC calibration if available and calculate charge for each TDC value and pixel
        if calibation_file is not None:
            with tb.openFile(calibation_file, mode="r") as in_file_calibration_h5:
                tdc_calibration = in_file_calibration_h5.root.HitOrCalibration[:, :, :, 1]
                tdc_calibration_values = in_file_calibration_h5.root.HitOrCalibration.attrs.scan_parameter_values[:]
            charge_calibration = get_charge(max_tdc, tdc_calibration_values, tdc_calibration)
        else:
            charge_calibration = None

        # Store data of result histograms
        with tb.open_file(input_file_hits[:-3] + '_tdc_hists.h5', mode="w") as out_file_h5:
            for index, condition in enumerate(hit_selection_conditions):
                pixel_tdc_hist_result = np.swapaxes(pixel_tdc_hists_per_condition[index], 0, 1)
                pixel_tdc_timestamp_hist_result = np.swapaxes(pixel_tdc_timestamp_hists_per_condition[index], 0, 1)
                mean_pixel_tdc_hist_result = np.swapaxes(mean_pixel_tdc_hists_per_condition[index], 0, 1)
                mean_pixel_tdc_timestamp_hist_result = np.swapaxes(mean_pixel_tdc_timestamp_hists_per_condition[index], 0, 1)
                tdc_hists_per_condition_result = tdc_hists_per_condition[index]
                tdc_corr_hist_result = np.swapaxes(tdc_corr_hists_per_condition[index], 0, 1)
                # Create result hists
                out_1 = out_file_h5.createCArray(out_file_h5.root, name='HistPixelTdcCondition_%d' % index, title='Hist Pixel Tdc with %s' % condition, atom=tb.Atom.from_dtype(pixel_tdc_hist_result.dtype), shape=pixel_tdc_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_2 = out_file_h5.createCArray(out_file_h5.root, name='HistPixelTdcTimestampCondition_%d' % index, title='Hist Pixel Tdc Timestamp with %s' % condition, atom=tb.Atom.from_dtype(pixel_tdc_timestamp_hist_result.dtype), shape=pixel_tdc_timestamp_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_3 = out_file_h5.createCArray(out_file_h5.root, name='HistMeanPixelTdcCondition_%d' % index, title='Hist Mean Pixel Tdc with %s' % condition, atom=tb.Atom.from_dtype(mean_pixel_tdc_hist_result.dtype), shape=mean_pixel_tdc_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_4 = out_file_h5.createCArray(out_file_h5.root, name='HistMeanPixelTdcTimestampCondition_%d' % index, title='Hist Mean Pixel Tdc Timestamp with %s' % condition, atom=tb.Atom.from_dtype(mean_pixel_tdc_timestamp_hist_result.dtype), shape=mean_pixel_tdc_timestamp_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_5 = out_file_h5.createCArray(out_file_h5.root, name='HistTdcCondition_%d' % index, title='Hist Tdc with %s' % condition, atom=tb.Atom.from_dtype(tdc_hists_per_condition_result.dtype), shape=tdc_hists_per_condition_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_6 = out_file_h5.createCArray(out_file_h5.root, name='HistTdcCorrCondition_%d' % index, title='Hist Correlation Tdc/Tot with %s' % condition, atom=tb.Atom.from_dtype(tdc_corr_hist_result.dtype), shape=tdc_corr_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                # Add result hists information
                out_1.attrs.dimensions, out_1.attrs.condition, out_1.attrs.tdc_values = 'column, row, TDC value', condition, range(max_tdc)
                out_2.attrs.dimensions, out_2.attrs.condition, out_2.attrs.tdc_values = 'column, row, TDC time stamp value', condition, range(256)
                out_3.attrs.dimensions, out_3.attrs.condition = 'column, row, mean TDC value', condition
                out_4.attrs.dimensions, out_4.attrs.condition = 'column, row, mean TDC time stamp value', condition
                out_5.attrs.dimensions, out_5.attrs.condition = 'PlsrDAC', condition
                out_6.attrs.dimensions, out_6.attrs.condition = 'TDC, TOT', condition
                out_1[:], out_2[:], out_3[:], out_4[:], out_5[:], out_6[:] = pixel_tdc_hist_result, pixel_tdc_timestamp_hist_result, mean_pixel_tdc_hist_result, mean_pixel_tdc_timestamp_hist_result, tdc_hists_per_condition_result, tdc_corr_hist_result

                if charge_calibration is not None:
                    # Select only valid pixels for histogramming: they have data and a calibration (i.e. any charge(TDC) calibration value != 0)
                    valid_pixel = np.where(np.logical_and(charge_calibration[:, :, :max_tdc].sum(axis=2) > 0, pixel_tdc_hist_result[:, :, :max_tdc].swapaxes(0, 1).sum(axis=2) > 0))

                    mean_charge_calibration = charge_calibration[valid_pixel][:, :max_tdc].mean(axis=0)
                    mean_tdc_hist = pixel_tdc_hist_result.swapaxes(0, 1)[valid_pixel][:, :max_tdc].mean(axis=0)
                    result_array = np.rec.array(np.column_stack((mean_charge_calibration, mean_tdc_hist)), dtype=[('charge', float), ('count', float)])
                    out_6 = out_file_h5.create_table(out_file_h5.root, name='HistMeanTdcCalibratedCondition_%d' % index, description=result_array.dtype, title='Hist Tdc with mean charge calibration and %s' % condition, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                    out_6.attrs.condition = condition
                    out_6.attrs.n_pixel = valid_pixel[0].shape[0]
                    out_6.append(result_array)
                    # Create charge histogram with per pixel TDC(charge) calibration
                    x, y = charge_calibration[valid_pixel][:, :max_tdc].ravel(), np.ravel(pixel_tdc_hist_result.swapaxes(0, 1)[valid_pixel][:, :max_tdc].ravel())
                    y, x = y[x > 0], x[x > 0]  # remove hit TDC values without a proper PlsrDAC(TDC) calibration
                    x, y, yerr = analysis_utils.get_profile_histogram(x, y, n_bins=n_bins)
                    result_array = np.rec.array(np.column_stack((x, y, yerr)), dtype=[('charge', float), ('count', float), ('count_error', float)])
                    out_7 = out_file_h5.create_table(out_file_h5.root, name='HistTdcCalibratedCondition_%d' % index, description=result_array.dtype, title='Hist Tdc with per pixel charge calibration and %s' % condition, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                    out_7.attrs.condition = condition
                    out_7.attrs.n_pixel = valid_pixel[0].shape[0]
                    out_7.append(result_array)

    # Plot Data
    with PdfPages(input_file_hits[:-3] + '_calibrated_tdc_hists.pdf') as output_pdf:
        plot_hits_per_condition(output_pdf)
        with tb.open_file(input_file_hits[:-3] + '_tdc_hists.h5', mode="r") as in_file_h5:
            for node in in_file_h5.root:  # go through the data and plot them
                if 'MeanPixel' in node.name:
                    try:
                        plot_three_way(np.ma.masked_invalid(node[:]) * 1.5625, title='Mean TDC delay, hits with\n%s' % node._v_attrs.condition if 'Timestamp' in node.name else 'Mean TDC, hits with\n%s' % node._v_attrs.condition, filename=output_pdf)
                    except ValueError:
                        logging.warning('Cannot plot TDC delay')
                elif 'HistTdcCondition' in node.name:
                    hist_1d = node[:]
                    entry_index = np.where(hist_1d != 0)
                    if entry_index[0].shape[0] != 0:
                        max_index = np.amax(entry_index)
                    else:
                        max_index = max_tdc
                    plot_1d_hist(hist_1d[:max_index + 10], title='TDC histogram, hits with\n%s' % node._v_attrs.condition if 'Timestamp' not in node.name else 'TDC time stamp histogram, hits with\n%s' % node._v_attrs.condition, x_axis_title='TDC' if 'Timestamp' not in node.name else 'TDC time stamp', filename=output_pdf)
                elif 'HistPixelTdc' in node.name:
                    hist_3d = node[:]
                    entry_index = np.where(hist_3d.sum(axis=(0, 1)) != 0)
                    if entry_index[0].shape[0] != 0:
                        max_index = np.amax(entry_index)
                    else:
                        max_index = max_tdc
                    best_pixel_index = np.where(hist_3d.sum(axis=2) == np.amax(node[:].sum(axis=2)))
                    if best_pixel_index[0].shape[0] == 1:  # there could be more than one pixel with most hits
                        plot_1d_hist(hist_3d[best_pixel_index][0, :max_index], title='TDC histogram of pixel %d, %d\n%s' % (best_pixel_index[1] + 1, best_pixel_index[0] + 1, node._v_attrs.condition) if 'Timestamp' not in node.name else 'TDC time stamp histogram, hits of pixel %d, %d' % (best_pixel_index[1] + 1, best_pixel_index[0] + 1), x_axis_title='TDC' if 'Timestamp' not in node.name else 'TDC time stamp', filename=output_pdf)
                elif 'HistTdcCalibratedCondition' in node.name:
                    plot_corrected_tdc_hist(node[:]['charge'], node[:]['count'], title='TDC histogram, %d pixel, per pixel TDC calib.\n%s' % (node._v_attrs.n_pixel, node._v_attrs.condition), output_pdf=output_pdf)
                elif 'HistMeanTdcCalibratedCondition' in node.name:
                    plot_corrected_tdc_hist(node[:]['charge'], node[:]['count'], title='TDC histogram, %d pixel, mean TDC calib.\n%s' % (node._v_attrs.n_pixel, node._v_attrs.condition), output_pdf=output_pdf)
                elif 'HistTdcCorr' in node.name:
                    plot_tdc_tot_correlation(node[:], node._v_attrs.condition, output_pdf)
Ejemplo n.º 21
0
def align_events(input_file,
                 output_file,
                 fix_event_number=True,
                 fix_trigger_number=True,
                 chunk_size=20000000):
    ''' Selects only hits from good events and checks the distance between event number and trigger number for each hit.
    If the FE data allowed a successful event recognition, this distance is constant (apart from trigger number overflows).
    Otherwise the event number is corrected using the trigger number. The number of inconsistencies and the number of hits that had to be corrected are counted.
    Remark: A single wrongly analyzed event shifts all following event numbers and destroys the correlation! Usually, however, the data does not need to be corrected.

    Parameters
    ----------
    input_file : string
        File name of the input file with the Hits table.
    output_file : string
        File name of the output file for the selected, corrected hits.
    fix_event_number, fix_trigger_number : bool
        Enable the event number / trigger number correction.
    chunk_size : int
        How many hits are read at once into RAM for correction.
    '''
    logging.info('Align events to trigger number in %s' % input_file)

    with tb.open_file(input_file, 'r') as in_file_h5:
        hit_table = in_file_h5.root.Hits
        jumps = []  # jumps in the event-number to trigger-number offset
        n_fixed_hits = 0  # number of hits whose event number was fixed

        with tb.open_file(output_file, 'w') as out_file_h5:
            hit_table_description = data_struct.HitInfoTable().columns.copy()
            hit_table_out = out_file_h5.create_table(
                out_file_h5.root,
                name='Hits',
                description=hit_table_description,
                title='Selected hits for test beam analysis',
                filters=tb.Filters(complib='blosc',
                                   complevel=5,
                                   fletcher32=False),
                chunkshape=(chunk_size, ))

            # Correct hit event number
            for hits, _ in analysis_utils.data_aligned_at_events(
                    hit_table, chunk_size=chunk_size):

                if not np.all(np.diff(hits['event_number']) >= 0):
                    raise RuntimeError(
                        'The event number does not always increase. This data cannot be used like this!'
                    )

                if fix_trigger_number is True:
                    selection = np.logical_or(
                        (hits['trigger_status'] & 0b00000001) == 0b00000001,
                        (hits['event_status']
                         & 0b0000000000000010) == 0b0000000000000010)
                    selected_te_hits = np.where(selection)[0]  # select events (with and without hits) that have the trigger error flag set

                    #                     assert selected_te_hits[0] > 0
                    tmp_trigger_number = hits['trigger_number'].astype(
                        np.int32)

                    # save trigger and event number for plotting correlation between trigger number and event number
                    event_number, trigger_number = hits['event_number'].copy(), hits['trigger_number'].copy()

                    hits['trigger_number'][0] = 0

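                    # At every hit with a trigger error the jump of the trigger-number to event-number
                    # offset is recorded; the cumulative sum of these jumps is subtracted below so that
                    # the corrected trigger numbers increase in step with the event numbers again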
                    offset = (
                        hits['trigger_number'][selected_te_hits] -
                        hits['trigger_number'][selected_te_hits - 1] -
                        hits['event_number'][selected_te_hits] +
                        hits['event_number'][selected_te_hits - 1]).astype(
                            np.int32)  # save jumps in trigger number
                    offset_tot = np.cumsum(offset)

                    offset_tot[offset_tot > 32768] = np.mod(
                        offset_tot[offset_tot > 32768], 32768)
                    offset_tot[offset_tot < -32768] = np.mod(
                        offset_tot[offset_tot < -32768], 32768)

                    for start_hit_index in range(len(selected_te_hits)):
                        start_hit = selected_te_hits[start_hit_index]
                        stop_hit = selected_te_hits[start_hit_index +
                                                    1] if start_hit_index < (
                                                        len(selected_te_hits) -
                                                        1) else None
                        tmp_trigger_number[start_hit:stop_hit] -= offset_tot[
                            start_hit_index]

                    tmp_trigger_number[tmp_trigger_number >= 32768] = np.mod(
                        tmp_trigger_number[tmp_trigger_number >= 32768], 32768)
                    tmp_trigger_number[
                        tmp_trigger_number < 0] = 32768 - np.mod(
                            np.abs(tmp_trigger_number[tmp_trigger_number < 0]),
                            32768)

                    hits['trigger_number'] = tmp_trigger_number

                selected_hits = hits[(hits['event_status'] & 0b0000100000000000) == 0b0000000000000000]  # select non-empty events

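                # Rebuild the event number from the 15-bit trigger number: the upper part (multiples of
                # 32768) is kept from the current event number and the lower part is replaced by
                # trigger_number - 1; events whose event number already exceeds the trigger number are
                # treated as a trigger counter overflow and get one additional period of 32768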
                if fix_event_number is True:
                    selector = (
                        selected_hits['event_number'] !=
                        (np.divide(selected_hits['event_number'] + 1, 32768) *
                         32768 + selected_hits['trigger_number'] - 1))
                    n_fixed_hits += np.count_nonzero(selector)
                    selector = selected_hits['event_number'] > selected_hits[
                        'trigger_number']
                    selected_hits['event_number'] = np.divide(
                        selected_hits['event_number'] + 1,
                        32768) * 32768 + selected_hits['trigger_number'] - 1
                    selected_hits['event_number'][selector] = np.divide(
                        selected_hits['event_number'][selector] + 1,
                        32768) * 32768 + 32768 + selected_hits[
                            'trigger_number'][selector] - 1


                # FIX FOR DIAMOND:
                # selected_hits['event_number'] -= 1  # FIX FOR DIAMOND EVENT OFFSET

                hit_table_out.append(selected_hits)

        jumps = np.unique(np.array(jumps))
        logging.info(
            'Corrected %d inconsistencies in the event number. %d hits corrected.'
            % (jumps[jumps != 0].shape[0], n_fixed_hits))

        if fix_trigger_number is True:
            return (output_file, event_number, trigger_number,
                    hits['trigger_number'])