Exemple #1
0
def histogram_tdc_hits(input_file_hits, hit_selection_conditions, event_status_select_mask, event_status_condition, calibation_file=None, max_tdc=analysis_configuration['max_tdc'], n_bins=analysis_configuration['n_bins']):
    for condition in hit_selection_conditions:
        logging.info('Histogram tdc hits with %s', condition)

    def get_charge(max_tdc, tdc_calibration_values, tdc_pixel_calibration):  # return the charge from calibration
        charge_calibration = np.zeros(shape=(80, 336, max_tdc))
        for column in range(80):
            for row in range(336):
                actual_pixel_calibration = tdc_pixel_calibration[column, row, :]
                if np.any(actual_pixel_calibration != 0) and np.all(np.isfinite(actual_pixel_calibration)):
                    interpolation = interp1d(x=actual_pixel_calibration, y=tdc_calibration_values, kind='slinear', bounds_error=False, fill_value=0)
                    charge_calibration[column, row, :] = interpolation(np.arange(max_tdc))
        return charge_calibration

    def plot_tdc_tot_correlation(data, condition, output_pdf):
        logging.info('Plot correlation histogram for %s', condition)
        plt.clf()
        data = np.ma.array(data, mask=(data <= 0))
        if np.ma.any(data > 0):
            cmap = cm.get_cmap('jet', 200)
            cmap.set_bad('w')
            plt.title('Correlation with %s' % condition)
            norm = colors.LogNorm()
            z_max = data.max(fill_value=0)
            plt.xlabel('TDC')
            plt.ylabel('TOT')
            im = plt.imshow(data, cmap=cmap, norm=norm, aspect='auto', interpolation='nearest')  # , norm=norm)
            divider = make_axes_locatable(plt.gca())
            plt.gca().invert_yaxis()
            cax = divider.append_axes("right", size="5%", pad=0.1)
            plt.colorbar(im, cax=cax, ticks=np.linspace(start=0, stop=z_max, num=9, endpoint=True))
            output_pdf.savefig()
        else:
            logging.warning('No data for correlation plotting for %s', condition)

    def plot_hits_per_condition(output_pdf):
        logging.info('Plot hits selection efficiency histogram for %d conditions', len(hit_selection_conditions) + 2)
        labels = ['All Hits', 'Hits of\ngood events']
        for condition in hit_selection_conditions:
            condition = re.sub('[&]', '\n', condition)
            condition = re.sub('[()]', '', condition)
            labels.append(condition)
        plt.bar(range(len(n_hits_per_condition)), n_hits_per_condition, align='center')
        plt.xticks(range(len(n_hits_per_condition)), labels, size=8)
        plt.title('Number of hits for different cuts')
        plt.yscale('log')
        plt.ylabel('#')
        plt.grid()
        for x, y in zip(np.arange(len(n_hits_per_condition)), n_hits_per_condition):
            plt.annotate('%d' % (float(y) / float(n_hits_per_condition[0]) * 100.) + r'%', xy=(x, y / 2.), xycoords='data', color='grey', size=15)
        output_pdf.savefig()

    def plot_corrected_tdc_hist(x, y, title, output_pdf, point_style='-'):
        logging.info('Plot TDC hist with TDC calibration')
        plt.clf()
        y /= np.amax(y) if y.shape[0] > 0 else y
        plt.plot(x, y, point_style)
        plt.title(title, size=10)
        plt.xlabel('Charge [PlsrDAC]')
        plt.ylabel('Count [a.u.]')
        plt.grid()
        output_pdf.savefig()

    # Create data
    with tb.openFile(input_file_hits, mode="r") as in_hit_file_h5:
        cluster_hit_table = in_hit_file_h5.root.ClusterHits

        # Result hists, initialized per condition
        pixel_tdc_hists_per_condition = [np.zeros(shape=(80, 336, max_tdc), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        pixel_tdc_timestamp_hists_per_condition = [np.zeros(shape=(80, 336, 256), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        mean_pixel_tdc_hists_per_condition = [np.zeros(shape=(80, 336), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        mean_pixel_tdc_timestamp_hists_per_condition = [np.zeros(shape=(80, 336), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        tdc_hists_per_condition = [np.zeros(shape=(max_tdc), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        tdc_corr_hists_per_condition = [np.zeros(shape=(max_tdc, 16), dtype=np.uint32) for _ in hit_selection_conditions] if hit_selection_conditions else []

        n_hits_per_condition = [0 for _ in range(len(hit_selection_conditions) + 2)]  # condition 1, 2 are all hits, hits of goode events

        logging.info('Select hits and create TDC histograms for %d cut conditions', len(hit_selection_conditions))
        progress_bar = progressbar.ProgressBar(widgets=['', progressbar.Percentage(), ' ', progressbar.Bar(marker='*', left='|', right='|'), ' ', progressbar.AdaptiveETA()], maxval=cluster_hit_table.shape[0], term_width=80)
        progress_bar.start()
        for cluster_hits, _ in analysis_utils.data_aligned_at_events(cluster_hit_table, chunk_size=1e8):
            n_hits_per_condition[0] += cluster_hits.shape[0]
            selected_events_cluster_hits = cluster_hits[np.logical_and(cluster_hits['TDC'] < max_tdc, (cluster_hits['event_status'] & event_status_select_mask) == event_status_condition)]
            n_hits_per_condition[1] += selected_events_cluster_hits.shape[0]
            for index, condition in enumerate(hit_selection_conditions):
                selected_cluster_hits = analysis_utils.select_hits(selected_events_cluster_hits, condition)
                n_hits_per_condition[2 + index] += selected_cluster_hits.shape[0]
                column, row, tdc = selected_cluster_hits['column'] - 1, selected_cluster_hits['row'] - 1, selected_cluster_hits['TDC']
                pixel_tdc_hists_per_condition[index] += analysis_utils.hist_3d_index(column, row, tdc, shape=(80, 336, max_tdc))
                mean_pixel_tdc_hists_per_condition[index] = np.average(pixel_tdc_hists_per_condition[index], axis=2, weights=range(0, max_tdc)) * np.sum(np.arange(0, max_tdc)) / pixel_tdc_hists_per_condition[index].sum(axis=2)
                tdc_timestamp = selected_cluster_hits['TDC_time_stamp']
                pixel_tdc_timestamp_hists_per_condition[index] += analysis_utils.hist_3d_index(column, row, tdc_timestamp, shape=(80, 336, 256))
                mean_pixel_tdc_timestamp_hists_per_condition[index] = np.average(pixel_tdc_timestamp_hists_per_condition[index], axis=2, weights=range(0, 256)) * np.sum(np.arange(0, 256)) / pixel_tdc_timestamp_hists_per_condition[index].sum(axis=2)
                tdc_hists_per_condition[index] = pixel_tdc_hists_per_condition[index].sum(axis=(0, 1))
                tdc_corr_hists_per_condition[index] += analysis_utils.hist_2d_index(tdc, selected_cluster_hits['tot'], shape=(max_tdc, 16))
            progress_bar.update(n_hits_per_condition[0])
        progress_bar.finish()

        # Take TDC calibration if available and calculate charge for each TDC value and pixel
        if calibation_file is not None:
            with tb.openFile(calibation_file, mode="r") as in_file_calibration_h5:
                tdc_calibration = in_file_calibration_h5.root.HitOrCalibration[:, :, :, 1]
                tdc_calibration_values = in_file_calibration_h5.root.HitOrCalibration.attrs.scan_parameter_values[:]
            charge_calibration = get_charge(max_tdc, tdc_calibration_values, tdc_calibration)
        else:
            charge_calibration = None

        # Store data of result histograms
        with tb.open_file(input_file_hits[:-3] + '_tdc_hists.h5', mode="w") as out_file_h5:
            for index, condition in enumerate(hit_selection_conditions):
                pixel_tdc_hist_result = np.swapaxes(pixel_tdc_hists_per_condition[index], 0, 1)
                pixel_tdc_timestamp_hist_result = np.swapaxes(pixel_tdc_timestamp_hists_per_condition[index], 0, 1)
                mean_pixel_tdc_hist_result = np.swapaxes(mean_pixel_tdc_hists_per_condition[index], 0, 1)
                mean_pixel_tdc_timestamp_hist_result = np.swapaxes(mean_pixel_tdc_timestamp_hists_per_condition[index], 0, 1)
                tdc_hists_per_condition_result = tdc_hists_per_condition[index]
                tdc_corr_hist_result = np.swapaxes(tdc_corr_hists_per_condition[index], 0, 1)
                # Create result hists
                out_1 = out_file_h5.createCArray(out_file_h5.root, name='HistPixelTdcCondition_%d' % index, title='Hist Pixel Tdc with %s' % condition, atom=tb.Atom.from_dtype(pixel_tdc_hist_result.dtype), shape=pixel_tdc_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_2 = out_file_h5.createCArray(out_file_h5.root, name='HistPixelTdcTimestampCondition_%d' % index, title='Hist Pixel Tdc Timestamp with %s' % condition, atom=tb.Atom.from_dtype(pixel_tdc_timestamp_hist_result.dtype), shape=pixel_tdc_timestamp_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_3 = out_file_h5.createCArray(out_file_h5.root, name='HistMeanPixelTdcCondition_%d' % index, title='Hist Mean Pixel Tdc with %s' % condition, atom=tb.Atom.from_dtype(mean_pixel_tdc_hist_result.dtype), shape=mean_pixel_tdc_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_4 = out_file_h5.createCArray(out_file_h5.root, name='HistMeanPixelTdcTimestampCondition_%d' % index, title='Hist Mean Pixel Tdc Timestamp with %s' % condition, atom=tb.Atom.from_dtype(mean_pixel_tdc_timestamp_hist_result.dtype), shape=mean_pixel_tdc_timestamp_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_5 = out_file_h5.createCArray(out_file_h5.root, name='HistTdcCondition_%d' % index, title='Hist Tdc with %s' % condition, atom=tb.Atom.from_dtype(tdc_hists_per_condition_result.dtype), shape=tdc_hists_per_condition_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_6 = out_file_h5.createCArray(out_file_h5.root, name='HistTdcCorrCondition_%d' % index, title='Hist Correlation Tdc/Tot with %s' % condition, atom=tb.Atom.from_dtype(tdc_corr_hist_result.dtype), shape=tdc_corr_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                # Add result hists information
                out_1.attrs.dimensions, out_1.attrs.condition, out_1.attrs.tdc_values = 'column, row, TDC value', condition, range(max_tdc)
                out_2.attrs.dimensions, out_2.attrs.condition, out_2.attrs.tdc_values = 'column, row, TDC time stamp value', condition, range(256)
                out_3.attrs.dimensions, out_3.attrs.condition = 'column, row, mean TDC value', condition
                out_4.attrs.dimensions, out_4.attrs.condition = 'column, row, mean TDC time stamp value', condition
                out_5.attrs.dimensions, out_5.attrs.condition = 'PlsrDAC', condition
                out_6.attrs.dimensions, out_6.attrs.condition = 'TDC, TOT', condition
                out_1[:], out_2[:], out_3[:], out_4[:], out_5[:], out_6[:] = pixel_tdc_hist_result, pixel_tdc_timestamp_hist_result, mean_pixel_tdc_hist_result, mean_pixel_tdc_timestamp_hist_result, tdc_hists_per_condition_result, tdc_corr_hist_result

                if charge_calibration is not None:
                    # Select only valid pixel for histograming: they have data and a calibration (that is any charge(TDC) calibration != 0)
                    valid_pixel = np.where(np.logical_and(charge_calibration[:, :, :max_tdc].sum(axis=2) > 0, pixel_tdc_hist_result[:, :, :max_tdc].swapaxes(0, 1).sum(axis=2) > 0))

                    mean_charge_calibration = charge_calibration[valid_pixel][:, :max_tdc].mean(axis=0)
                    mean_tdc_hist = pixel_tdc_hist_result.swapaxes(0, 1)[valid_pixel][:, :max_tdc].mean(axis=0)
                    result_array = np.rec.array(np.column_stack((mean_charge_calibration, mean_tdc_hist)), dtype=[('charge', float), ('count', float)])
                    out_6 = out_file_h5.create_table(out_file_h5.root, name='HistMeanTdcCalibratedCondition_%d' % index, description=result_array.dtype, title='Hist Tdc with mean charge calibration and %s' % condition, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                    out_6.attrs.condition = condition
                    out_6.attrs.n_pixel = valid_pixel[0].shape[0]
                    out_6.append(result_array)
                    # Create charge histogram with per pixel TDC(charge) calibration
                    x, y = charge_calibration[valid_pixel][:, :max_tdc].ravel(), np.ravel(pixel_tdc_hist_result.swapaxes(0, 1)[valid_pixel][:, :max_tdc].ravel())
                    y, x = y[x > 0], x[x > 0]  # remove the hit tdcs without proper calibration plsrDAC(TDC) calibration
                    x, y, yerr = analysis_utils.get_profile_histogram(x, y, n_bins=n_bins)
                    result_array = np.rec.array(np.column_stack((x, y, yerr)), dtype=[('charge', float), ('count', float), ('count_error', float)])
                    out_7 = out_file_h5.create_table(out_file_h5.root, name='HistTdcCalibratedCondition_%d' % index, description=result_array.dtype, title='Hist Tdc with per pixel charge calibration and %s' % condition, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                    out_7.attrs.condition = condition
                    out_7.attrs.n_pixel = valid_pixel[0].shape[0]
                    out_7.append(result_array)

    # Plot Data
    with PdfPages(input_file_hits[:-3] + '_calibrated_tdc_hists.pdf') as output_pdf:
        plot_hits_per_condition(output_pdf)
        with tb.open_file(input_file_hits[:-3] + '_tdc_hists.h5', mode="r") as in_file_h5:
            for node in in_file_h5.root:  # go through the data and plot them
                if 'MeanPixel' in node.name:
                    try:
                        plot_three_way(np.ma.masked_invalid(node[:]) * 1.5625, title='Mean TDC delay, hits with\n%s' % node._v_attrs.condition if 'Timestamp' in node.name else 'Mean TDC, hits with\n%s' % node._v_attrs.condition, filename=output_pdf)
                    except ValueError:
                        logging.warning('Cannot plot TDC delay')
                elif 'HistTdcCondition' in node.name:
                    hist_1d = node[:]
                    entry_index = np.where(hist_1d != 0)
                    if entry_index[0].shape[0] != 0:
                        max_index = np.amax(entry_index)
                    else:
                        max_index = max_tdc
                    plot_1d_hist(hist_1d[:max_index + 10], title='TDC histogram, hits with\n%s' % node._v_attrs.condition if 'Timestamp' not in node.name else 'TDC time stamp histogram, hits with\n%s' % node._v_attrs.condition, x_axis_title='TDC' if 'Timestamp' not in node.name else 'TDC time stamp', filename=output_pdf)
                elif 'HistPixelTdc' in node.name:
                    hist_3d = node[:]
                    entry_index = np.where(hist_3d.sum(axis=(0, 1)) != 0)
                    if entry_index[0].shape[0] != 0:
                        max_index = np.amax(entry_index)
                    else:
                        max_index = max_tdc
                    best_pixel_index = np.where(hist_3d.sum(axis=2) == np.amax(node[:].sum(axis=2)))
                    if best_pixel_index[0].shape[0] == 1:  # there could be more than one pixel with most hits
                        plot_1d_hist(hist_3d[best_pixel_index][0, :max_index], title='TDC histogram of pixel %d, %d\n%s' % (best_pixel_index[1] + 1, best_pixel_index[0] + 1, node._v_attrs.condition) if 'Timestamp' not in node.name else 'TDC time stamp histogram, hits of pixel %d, %d' % (best_pixel_index[1] + 1, best_pixel_index[0] + 1), x_axis_title='TDC' if 'Timestamp' not in node.name else 'TDC time stamp', filename=output_pdf)
                elif 'HistTdcCalibratedCondition' in node.name:
                    plot_corrected_tdc_hist(node[:]['charge'], node[:]['count'], title='TDC histogram, %d pixel, per pixel TDC calib.\n%s' % (node._v_attrs.n_pixel, node._v_attrs.condition), output_pdf=output_pdf)
                elif 'HistMeanTdcCalibratedCondition' in node.name:
                    plot_corrected_tdc_hist(node[:]['charge'], node[:]['count'], title='TDC histogram, %d pixel, mean TDC calib.\n%s' % (node._v_attrs.n_pixel, node._v_attrs.condition), output_pdf=output_pdf)
                elif 'HistTdcCorr' in node.name:
                    plot_tdc_tot_correlation(node[:], node._v_attrs.condition, output_pdf)
def histogram_tdc_hits(input_file_hits, hit_selection_conditions, event_status_select_mask, event_status_condition, calibation_file=None, max_tdc=analysis_configuration['max_tdc'], n_bins=analysis_configuration['n_bins']):
    for condition in hit_selection_conditions:
        logging.info('Histogram tdc hits with %s', condition)

    def get_charge(max_tdc, tdc_calibration_values, tdc_pixel_calibration):  # return the charge from calibration
        charge_calibration = np.zeros(shape=(80, 336, max_tdc))
        for column in range(80):
            for row in range(336):
                actual_pixel_calibration = tdc_pixel_calibration[column, row, :]
                if np.any(actual_pixel_calibration != 0) and np.all(np.isfinite(actual_pixel_calibration)):
                    interpolation = interp1d(x=actual_pixel_calibration, y=tdc_calibration_values, kind='slinear', bounds_error=False, fill_value=0)
                    charge_calibration[column, row, :] = interpolation(np.arange(max_tdc))
        return charge_calibration

    def plot_tdc_tot_correlation(data, condition, output_pdf):
        logging.info('Plot correlation histogram for %s', condition)
        plt.clf()
        data = np.ma.array(data, mask=(data <= 0))
        if np.ma.any(data > 0):
            cmap = cm.get_cmap('jet', 200)
            cmap.set_bad('w')
            plt.title('Correlation with %s' % condition)
            norm = colors.LogNorm()
            z_max = data.max(fill_value=0)
            plt.xlabel('TDC')
            plt.ylabel('TOT')
            im = plt.imshow(data, cmap=cmap, norm=norm, aspect='auto', interpolation='nearest')  # , norm=norm)
            divider = make_axes_locatable(plt.gca())
            plt.gca().invert_yaxis()
            cax = divider.append_axes("right", size="5%", pad=0.1)
            plt.colorbar(im, cax=cax, ticks=np.linspace(start=0, stop=z_max, num=9, endpoint=True))
            output_pdf.savefig()
        else:
            logging.warning('No data for correlation plotting for %s', condition)

    def plot_hits_per_condition(output_pdf):
        logging.info('Plot hits selection efficiency histogram for %d conditions', len(hit_selection_conditions) + 2)
        labels = ['All Hits', 'Hits of\ngood events']
        for condition in hit_selection_conditions:
            condition = re.sub('[&]', '\n', condition)
            condition = re.sub('[()]', '', condition)
            labels.append(condition)
        plt.bar(range(len(n_hits_per_condition)), n_hits_per_condition, align='center')
        plt.xticks(range(len(n_hits_per_condition)), labels, size=8)
        plt.title('Number of hits for different cuts')
        plt.yscale('log')
        plt.ylabel('#')
        plt.grid()
        for x, y in zip(np.arange(len(n_hits_per_condition)), n_hits_per_condition):
            plt.annotate('%d' % (float(y) / float(n_hits_per_condition[0]) * 100.) + r'%', xy=(x, y / 2.), xycoords='data', color='grey', size=15)
        output_pdf.savefig()

    def plot_corrected_tdc_hist(x, y, title, output_pdf, point_style='-'):
        logging.info('Plot TDC hist with TDC calibration')
        plt.clf()
        y /= np.amax(y) if y.shape[0] > 0 else y
        plt.plot(x, y, point_style)
        plt.title(title, size=10)
        plt.xlabel('Charge [PlsrDAC]')
        plt.ylabel('Count [a.u.]')
        plt.grid()
        output_pdf.savefig()

    # Create data
    with tb.openFile(input_file_hits, mode="r") as in_hit_file_h5:
        cluster_hit_table = in_hit_file_h5.root.ClusterHits

        # Result hists, initialized per condition
        pixel_tdc_hists_per_condition = [np.zeros(shape=(80, 336, max_tdc), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        pixel_tdc_timestamp_hists_per_condition = [np.zeros(shape=(80, 336, 256), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        mean_pixel_tdc_hists_per_condition = [np.zeros(shape=(80, 336), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        mean_pixel_tdc_timestamp_hists_per_condition = [np.zeros(shape=(80, 336), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        tdc_hists_per_condition = [np.zeros(shape=(max_tdc), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        tdc_corr_hists_per_condition = [np.zeros(shape=(max_tdc, 16), dtype=np.uint32) for _ in hit_selection_conditions] if hit_selection_conditions else []

        n_hits_per_condition = [0 for _ in range(len(hit_selection_conditions) + 2)]  # condition 1, 2 are all hits, hits of goode events

        logging.info('Select hits and create TDC histograms for %d cut conditions', len(hit_selection_conditions))
        progress_bar = progressbar.ProgressBar(widgets=['', progressbar.Percentage(), ' ', progressbar.Bar(marker='*', left='|', right='|'), ' ', progressbar.AdaptiveETA()], maxval=cluster_hit_table.shape[0], term_width=80)
        progress_bar.start()
        for cluster_hits, _ in analysis_utils.data_aligned_at_events(cluster_hit_table, chunk_size=1e8):
            n_hits_per_condition[0] += cluster_hits.shape[0]
            selected_events_cluster_hits = cluster_hits[np.logical_and(cluster_hits['TDC'] < max_tdc, (cluster_hits['event_status'] & event_status_select_mask) == event_status_condition)]
            n_hits_per_condition[1] += selected_events_cluster_hits.shape[0]
            for index, condition in enumerate(hit_selection_conditions):
                selected_cluster_hits = analysis_utils.select_hits(selected_events_cluster_hits, condition)
                n_hits_per_condition[2 + index] += selected_cluster_hits.shape[0]
                column, row, tdc = selected_cluster_hits['column'] - 1, selected_cluster_hits['row'] - 1, selected_cluster_hits['TDC']
                pixel_tdc_hists_per_condition[index] += analysis_utils.hist_3d_index(column, row, tdc, shape=(80, 336, max_tdc))
                mean_pixel_tdc_hists_per_condition[index] = np.average(pixel_tdc_hists_per_condition[index], axis=2, weights=range(0, max_tdc)) * np.sum(np.arange(0, max_tdc)) / pixel_tdc_hists_per_condition[index].sum(axis=2)
                tdc_timestamp = selected_cluster_hits['TDC_time_stamp']
                pixel_tdc_timestamp_hists_per_condition[index] += analysis_utils.hist_3d_index(column, row, tdc_timestamp, shape=(80, 336, 256))
                mean_pixel_tdc_timestamp_hists_per_condition[index] = np.average(pixel_tdc_timestamp_hists_per_condition[index], axis=2, weights=range(0, 256)) * np.sum(np.arange(0, 256)) / pixel_tdc_timestamp_hists_per_condition[index].sum(axis=2)
                tdc_hists_per_condition[index] = pixel_tdc_hists_per_condition[index].sum(axis=(0, 1))
                tdc_corr_hists_per_condition[index] += analysis_utils.hist_2d_index(tdc, selected_cluster_hits['tot'], shape=(max_tdc, 16))
            progress_bar.update(n_hits_per_condition[0])
        progress_bar.finish()

        # Take TDC calibration if available and calculate charge for each TDC value and pixel
        if calibation_file is not None:
            with tb.openFile(calibation_file, mode="r") as in_file_calibration_h5:
                tdc_calibration = in_file_calibration_h5.root.HitOrCalibration[:, :, :, 1]
                tdc_calibration_values = in_file_calibration_h5.root.HitOrCalibration.attrs.scan_parameter_values[:]
            charge_calibration = get_charge(max_tdc, tdc_calibration_values, tdc_calibration)
        else:
            charge_calibration = None

        # Store data of result histograms
        with tb.open_file(input_file_hits[:-3] + '_tdc_hists.h5', mode="w") as out_file_h5:
            for index, condition in enumerate(hit_selection_conditions):
                pixel_tdc_hist_result = np.swapaxes(pixel_tdc_hists_per_condition[index], 0, 1)
                pixel_tdc_timestamp_hist_result = np.swapaxes(pixel_tdc_timestamp_hists_per_condition[index], 0, 1)
                mean_pixel_tdc_hist_result = np.swapaxes(mean_pixel_tdc_hists_per_condition[index], 0, 1)
                mean_pixel_tdc_timestamp_hist_result = np.swapaxes(mean_pixel_tdc_timestamp_hists_per_condition[index], 0, 1)
                tdc_hists_per_condition_result = tdc_hists_per_condition[index]
                tdc_corr_hist_result = np.swapaxes(tdc_corr_hists_per_condition[index], 0, 1)
                # Create result hists
                out_1 = out_file_h5.createCArray(out_file_h5.root, name='HistPixelTdcCondition_%d' % index, title='Hist Pixel Tdc with %s' % condition, atom=tb.Atom.from_dtype(pixel_tdc_hist_result.dtype), shape=pixel_tdc_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_2 = out_file_h5.createCArray(out_file_h5.root, name='HistPixelTdcTimestampCondition_%d' % index, title='Hist Pixel Tdc Timestamp with %s' % condition, atom=tb.Atom.from_dtype(pixel_tdc_timestamp_hist_result.dtype), shape=pixel_tdc_timestamp_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_3 = out_file_h5.createCArray(out_file_h5.root, name='HistMeanPixelTdcCondition_%d' % index, title='Hist Mean Pixel Tdc with %s' % condition, atom=tb.Atom.from_dtype(mean_pixel_tdc_hist_result.dtype), shape=mean_pixel_tdc_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_4 = out_file_h5.createCArray(out_file_h5.root, name='HistMeanPixelTdcTimestampCondition_%d' % index, title='Hist Mean Pixel Tdc Timestamp with %s' % condition, atom=tb.Atom.from_dtype(mean_pixel_tdc_timestamp_hist_result.dtype), shape=mean_pixel_tdc_timestamp_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_5 = out_file_h5.createCArray(out_file_h5.root, name='HistTdcCondition_%d' % index, title='Hist Tdc with %s' % condition, atom=tb.Atom.from_dtype(tdc_hists_per_condition_result.dtype), shape=tdc_hists_per_condition_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_6 = out_file_h5.createCArray(out_file_h5.root, name='HistTdcCorrCondition_%d' % index, title='Hist Correlation Tdc/Tot with %s' % condition, atom=tb.Atom.from_dtype(tdc_corr_hist_result.dtype), shape=tdc_corr_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                # Add result hists information
                out_1.attrs.dimensions, out_1.attrs.condition, out_1.attrs.tdc_values = 'column, row, TDC value', condition, range(max_tdc)
                out_2.attrs.dimensions, out_2.attrs.condition, out_2.attrs.tdc_values = 'column, row, TDC time stamp value', condition, range(256)
                out_3.attrs.dimensions, out_3.attrs.condition = 'column, row, mean TDC value', condition
                out_4.attrs.dimensions, out_4.attrs.condition = 'column, row, mean TDC time stamp value', condition
                out_5.attrs.dimensions, out_5.attrs.condition = 'PlsrDAC', condition
                out_6.attrs.dimensions, out_6.attrs.condition = 'TDC, TOT', condition
                out_1[:], out_2[:], out_3[:], out_4[:], out_5[:], out_6[:] = pixel_tdc_hist_result, pixel_tdc_timestamp_hist_result, mean_pixel_tdc_hist_result, mean_pixel_tdc_timestamp_hist_result, tdc_hists_per_condition_result, tdc_corr_hist_result

                if charge_calibration is not None:
                    # Select only valid pixel for histograming: they have data and a calibration (that is any charge(TDC) calibration != 0)
                    valid_pixel = np.where(np.logical_and(charge_calibration[:, :, :max_tdc].sum(axis=2) > 0, pixel_tdc_hist_result[:, :, :max_tdc].swapaxes(0, 1).sum(axis=2) > 0))

                    mean_charge_calibration = charge_calibration[valid_pixel][:, :max_tdc].mean(axis=0)
                    mean_tdc_hist = pixel_tdc_hist_result.swapaxes(0, 1)[valid_pixel][:, :max_tdc].mean(axis=0)
                    result_array = np.rec.array(np.column_stack((mean_charge_calibration, mean_tdc_hist)), dtype=[('charge', float), ('count', float)])
                    out_6 = out_file_h5.create_table(out_file_h5.root, name='HistMeanTdcCalibratedCondition_%d' % index, description=result_array.dtype, title='Hist Tdc with mean charge calibration and %s' % condition, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                    out_6.attrs.condition = condition
                    out_6.attrs.n_pixel = valid_pixel[0].shape[0]
                    out_6.append(result_array)
                    # Create charge histogram with per pixel TDC(charge) calibration
                    x, y = charge_calibration[valid_pixel][:, :max_tdc].ravel(), np.ravel(pixel_tdc_hist_result.swapaxes(0, 1)[valid_pixel][:, :max_tdc].ravel())
                    y, x = y[x > 0], x[x > 0]  # remove the hit tdcs without proper calibration plsrDAC(TDC) calibration
                    x, y, yerr = analysis_utils.get_profile_histogram(x, y, n_bins=n_bins)
                    result_array = np.rec.array(np.column_stack((x, y, yerr)), dtype=[('charge', float), ('count', float), ('count_error', float)])
                    out_7 = out_file_h5.create_table(out_file_h5.root, name='HistTdcCalibratedCondition_%d' % index, description=result_array.dtype, title='Hist Tdc with per pixel charge calibration and %s' % condition, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                    out_7.attrs.condition = condition
                    out_7.attrs.n_pixel = valid_pixel[0].shape[0]
                    out_7.append(result_array)

    # Plot Data
    with PdfPages(input_file_hits[:-3] + '_calibrated_tdc_hists.pdf') as output_pdf:
        plot_hits_per_condition(output_pdf)
        with tb.open_file(input_file_hits[:-3] + '_tdc_hists.h5', mode="r") as in_file_h5:
            for node in in_file_h5.root:  # go through the data and plot them
                if 'MeanPixel' in node.name:
                    try:
                        plotThreeWay(np.ma.masked_invalid(node[:]) * 1.5625, title='Mean TDC delay, hits with\n%s' % node._v_attrs.condition if 'Timestamp' in node.name else 'Mean TDC, hits with\n%s' % node._v_attrs.condition, filename=output_pdf)
                    except ValueError:
                        logging.warning('Cannot plot TDC delay')
                elif 'HistTdcCondition' in node.name:
                    hist_1d = node[:]
                    entry_index = np.where(hist_1d != 0)
                    if entry_index[0].shape[0] != 0:
                        max_index = np.amax(entry_index)
                    else:
                        max_index = max_tdc
                    plot_1d_hist(hist_1d[:max_index + 10], title='TDC histogram, hits with\n%s' % node._v_attrs.condition if 'Timestamp' not in node.name else 'TDC time stamp histogram, hits with\n%s' % node._v_attrs.condition, x_axis_title='TDC' if 'Timestamp' not in node.name else 'TDC time stamp', filename=output_pdf)
                elif 'HistPixelTdc' in node.name:
                    hist_3d = node[:]
                    entry_index = np.where(hist_3d.sum(axis=(0, 1)) != 0)
                    if entry_index[0].shape[0] != 0:
                        max_index = np.amax(entry_index)
                    else:
                        max_index = max_tdc
                    best_pixel_index = np.where(hist_3d.sum(axis=2) == np.amax(node[:].sum(axis=2)))
                    if best_pixel_index[0].shape[0] == 1:  # there could be more than one pixel with most hits
                        plot_1d_hist(hist_3d[best_pixel_index][0, :max_index], title='TDC histogram of pixel %d, %d\n%s' % (best_pixel_index[1] + 1, best_pixel_index[0] + 1, node._v_attrs.condition) if 'Timestamp' not in node.name else 'TDC time stamp histogram, hits of pixel %d, %d' % (best_pixel_index[1] + 1, best_pixel_index[0] + 1), x_axis_title='TDC' if 'Timestamp' not in node.name else 'TDC time stamp', filename=output_pdf)
                elif 'HistTdcCalibratedCondition' in node.name:
                    plot_corrected_tdc_hist(node[:]['charge'], node[:]['count'], title='TDC histogram, %d pixel, per pixel TDC calib.\n%s' % (node._v_attrs.n_pixel, node._v_attrs.condition), output_pdf=output_pdf)
                elif 'HistMeanTdcCalibratedCondition' in node.name:
                    plot_corrected_tdc_hist(node[:]['charge'], node[:]['count'], title='TDC histogram, %d pixel, mean TDC calib.\n%s' % (node._v_attrs.n_pixel, node._v_attrs.condition), output_pdf=output_pdf)
                elif 'HistTdcCorr' in node.name:
                    plot_tdc_tot_correlation(node[:], node._v_attrs.condition, output_pdf)
def histogram_tdc_hits(input_file_hits, hit_selection_conditions, event_status_select_mask, event_status_condition, calibration_file=None, correct_calibration=None, max_tdc=1000, ignore_disabled_regions=True, n_bins=200, plot_data=True):
    for condition in hit_selection_conditions:
        logging.info('Histogram TDC hits with %s', condition)

    def get_charge(max_tdc, tdc_calibration_values, tdc_pixel_calibration):  # return the charge from calibration
        charge_calibration = np.zeros(shape=(80, 336, max_tdc))
        for column in range(80):
            for row in range(336):
                actual_pixel_calibration = tdc_pixel_calibration[column, row, :]
                # Only take pixels with at least 3 valid calibration points
                if np.count_nonzero(actual_pixel_calibration != 0) > 2 and np.count_nonzero(np.isfinite(actual_pixel_calibration)) > 2:
                    selected_measurements = np.isfinite(actual_pixel_calibration)  # Select valid calibration steps
                    selected_actual_pixel_calibration = actual_pixel_calibration[selected_measurements]
                    selected_tdc_calibration_values = tdc_calibration_values[selected_measurements]
                    interpolation = interp1d(x=selected_actual_pixel_calibration, y=selected_tdc_calibration_values, kind='slinear', bounds_error=False, fill_value=0)
                    charge_calibration[column, row, :] = interpolation(np.arange(max_tdc))
        return charge_calibration

    def plot_tdc_tot_correlation(data, condition, output_pdf):
        logging.info('Plot correlation histogram for %s', condition)
        plt.clf()
        data = np.ma.array(data, mask=(data <= 0))
        if np.ma.any(data > 0):
            cmap = cm.get_cmap('jet', 200)
            cmap.set_bad('w')
            plt.title('Correlation with %s' % condition)
            norm = colors.LogNorm()
            z_max = data.max(fill_value=0)
            plt.xlabel('TDC')
            plt.ylabel('TOT')
            im = plt.imshow(data, cmap=cmap, norm=norm, aspect='auto', interpolation='nearest')  # , norm=norm)
            divider = make_axes_locatable(plt.gca())
            plt.gca().invert_yaxis()
            cax = divider.append_axes("right", size="5%", pad=0.1)
            plt.colorbar(im, cax=cax, ticks=np.linspace(start=0, stop=z_max, num=9, endpoint=True))
            output_pdf.savefig()
        else:
            logging.warning('No data for correlation plotting for %s', condition)

    def plot_hits_per_condition(output_pdf):
        logging.info('Plot hits selection efficiency histogram for %d conditions', len(hit_selection_conditions) + 2)
        labels = ['All Hits', 'Hits of\ngood events']
        for condition in hit_selection_conditions:
            condition = re.sub('[&]', '\n', condition)
            condition = re.sub('[()]', '', condition)
            labels.append(condition)
        plt.clf()
        plt.bar(range(len(n_hits_per_condition)), n_hits_per_condition, align='center')
        plt.xticks(range(len(n_hits_per_condition)), labels, size=8)
        plt.title('Number of hits for different cuts')
        plt.yscale('log')
        plt.ylabel('#')
        plt.grid()
        for x, y in zip(np.arange(len(n_hits_per_condition)), n_hits_per_condition):
            plt.annotate('%d' % (float(y) / float(n_hits_per_condition[0]) * 100.) + r'%', xy=(x, y / 2.), xycoords='data', color='grey', size=15)
        output_pdf.savefig()

    def plot_corrected_tdc_hist(x, y, title, output_pdf, point_style='-'):
        logging.info('Plot TDC hist with TDC calibration')
        plt.clf()
        y /= np.amax(y) if y.shape[0] > 0 else y
        plt.plot(x, y, point_style)
        plt.title(title, size=10)
        plt.xlabel('Charge [PlsrDAC]')
        plt.ylabel('Count [a.u.]')
        plt.grid()
        output_pdf.savefig()

    def get_calibration_correction(tdc_calibration, tdc_calibration_values, filename_new_calibration):  # correct the TDC calibration with the TDC calib in filename_new_calibration by shifting the means
        with tb.open_file(filename_new_calibration, 'r') as in_file_2:
            charge_calibration_1, charge_calibration_2 = tdc_calibration, in_file_2.root.HitOrCalibration[:, :, :, 1]

            plsr_dacs = tdc_calibration_values
            if not np.all(plsr_dacs == in_file_2.root.HitOrCalibration._v_attrs.scan_parameter_values):
                raise NotImplementedError('The check calibration file has to have the same PlsrDAC values')

            # Valid pixel have a calibration in the new and the old calibration
            valid_pixel = np.where(~np.all((charge_calibration_1 == 0), axis=2) & ~np.all(np.isnan(charge_calibration_1), axis=2) & ~np.all((charge_calibration_2 == 0), axis=2) & ~np.all(np.isnan(charge_calibration_2), axis=2))
            mean_charge_calibration = np.nanmean(charge_calibration_2[valid_pixel], axis=0)
            offset_mean = np.nanmean((charge_calibration_2[valid_pixel] - charge_calibration_1[valid_pixel]), axis=0)

            dPlsrDAC_dTDC = analysis_utils.smooth_differentiation(plsr_dacs, mean_charge_calibration, order=3, smoothness=0, derivation=1)
            plt.clf()
            plt.plot(plsr_dacs, offset_mean / dPlsrDAC_dTDC, '.-', label='PlsrDAC')
            plt.plot(plsr_dacs, offset_mean, '.-', label='TDC')
            plt.grid()
            plt.xlabel('PlsrDAC')
            plt.ylabel('Mean calibration offset')
            plt.legend(loc=0)
            plt.title('Mean offset between TDC calibration data, new - old ')
            plt.savefig(filename_new_calibration[:-3] + '.pdf')
            plt.show()

            return offset_mean

    def delete_disabled_regions(hits, enable_mask):
        n_hits = hits.shape[0]

        # Tread no hits case
        if n_hits == 0:
            return hits

        # Column, row array with True for disabled pixels
        disabled_region = ~enable_mask.astype(np.bool).T.copy()
        n_disabled_pixels = np.count_nonzero(disabled_region)

        # Extend disabled pixel mask by the neighbouring pixels
        neighbour_pixels = [(-1, 0), (1, 0), (0, -1), (0, 1)]  # Disable direct neighbouring pixels
        for neighbour_pixel in neighbour_pixels:
            disabled_region = np.logical_or(disabled_region, shift(disabled_region, shift=neighbour_pixel, cval=0))

        logging.info('Masking %d additional pixel neighbouring %d disabled pixels', np.count_nonzero(disabled_region) - n_disabled_pixels, n_disabled_pixels)

        # Make 1D selection array with disabled pixels
        disabled_pixels = np.where(disabled_region)
        disabled_pixels_1d = (disabled_pixels[0] + 1) * disabled_region.shape[1] + (disabled_pixels[1] + 1)  # + 1 because pixel index 0,0 has column/row = 1

        hits_1d = hits['column'].astype(np.uint32) * disabled_region.shape[1] + hits['row']  # change dtype to fit new number
        hits = hits[np.in1d(hits_1d, disabled_pixels_1d, invert=True)]

        logging.info('Lost %d hits (%d percent) due to disabling neighbours', n_hits - hits.shape[0], (1. - float(hits.shape[0]) / n_hits) * 100)

        return hits

    # Create data
    with tb.open_file(input_file_hits, mode="r") as in_hit_file_h5:
        cluster_hit_table = in_hit_file_h5.root.ClusterHits
        try:
            enabled_pixels = in_hit_file_h5.root.ClusterHits._v_attrs.enabled_pixels[:]
        except AttributeError:  # Old and simulate data do not have this info
            logging.warning('No enabled pixel mask found in data! Assume all pixels are enabled.')
            enabled_pixels = np.ones(shape=(336, 80))

        # Result hists, initialized per condition
        pixel_tdc_hists_per_condition = [np.zeros(shape=(80, 336, max_tdc), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        pixel_tdc_timestamp_hists_per_condition = [np.zeros(shape=(80, 336, 256), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        mean_pixel_tdc_hists_per_condition = [np.zeros(shape=(80, 336), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        mean_pixel_tdc_timestamp_hists_per_condition = [np.zeros(shape=(80, 336), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        tdc_hists_per_condition = [np.zeros(shape=(max_tdc), dtype=np.uint16) for _ in hit_selection_conditions] if hit_selection_conditions else []
        tdc_corr_hists_per_condition = [np.zeros(shape=(max_tdc, 16), dtype=np.uint32) for _ in hit_selection_conditions] if hit_selection_conditions else []

        n_hits_per_condition = [0 for _ in range(len(hit_selection_conditions) + 2)]  # condition 1, 2 are all hits, hits of goode events

        logging.info('Select hits and create TDC histograms for %d cut conditions', len(hit_selection_conditions))
        progress_bar = progressbar.ProgressBar(widgets=['', progressbar.Percentage(), ' ', progressbar.Bar(marker='*', left='|', right='|'), ' ', progressbar.AdaptiveETA()], maxval=cluster_hit_table.shape[0], term_width=80)
        progress_bar.start()
        for cluster_hits, _ in analysis_utils.data_aligned_at_events(cluster_hit_table, chunk_size=10000000):
            n_hits_per_condition[0] += cluster_hits.shape[0]
            selected_events_cluster_hits = cluster_hits[np.logical_and(cluster_hits['TDC'] < max_tdc, (cluster_hits['event_status'] & event_status_select_mask) == event_status_condition)]
            n_hits_per_condition[1] += selected_events_cluster_hits.shape[0]
            for index, condition in enumerate(hit_selection_conditions):
                selected_cluster_hits = analysis_utils.select_hits(selected_events_cluster_hits, condition)
                if ignore_disabled_regions:
                    selected_cluster_hits = delete_disabled_regions(hits=selected_cluster_hits, enable_mask=enabled_pixels)

                n_hits_per_condition[2 + index] += selected_cluster_hits.shape[0]
                column, row, tdc = selected_cluster_hits['column'] - 1, selected_cluster_hits['row'] - 1, selected_cluster_hits['TDC']
                pixel_tdc_hists_per_condition[index] += fast_analysis_utils.hist_3d_index(column, row, tdc, shape=(80, 336, max_tdc))
                mean_pixel_tdc_hists_per_condition[index] = np.average(pixel_tdc_hists_per_condition[index], axis=2, weights=range(0, max_tdc)) * np.sum(np.arange(0, max_tdc)) / pixel_tdc_hists_per_condition[index].sum(axis=2)
                tdc_timestamp = selected_cluster_hits['TDC_time_stamp']
                pixel_tdc_timestamp_hists_per_condition[index] += fast_analysis_utils.hist_3d_index(column, row, tdc_timestamp, shape=(80, 336, 256))
                mean_pixel_tdc_timestamp_hists_per_condition[index] = np.average(pixel_tdc_timestamp_hists_per_condition[index], axis=2, weights=range(0, 256)) * np.sum(np.arange(0, 256)) / pixel_tdc_timestamp_hists_per_condition[index].sum(axis=2)
                tdc_hists_per_condition[index] = pixel_tdc_hists_per_condition[index].sum(axis=(0, 1))
                tdc_corr_hists_per_condition[index] += fast_analysis_utils.hist_2d_index(tdc, selected_cluster_hits['tot'], shape=(max_tdc, 16))
            progress_bar.update(n_hits_per_condition[0])
        progress_bar.finish()

        # Take TDC calibration if available and calculate charge for each TDC value and pixel
        if calibration_file is not None:
            with tb.open_file(calibration_file, mode="r") as in_file_calibration_h5:
                tdc_calibration = in_file_calibration_h5.root.HitOrCalibration[:, :, :, 1]
                tdc_calibration_values = in_file_calibration_h5.root.HitOrCalibration.attrs.scan_parameter_values[:]
                if correct_calibration is not None:
                    tdc_calibration += get_calibration_correction(tdc_calibration, tdc_calibration_values, correct_calibration)
            charge_calibration = get_charge(max_tdc, tdc_calibration_values, tdc_calibration)
        else:
            charge_calibration = None

        # Store data of result histograms
        with tb.open_file(input_file_hits[:-3] + '_tdc_hists.h5', mode="w") as out_file_h5:
            for index, condition in enumerate(hit_selection_conditions):
                pixel_tdc_hist_result = np.swapaxes(pixel_tdc_hists_per_condition[index], 0, 1)
                pixel_tdc_timestamp_hist_result = np.swapaxes(pixel_tdc_timestamp_hists_per_condition[index], 0, 1)
                mean_pixel_tdc_hist_result = np.swapaxes(mean_pixel_tdc_hists_per_condition[index], 0, 1)
                mean_pixel_tdc_timestamp_hist_result = np.swapaxes(mean_pixel_tdc_timestamp_hists_per_condition[index], 0, 1)
                tdc_hists_per_condition_result = tdc_hists_per_condition[index]
                tdc_corr_hist_result = np.swapaxes(tdc_corr_hists_per_condition[index], 0, 1)
                # Create result hists
                out_1 = out_file_h5.create_carray(out_file_h5.root, name='HistPixelTdcCondition_%d' % index, title='Hist Pixel Tdc with %s' % condition, atom=tb.Atom.from_dtype(pixel_tdc_hist_result.dtype), shape=pixel_tdc_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_2 = out_file_h5.create_carray(out_file_h5.root, name='HistPixelTdcTimestampCondition_%d' % index, title='Hist Pixel Tdc Timestamp with %s' % condition, atom=tb.Atom.from_dtype(pixel_tdc_timestamp_hist_result.dtype), shape=pixel_tdc_timestamp_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_3 = out_file_h5.create_carray(out_file_h5.root, name='HistMeanPixelTdcCondition_%d' % index, title='Hist Mean Pixel Tdc with %s' % condition, atom=tb.Atom.from_dtype(mean_pixel_tdc_hist_result.dtype), shape=mean_pixel_tdc_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_4 = out_file_h5.create_carray(out_file_h5.root, name='HistMeanPixelTdcTimestampCondition_%d' % index, title='Hist Mean Pixel Tdc Timestamp with %s' % condition, atom=tb.Atom.from_dtype(mean_pixel_tdc_timestamp_hist_result.dtype), shape=mean_pixel_tdc_timestamp_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_5 = out_file_h5.create_carray(out_file_h5.root, name='HistTdcCondition_%d' % index, title='Hist Tdc with %s' % condition, atom=tb.Atom.from_dtype(tdc_hists_per_condition_result.dtype), shape=tdc_hists_per_condition_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                out_6 = out_file_h5.create_carray(out_file_h5.root, name='HistTdcCorrCondition_%d' % index, title='Hist Correlation Tdc/Tot with %s' % condition, atom=tb.Atom.from_dtype(tdc_corr_hist_result.dtype), shape=tdc_corr_hist_result.shape, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                # Add result hists information
                out_1.attrs.dimensions, out_1.attrs.condition, out_1.attrs.tdc_values = 'column, row, TDC value', condition, range(max_tdc)
                out_2.attrs.dimensions, out_2.attrs.condition, out_2.attrs.tdc_values = 'column, row, TDC time stamp value', condition, range(256)
                out_3.attrs.dimensions, out_3.attrs.condition = 'column, row, mean TDC value', condition
                out_4.attrs.dimensions, out_4.attrs.condition = 'column, row, mean TDC time stamp value', condition
                out_5.attrs.dimensions, out_5.attrs.condition = 'PlsrDAC', condition
                out_6.attrs.dimensions, out_6.attrs.condition = 'TDC, TOT', condition
                out_1[:], out_2[:], out_3[:], out_4[:], out_5[:], out_6[:] = pixel_tdc_hist_result, pixel_tdc_timestamp_hist_result, mean_pixel_tdc_hist_result, mean_pixel_tdc_timestamp_hist_result, tdc_hists_per_condition_result, tdc_corr_hist_result

                if charge_calibration is not None:
                    # Select only valid pixel for histogramming: they have data and a calibration (that is any charge(TDC) calibration != 0)
                    valid_pixel = np.where(np.logical_and(charge_calibration[:, :, :max_tdc].sum(axis=2) > 0, pixel_tdc_hist_result[:, :, :max_tdc].swapaxes(0, 1).sum(axis=2) > 0))
                    # Create charge histogram with mean TDC calibration
                    mean_charge_calibration = charge_calibration[valid_pixel][:, :max_tdc].mean(axis=0)
                    mean_tdc_hist = pixel_tdc_hist_result.swapaxes(0, 1)[valid_pixel][:, :max_tdc].mean(axis=0)
                    result_array = np.rec.array(np.column_stack((mean_charge_calibration, mean_tdc_hist)), dtype=[('charge', float), ('count', float)])
                    out_7 = out_file_h5.create_table(out_file_h5.root, name='HistMeanTdcCalibratedCondition_%d' % index, description=result_array.dtype, title='Hist Tdc with mean charge calibration and %s' % condition, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                    out_7.attrs.condition = condition
                    out_7.attrs.n_pixel = valid_pixel[0].shape[0]
                    out_7.attrs.n_hits = pixel_tdc_hist_result.swapaxes(0, 1)[valid_pixel][:, :max_tdc].sum()
                    out_7.append(result_array)
                    # Create charge histogram with per pixel TDC calibration
                    x, y = charge_calibration[valid_pixel][:, :max_tdc].ravel(), np.ravel(pixel_tdc_hist_result.swapaxes(0, 1)[valid_pixel][:, :max_tdc].ravel())
                    y_hist, x_hist = y[x > 0], x[x > 0]  # remove the hit tdcs without proper calibration plsrDAC(TDC) calibration
                    x, y, yerr = analysis_utils.get_profile_histogram(x_hist, y_hist, n_bins=n_bins)
                    result_array = np.rec.array(np.column_stack((x, y, yerr)), dtype=[('charge', float), ('count', float), ('count_error', float)])
                    out_8 = out_file_h5.create_table(out_file_h5.root, name='HistTdcCalibratedCondition_%d' % index, description=result_array.dtype, title='Hist Tdc with per pixel charge calibration and %s' % condition, filters=tb.Filters(complib='blosc', complevel=5, fletcher32=False))
                    out_8.attrs.condition = condition
                    out_8.attrs.n_pixel = valid_pixel[0].shape[0]
                    out_8.attrs.n_hits = y_hist.sum()
                    out_8.append(result_array)

    # Plot Data
    if plot_data:
        with PdfPages(input_file_hits[:-3] + '_calibrated_tdc_hists.pdf') as output_pdf:
            plot_hits_per_condition(output_pdf)
            with tb.open_file(input_file_hits[:-3] + '_tdc_hists.h5', mode="r") as in_file_h5:
                for node in in_file_h5.root:  # go through the data and plot them
                    if 'MeanPixel' in node.name:
                        try:
                            plot_three_way(np.ma.masked_invalid(node[:]) * 1.5625, title='Mean TDC delay, hits with\n%s' % node._v_attrs.condition[:80] if 'Timestamp' in node.name else 'Mean TDC, hits with\n%s' % node._v_attrs.condition[:80], filename=output_pdf)
                        except ValueError:
                            logging.warning('Cannot plot TDC delay')
                    elif 'HistTdcCondition' in node.name:
                        hist_1d = node[:]
                        entry_index = np.where(hist_1d != 0)
                        if entry_index[0].shape[0] != 0:
                            max_index = np.amax(entry_index)
                        else:
                            max_index = max_tdc
                        plot_1d_hist(hist_1d[:max_index + 10], title='TDC histogram, hits with\n%s' % node._v_attrs.condition[:80] if 'Timestamp' not in node.name else 'TDC time stamp histogram, hits with\n%s' % node._v_attrs.condition[:80], x_axis_title='TDC' if 'Timestamp' not in node.name else 'TDC time stamp', filename=output_pdf)
                    elif 'HistPixelTdc' in node.name:
                        hist_3d = node[:]
                        entry_index = np.where(hist_3d.sum(axis=(0, 1)) != 0)
                        if entry_index[0].shape[0] != 0:
                            max_index = np.amax(entry_index)
                        else:
                            max_index = max_tdc
                        best_pixel_index = np.where(hist_3d.sum(axis=2) == np.amax(node[:].sum(axis=2)))
                        if best_pixel_index[0].shape[0] == 1:  # there could be more than one pixel with most hits
                            try:
                                plot_1d_hist(hist_3d[best_pixel_index][0, :max_index], title='TDC histogram of pixel %d, %d\n%s' % (best_pixel_index[1] + 1, best_pixel_index[0] + 1, node._v_attrs.condition[:80]) if 'Timestamp' not in node.name else 'TDC time stamp histogram, hits of pixel %d, %d' % (best_pixel_index[1] + 1, best_pixel_index[0] + 1), x_axis_title='TDC' if 'Timestamp' not in node.name[:80] else 'TDC time stamp', filename=output_pdf)
                            except IndexError:
                                logging.warning('Cannot plot pixel TDC histogram')
                    elif 'HistTdcCalibratedCondition' in node.name:
                        plot_corrected_tdc_hist(node[:]['charge'], node[:]['count'], title='TDC histogram, %d pixel, per pixel TDC calib.\n%s' % (node._v_attrs.n_pixel, node._v_attrs.condition[:80]), output_pdf=output_pdf)
                    elif 'HistMeanTdcCalibratedCondition' in node.name:
                        plot_corrected_tdc_hist(node[:]['charge'], node[:]['count'], title='TDC histogram, %d pixel, mean TDC calib.\n%s' % (node._v_attrs.n_pixel, node._v_attrs.condition[:80]), output_pdf=output_pdf)
                    elif 'HistTdcCorr' in node.name:
                        plot_tdc_tot_correlation(node[:], node._v_attrs.condition, output_pdf)
Exemple #4
0
def analyse_n_cluster_per_event(scan_base,
                                include_no_cluster=False,
                                time_line_absolute=True,
                                combine_n_readouts=1000,
                                chunk_size=10000000,
                                plot_n_cluster_hists=False,
                                output_pdf=None,
                                output_file=None):
    ''' Determines the number of cluster per event as a function of time. Therefore the data of a fixed number of read outs are combined ('combine_n_readouts').

    Parameters
    ----------
    scan_base: list of str
        scan base names (e.g.:  ['//data//SCC_50_fei4_self_trigger_scan_390', ]
    include_no_cluster: bool
        Set to true to also consider all events without any hit.
    combine_n_readouts: int
        the number of read outs to combine (e.g. 1000)
    max_chunk_size: int
        the maximum chunk size used during read, if too big memory error occurs, if too small analysis takes longer
    output_pdf: PdfPages
        PdfPages file object, if none the plot is printed to screen
    '''

    time_stamp = []
    n_cluster = []

    start_time_set = False

    for data_file in scan_base:
        with tb.open_file(data_file + '_interpreted.h5',
                          mode="r+") as in_cluster_file_h5:
            # get data and data pointer
            meta_data_array = in_cluster_file_h5.root.meta_data[:]
            cluster_table = in_cluster_file_h5.root.Cluster

            # determine the event ranges to analyze (timestamp_start, start_event_number, stop_event_number)
            parameter_ranges = np.column_stack(
                (analysis_utils.get_ranges_from_array(
                    meta_data_array['timestamp_start'][::combine_n_readouts]),
                 analysis_utils.get_ranges_from_array(
                     meta_data_array['event_number'][::combine_n_readouts])))

            # create a event_numer index (important for speed)
            analysis_utils.index_event_number(cluster_table)

            # initialize the analysis and set settings
            analyze_data = AnalyzeRawData()
            analyze_data.create_tot_hist = False
            analyze_data.create_bcid_hist = False

            # variables for read speed up
            index = 0  # index where to start the read out, 0 at the beginning, increased during looping
            best_chunk_size = chunk_size

            total_cluster = cluster_table.shape[0]

            progress_bar = progressbar.ProgressBar(widgets=[
                '',
                progressbar.Percentage(), ' ',
                progressbar.Bar(marker='*', left='|', right='|'), ' ',
                progressbar.AdaptiveETA()
            ],
                                                   maxval=total_cluster,
                                                   term_width=80)
            progress_bar.start()

            # loop over the selected events
            for parameter_index, parameter_range in enumerate(
                    parameter_ranges):
                logging.debug('Analyze time stamp ' + str(parameter_range[0]) +
                              ' and data from events = [' +
                              str(parameter_range[2]) + ',' +
                              str(parameter_range[3]) + '[ ' + str(
                                  int(
                                      float(
                                          float(parameter_index) /
                                          float(len(parameter_ranges)) *
                                          100.0))) + '%')
                analyze_data.reset()  # resets the data of the last analysis

                # loop over the cluster in the actual selected events with optimizations: determine best chunk size, start word index given
                readout_cluster_len = 0  # variable to calculate a optimal chunk size value from the number of hits for speed up
                hist = None
                for clusters, index in analysis_utils.data_aligned_at_events(
                        cluster_table,
                        start_event_number=parameter_range[2],
                        stop_event_number=parameter_range[3],
                        start_index=index,
                        chunk_size=best_chunk_size):
                    n_cluster_per_event = analysis_utils.get_n_cluster_in_events(
                        clusters['event_number']
                    )[:,
                      1]  # array with the number of cluster per event, cluster per event are at least 1
                    if hist is None:
                        hist = np.histogram(n_cluster_per_event,
                                            bins=10,
                                            range=(0, 10))[0]
                    else:
                        hist = np.add(
                            hist,
                            np.histogram(n_cluster_per_event,
                                         bins=10,
                                         range=(0, 10))[0])
                    if include_no_cluster and parameter_range[
                            3] is not None:  # happend for the last readout
                        hist[0] = (parameter_range[3] -
                                   parameter_range[2]) - len(
                                       n_cluster_per_event
                                   )  # add the events without any cluster
                    readout_cluster_len += clusters.shape[0]
                    total_cluster -= len(clusters)
                    progress_bar.update(index)
                best_chunk_size = int(1.5 * readout_cluster_len) if int(
                    1.05 * readout_cluster_len
                ) < chunk_size else chunk_size  # to increase the readout speed, estimated the number of hits for one read instruction

                if plot_n_cluster_hists:
                    plotting.plot_1d_hist(
                        hist,
                        title='Number of cluster per event at ' +
                        str(parameter_range[0]),
                        x_axis_title='Number of cluster',
                        y_axis_title='#',
                        log_y=True,
                        filename=output_pdf)
                hist = hist.astype('f4') / np.sum(
                    hist)  # calculate fraction from total numbers

                if time_line_absolute:
                    time_stamp.append(parameter_range[0])
                else:
                    if not start_time_set:
                        start_time = parameter_ranges[0, 0]
                        start_time_set = True
                    time_stamp.append((parameter_range[0] - start_time) / 60.0)
                n_cluster.append(hist)
            progress_bar.finish()
            if total_cluster != 0:
                logging.warning(
                    'Not all clusters were selected during analysis. Analysis is therefore not exact'
                )

    if time_line_absolute:
        plotting.plot_scatter_time(
            time_stamp,
            n_cluster,
            title='Number of cluster per event as a function of time',
            marker_style='o',
            filename=output_pdf,
            legend=('0 cluster', '1 cluster', '2 cluster',
                    '3 cluster') if include_no_cluster else
            ('0 cluster not plotted', '1 cluster', '2 cluster', '3 cluster'))
    else:
        plotting.plot_scatter(
            time_stamp,
            n_cluster,
            title='Number of cluster per event as a function of time',
            x_label='time [min.]',
            marker_style='o',
            filename=output_pdf,
            legend=('0 cluster', '1 cluster', '2 cluster',
                    '3 cluster') if include_no_cluster else
            ('0 cluster not plotted', '1 cluster', '2 cluster', '3 cluster'))
    if output_file:
        with tb.open_file(output_file, mode="a") as out_file_h5:
            cluster_array = np.array(n_cluster)
            rec_array = np.array(zip(time_stamp, cluster_array[:, 0],
                                     cluster_array[:, 1], cluster_array[:, 2],
                                     cluster_array[:, 3], cluster_array[:, 4],
                                     cluster_array[:, 5]),
                                 dtype=[('time_stamp', float),
                                        ('cluster_0', float),
                                        ('cluster_1', float),
                                        ('cluster_2', float),
                                        ('cluster_3', float),
                                        ('cluster_4', float),
                                        ('cluster_5', float)
                                        ]).view(np.recarray)
            try:
                n_cluster_table = out_file_h5.create_table(
                    out_file_h5.root,
                    name='n_cluster',
                    description=rec_array,
                    title='Cluster per event',
                    filters=tb.Filters(complib='blosc',
                                       complevel=5,
                                       fletcher32=False))
                n_cluster_table[:] = rec_array
            except tb.exceptions.NodeError:
                logging.warning(
                    output_file +
                    ' has already a Beamspot note, do not overwrite existing.')
    return time_stamp, n_cluster
Exemple #5
0
def analyse_n_cluster_per_event(
    scan_base,
    include_no_cluster=False,
    time_line_absolute=True,
    combine_n_readouts=1000,
    chunk_size=10000000,
    plot_n_cluster_hists=False,
    output_pdf=None,
    output_file=None,
):
    """ Determines the number of cluster per event as a function of time. Therefore the data of a fixed number of read outs are combined ('combine_n_readouts').

    Parameters
    ----------
    scan_base: list of str
        scan base names (e.g.:  ['//data//SCC_50_fei4_self_trigger_scan_390', ]
    include_no_cluster: bool
        Set to true to also consider all events without any hit.
    combine_n_readouts: int
        the number of read outs to combine (e.g. 1000)
    max_chunk_size: int
        the maximum chunk size used during read, if too big memory error occurs, if too small analysis takes longer
    output_pdf: PdfPages
        PdfPages file object, if none the plot is printed to screen
    """

    time_stamp = []
    n_cluster = []

    start_time_set = False

    for data_file in scan_base:
        with tb.openFile(data_file + "_interpreted.h5", mode="r+") as in_cluster_file_h5:
            # get data and data pointer
            meta_data_array = in_cluster_file_h5.root.meta_data[:]
            cluster_table = in_cluster_file_h5.root.Cluster

            # determine the event ranges to analyze (timestamp_start, start_event_number, stop_event_number)
            parameter_ranges = np.column_stack(
                (
                    analysis_utils.get_ranges_from_array(meta_data_array["timestamp_start"][::combine_n_readouts]),
                    analysis_utils.get_ranges_from_array(meta_data_array["event_number"][::combine_n_readouts]),
                )
            )

            # create a event_numer index (important for speed)
            analysis_utils.index_event_number(cluster_table)

            # initialize the analysis and set settings
            analyze_data = AnalyzeRawData()
            analyze_data.create_tot_hist = False
            analyze_data.create_bcid_hist = False

            # variables for read speed up
            index = 0  # index where to start the read out, 0 at the beginning, increased during looping
            best_chunk_size = chunk_size

            total_cluster = cluster_table.shape[0]

            progress_bar = progressbar.ProgressBar(
                widgets=[
                    "",
                    progressbar.Percentage(),
                    " ",
                    progressbar.Bar(marker="*", left="|", right="|"),
                    " ",
                    analysis_utils.ETA(),
                ],
                maxval=total_cluster,
                term_width=80,
            )
            progress_bar.start()

            # loop over the selected events
            for parameter_index, parameter_range in enumerate(parameter_ranges):
                logging.debug(
                    "Analyze time stamp "
                    + str(parameter_range[0])
                    + " and data from events = ["
                    + str(parameter_range[2])
                    + ","
                    + str(parameter_range[3])
                    + "[ "
                    + str(int(float(float(parameter_index) / float(len(parameter_ranges)) * 100.0)))
                    + "%"
                )
                analyze_data.reset()  # resets the data of the last analysis

                # loop over the cluster in the actual selected events with optimizations: determine best chunk size, start word index given
                readout_cluster_len = (
                    0
                )  # variable to calculate a optimal chunk size value from the number of hits for speed up
                hist = None
                for clusters, index in analysis_utils.data_aligned_at_events(
                    cluster_table,
                    start_event_number=parameter_range[2],
                    stop_event_number=parameter_range[3],
                    start=index,
                    chunk_size=best_chunk_size,
                ):
                    n_cluster_per_event = analysis_utils.get_n_cluster_in_events(clusters["event_number"])[
                        :, 1
                    ]  # array with the number of cluster per event, cluster per event are at least 1
                    if hist is None:
                        hist = np.histogram(n_cluster_per_event, bins=10, range=(0, 10))[0]
                    else:
                        hist = np.add(hist, np.histogram(n_cluster_per_event, bins=10, range=(0, 10))[0])
                    if include_no_cluster and parameter_range[3] is not None:  # happend for the last readout
                        hist[0] = (parameter_range[3] - parameter_range[2]) - len(
                            n_cluster_per_event
                        )  # add the events without any cluster
                    readout_cluster_len += clusters.shape[0]
                    total_cluster -= len(clusters)
                    progress_bar.update(index)
                best_chunk_size = (
                    int(1.5 * readout_cluster_len) if int(1.05 * readout_cluster_len) < chunk_size else chunk_size
                )  # to increase the readout speed, estimated the number of hits for one read instruction

                if plot_n_cluster_hists:
                    plotting.plot_1d_hist(
                        hist,
                        title="Number of cluster per event at " + str(parameter_range[0]),
                        x_axis_title="Number of cluster",
                        y_axis_title="#",
                        log_y=True,
                        filename=output_pdf,
                    )
                hist = hist.astype("f4") / np.sum(hist)  # calculate fraction from total numbers

                if time_line_absolute:
                    time_stamp.append(parameter_range[0])
                else:
                    if not start_time_set:
                        start_time = parameter_ranges[0, 0]
                        start_time_set = True
                    time_stamp.append((parameter_range[0] - start_time) / 60.0)
                n_cluster.append(hist)
            progress_bar.finish()
            if total_cluster != 0:
                logging.warning("Not all clusters were selected during analysis. Analysis is therefore not exact")

    if time_line_absolute:
        plotting.plot_scatter_time(
            time_stamp,
            n_cluster,
            title="Number of cluster per event as a function of time",
            marker_style="o",
            filename=output_pdf,
            legend=("0 cluster", "1 cluster", "2 cluster", "3 cluster")
            if include_no_cluster
            else ("0 cluster not plotted", "1 cluster", "2 cluster", "3 cluster"),
        )
    else:
        plotting.plot_scatter(
            time_stamp,
            n_cluster,
            title="Number of cluster per event as a function of time",
            x_label="time [min.]",
            marker_style="o",
            filename=output_pdf,
            legend=("0 cluster", "1 cluster", "2 cluster", "3 cluster")
            if include_no_cluster
            else ("0 cluster not plotted", "1 cluster", "2 cluster", "3 cluster"),
        )
    if output_file:
        with tb.openFile(output_file, mode="a") as out_file_h5:
            cluster_array = np.array(n_cluster)
            rec_array = np.array(
                zip(
                    time_stamp,
                    cluster_array[:, 0],
                    cluster_array[:, 1],
                    cluster_array[:, 2],
                    cluster_array[:, 3],
                    cluster_array[:, 4],
                    cluster_array[:, 5],
                ),
                dtype=[
                    ("time_stamp", float),
                    ("cluster_0", float),
                    ("cluster_1", float),
                    ("cluster_2", float),
                    ("cluster_3", float),
                    ("cluster_4", float),
                    ("cluster_5", float),
                ],
            ).view(np.recarray)
            try:
                n_cluster_table = out_file_h5.createTable(
                    out_file_h5.root,
                    name="n_cluster",
                    description=rec_array,
                    title="Cluster per event",
                    filters=tb.Filters(complib="blosc", complevel=5, fletcher32=False),
                )
                n_cluster_table[:] = rec_array
            except tb.exceptions.NodeError:
                logging.warning(output_file + " has already a Beamspot note, do not overwrite existing.")
    return time_stamp, n_cluster