Example #1
0
def plot_clustersize_per_event(data_files, raw_data_convert=True, logy=False):
    """Create the "number of cluster per event" plots for each given data file.

    For every entry of ``data_files`` two PDF files are written next to the data:

    * ``<data_file>_n_cluster_per_event.pdf``: columns 1-3 of the n_cluster
      array and the sum of columns 4 and above, plotted against the time stamps
      on a logarithmic y axis.
    * ``<data_file>_n_cluster_cluster_size_logscale.pdf``: the n_cluster array
      summed over all time stamps, plotted against the column index.

    Parameters
    ----------
    data_files : iterable of string
        Data file names without the '.h5' suffix (the suffix is appended here
        for the raw data interpretation).
    raw_data_convert : bool
        If True the raw data file is interpreted with AnalyzeRawData first.
    logy : bool
        If True the second plot uses a logarithmic y axis, otherwise a linear
        one. The output file name contains 'logscale' in both cases; it is
        kept unchanged for backward compatibility.
    """
    for data_file in data_files:

        if raw_data_convert:
            analyze_raw_data = AnalyzeRawData(raw_data_file=data_file + '.h5', create_pdf=True)
            analyze_raw_data.trigger_data_format = 2  # self.dut['TLU']['DATA_FORMAT']
            analyze_raw_data.create_source_scan_hist = True
            analyze_raw_data.create_cluster_size_hist = True
            analyze_raw_data.create_cluster_tot_hist = True
            analyze_raw_data.align_at_trigger = True
            analyze_raw_data.create_cluster_table = True
            analyze_raw_data.create_empty_event_hits = True
            analyze_raw_data.interpreter.set_warning_output(False)
            analyze_raw_data.interpret_word_table()
            analyze_raw_data.interpreter.print_summary()
            analyze_raw_data.plot_histograms()

        timestamp, n_cluster = pyana.analyse_n_cluster_per_event(
            scan_base=[data_file],
            combine_n_readouts=500,
            time_line_absolute=False,
            percentage=False,
            output_pdf=None,
            output_file=data_file + '_n_cluster_per_event_histo.h5')

        # presumably one row per time stamp and column i = events with i
        # clusters (inferred from the legend labels below) -- TODO confirm
        n_cluster = np.array(n_cluster)

        # Copy the slice: a plain slice is a view, and overwriting its last
        # column would silently corrupt column 4 of n_cluster, which is still
        # needed unmodified for the second plot.
        plot_n_cluster = n_cluster[:, 1:5].copy()
        # Last plotted column collects everything from column 4 upwards.
        plot_n_cluster[:, 3] = n_cluster[:, 4:].sum(axis=1)

        # NOTE(review): drops the last 26 characters of the file name;
        # presumably a fixed-length suffix of the scan file names -- confirm.
        title = os.path.split(data_file)[1][:-26]

        # First plot: number of clusters per event versus time
        plt.clf()
        plt.title('Run' + title + ' - Number of cluster per event as a function of time')
        lineObjects = plt.semilogy(timestamp, plot_n_cluster, linestyle='None', marker='o', markersize=3)
        plt.xlabel('time [min.]')

        plt.grid()
        plt.legend(lineObjects, ('1 cluster, mean = %.3f' % np.mean(plot_n_cluster[:, 0], axis=0),
                                 '2 cluster, mean = %.3f' % np.mean(plot_n_cluster[:, 1], axis=0),
                                 '3 cluster, mean = %.3f' % np.mean(plot_n_cluster[:, 2], axis=0),
                                 '4 or more cluster\nmean = %.3f' % np.mean(plot_n_cluster[:, 3], axis=0)
                                 )
                   )
        plt.savefig(data_file + '_n_cluster_per_event.pdf')

        # Second plot: n_cluster summed over all time stamps. The x axis is
        # derived from the array width instead of a hard coded 0..9 list.
        cluster_sizes = np.arange(n_cluster.shape[1])
        plt.clf()
        plt.title('Run' + title + ' - Number of cluster per event')

        if logy:
            plt.yscale('log')
        # Same data in both modes; only the y scale differs. The linear mode
        # used to plot the time line data under a 'cluster size' axis label.
        plt.plot(cluster_sizes, n_cluster.sum(axis=0), linestyle='None', marker='o', markersize=3)

        plt.xlabel('cluster size')
        plt.grid()
        plt.savefig(data_file + '_n_cluster_cluster_size_logscale.pdf')
Example #2
0
def analyze_cluster_size_per_scan_parameter(input_file_hits,
                                            output_file_cluster_size,
                                            parameter='GDAC',
                                            max_chunk_size=10000000,
                                            overwrite_output_files=False,
                                            output_pdf=None):
    ''' Determine the cluster size histogram for each value of a scan parameter in a hit file.

    The hits are read in event aligned chunks for every scan parameter value and analyzed. The
    cluster size histogram of each value is stored in the output file in the node
    /<parameter>/<parameter>_<value>/HistClusterSize. All histograms are also stacked into the
    root node AllHistClusterSize.

    If the hit file has no scan parameter or only one value of it, a warning is logged and
    AllHistClusterSize is not created (the output file then only holds the empty parameter group).

    Parameters
    ----------
    input_file_hits: string
        The hit file to analyze. Opened in "r+" mode.
        NOTE(review): presumably write access is needed to index the event numbers - confirm.
    output_file_cluster_size: string
        The data file with the results
    parameter: string
        The name of the parameter to separate the data into (e.g.: PlsrDAC)
    max_chunk_size: int
        the maximum chunk size used during read, if too big memory error occurs, if too small analysis takes longer
    overwrite_output_files: bool
        Set to true to overwrite the output file if it already exists
    output_pdf: PdfPages
        Forwarded as filename to plotting.plot_cluster_size; if False nothing is plotted.
        According to the original documentation None prints the plot to screen.
    '''
    logging.info('Analyze the cluster sizes for different ' + parameter +
                 ' settings for ' + input_file_hits)
    if os.path.isfile(output_file_cluster_size) and not overwrite_output_files:  # skip analysis if already done
        logging.info('Analyzed cluster size file ' + output_file_cluster_size +
                     ' already exists. Skip cluster size analysis.')
        return

    with tb.open_file(output_file_cluster_size, mode="w") as out_file_h5:  # file to write the data into
        filter_table = tb.Filters(complib='blosc', complevel=5, fletcher32=False)  # compression of the written data
        parameter_group = out_file_h5.create_group(out_file_h5.root, parameter, title=parameter)  # node to store the data
        cluster_size_total = None  # final array for the cluster size per scan parameter value
        with tb.open_file(input_file_hits, mode="r+") as in_hit_file_h5:  # open the actual hit file
            meta_data_array = in_hit_file_h5.root.meta_data[:]
            scan_parameter = analysis_utils.get_scan_parameter(meta_data_array)  # get the scan parameters
            if scan_parameter:  # only analyze if a scan parameter was used
                scan_parameter_values = scan_parameter[parameter]  # scan parameter settings used
                if len(scan_parameter_values) == 1:  # only analyze per scan step if there are more than one scan step
                    logging.warning('The file ' + str(input_file_hits) +
                                    ' has no different ' + str(parameter) +
                                    ' parameter values. Omit analysis.')
                else:
                    logging.info('Analyze ' + input_file_hits +
                                 ' per scan parameter ' + parameter +
                                 ' for ' + str(len(scan_parameter_values)) +
                                 ' values from ' + str(np.amin(scan_parameter_values)) +
                                 ' to ' + str(np.amax(scan_parameter_values)))
                    # get the event numbers in meta_data where the scan parameter changes
                    event_numbers = analysis_utils.get_meta_data_at_scan_parameter(meta_data_array, parameter)['event_number']
                    # one row per scan step: (parameter value, start event number, stop event number)
                    parameter_ranges = np.column_stack((scan_parameter_values,
                                                        analysis_utils.get_ranges_from_array(event_numbers)))
                    hit_table = in_hit_file_h5.root.Hits
                    analysis_utils.index_event_number(hit_table)
                    total_hits, total_hits_2, index = 0, 0, 0
                    chunk_size = max_chunk_size
                    # initialize the analysis and set settings
                    analyze_data = AnalyzeRawData()
                    analyze_data.create_cluster_size_hist = True
                    analyze_data.create_cluster_tot_hist = True
                    analyze_data.histogram.set_no_scan_parameter()  # one has to tell histogram the # of scan parameters for correct occupancy hist allocation
                    progress_bar = progressbar.ProgressBar(
                        widgets=['',
                                 progressbar.Percentage(), ' ',
                                 progressbar.Bar(marker='*', left='|', right='|'), ' ',
                                 progressbar.AdaptiveETA()],
                        maxval=hit_table.shape[0],
                        term_width=80)
                    progress_bar.start()
                    for parameter_index, parameter_range in enumerate(parameter_ranges):  # loop over the selected events
                        analyze_data.reset()  # resets the data of the last analysis
                        # log the actual parameter name instead of a hard coded 'GDAC'
                        logging.debug('Analyze %s = %s %d%%',
                                      parameter,
                                      parameter_range[0],
                                      int(float(parameter_index) / float(len(parameter_ranges)) * 100.0))
                        start_event_number = parameter_range[1]
                        stop_event_number = parameter_range[2]
                        logging.debug('Data from events = [' +
                                      str(start_event_number) + ',' +
                                      str(stop_event_number) + '[')
                        actual_parameter_group = out_file_h5.create_group(
                            parameter_group,
                            name=parameter + '_' + str(parameter_range[0]),
                            title=parameter + '_' + str(parameter_range[0]))
                        # loop over the hits in the actual selected events with optimizations: variable chunk size, start word index given
                        readout_hit_len = 0  # variable to calculate a optimal chunk size value from the number of hits for speed up
                        for hits, index in analysis_utils.data_aligned_at_events(
                                hit_table,
                                start_event_number=start_event_number,
                                stop_event_number=stop_event_number,
                                start_index=index,
                                chunk_size=chunk_size):
                            total_hits += hits.shape[0]
                            analyze_data.analyze_hits(hits)  # analyze the selected hits in chunks
                            readout_hit_len += hits.shape[0]
                            progress_bar.update(index)
                        # to increase the readout speed, estimate the number of hits for one read instruction
                        # from the last scan step; never exceed max_chunk_size and never go below 50
                        # (there can always be a crazy event with more than 20 hits)
                        chunk_size = max(50, min(int(1.05 * readout_hit_len), max_chunk_size))
                        # get occupancy hist, only used to check that the histogram is consistent
                        occupancy = analyze_data.histogram.get_occupancy()

                        # store and plot cluster size hist
                        cluster_size_hist = analyze_data.clusterizer.get_cluster_size_hist()
                        cluster_size_hist_table = out_file_h5.create_carray(
                            actual_parameter_group,
                            name='HistClusterSize',
                            title='Cluster Size Histogram',
                            atom=tb.Atom.from_dtype(cluster_size_hist.dtype),
                            shape=cluster_size_hist.shape,
                            filters=filter_table)
                        cluster_size_hist_table[:] = cluster_size_hist
                        if output_pdf is not False:
                            plotting.plot_cluster_size(
                                hist=cluster_size_hist,
                                title='Cluster size (' + str(np.sum(cluster_size_hist)) +
                                ' entries) for ' + parameter + ' = ' +
                                str(scan_parameter_values[parameter_index]),
                                filename=output_pdf)
                        if cluster_size_total is None:  # true if no data was appended to the array yet
                            cluster_size_total = cluster_size_hist
                        else:
                            cluster_size_total = np.vstack([cluster_size_total, cluster_size_hist])

                        total_hits_2 += np.sum(occupancy)
                    progress_bar.finish()
                    if total_hits != total_hits_2:
                        logging.warning('Analysis shows inconsistent number of hits. Check needed!')
                    logging.info('Analyzed %d hits!', total_hits)

        # Nothing was analyzed (no scan parameter, only one value or no scan steps):
        # there is no histogram to store and accessing .dtype of None would raise.
        if cluster_size_total is None:
            logging.warning('No cluster size histograms created for ' + str(input_file_hits) +
                            '. Omit the combined cluster size histogram.')
            return
        cluster_size_total_out = out_file_h5.create_carray(
            out_file_h5.root,
            name='AllHistClusterSize',
            title='All Cluster Size Histograms',
            atom=tb.Atom.from_dtype(cluster_size_total.dtype),
            shape=cluster_size_total.shape,
            filters=filter_table)
        cluster_size_total_out[:] = cluster_size_total
Example #3
0
def analyze_cluster_size_per_scan_parameter(
    input_file_hits,
    output_file_cluster_size,
    parameter="GDAC",
    max_chunk_size=10000000,
    overwrite_output_files=False,
    output_pdf=None,
):
    """ Determine the cluster size histogram for each value of a scan parameter in a hit file.

    The hits are read in event aligned chunks for every scan parameter value and analyzed. The
    cluster size histogram of each value is stored in the output file in the node
    /<parameter>/<parameter>_<value>/HistClusterSize. All histograms are also stacked into the
    root node AllHistClusterSize.

    If the hit file has no scan parameter or only one value of it, a warning is logged and
    AllHistClusterSize is not created (the output file then only holds the empty parameter group).

    NOTE(review): this variant uses the camelCase PyTables calls (openFile, createGroup,
    createCArray), i.e. the legacy PyTables 2.x naming - confirm the installed PyTables
    version still provides them.

    Parameters
    ----------
    input_file_hits: string
        The hit file to analyze. Opened in "r+" mode.
        NOTE(review): presumably write access is needed to index the event numbers - confirm.
    output_file_cluster_size: string
        The data file with the results
    parameter: string
        The name of the parameter to separate the data into (e.g.: PlsrDAC)
    max_chunk_size: int
        the maximum chunk size used during read, if too big memory error occurs, if too small analysis takes longer
    overwrite_output_files: bool
        Set to true to overwrite the output file if it already exists
    output_pdf: PdfPages
        Forwarded as filename to plotting.plot_cluster_size; if False nothing is plotted.
        According to the original documentation None prints the plot to screen.
    """
    logging.info("Analyze the cluster sizes for different " + parameter + " settings for " + input_file_hits)
    if os.path.isfile(output_file_cluster_size) and not overwrite_output_files:  # skip analysis if already done
        logging.info(
            "Analyzed cluster size file " + output_file_cluster_size + " already exists. Skip cluster size analysis."
        )
        return

    with tb.openFile(output_file_cluster_size, mode="w") as out_file_h5:  # file to write the data into
        filter_table = tb.Filters(complib="blosc", complevel=5, fletcher32=False)  # compression of the written data
        parameter_group = out_file_h5.createGroup(
            out_file_h5.root, parameter, title=parameter
        )  # node to store the data
        cluster_size_total = None  # final array for the cluster size per scan parameter value
        with tb.openFile(input_file_hits, mode="r+") as in_hit_file_h5:  # open the actual hit file
            meta_data_array = in_hit_file_h5.root.meta_data[:]
            scan_parameter = analysis_utils.get_scan_parameter(meta_data_array)  # get the scan parameters
            if scan_parameter:  # only analyze if a scan parameter was used
                scan_parameter_values = scan_parameter[parameter]  # scan parameter settings used
                if len(scan_parameter_values) == 1:  # only analyze per scan step if there are more than one scan step
                    logging.warning(
                        "The file "
                        + str(input_file_hits)
                        + " has no different "
                        + str(parameter)
                        + " parameter values. Omit analysis."
                    )
                else:
                    logging.info(
                        "Analyze "
                        + input_file_hits
                        + " per scan parameter "
                        + parameter
                        + " for "
                        + str(len(scan_parameter_values))
                        + " values from "
                        + str(np.amin(scan_parameter_values))
                        + " to "
                        + str(np.amax(scan_parameter_values))
                    )
                    # get the event numbers in meta_data where the scan parameter changes
                    event_numbers = analysis_utils.get_meta_data_at_scan_parameter(meta_data_array, parameter)[
                        "event_number"
                    ]
                    # one row per scan step: (parameter value, start event number, stop event number)
                    parameter_ranges = np.column_stack(
                        (scan_parameter_values, analysis_utils.get_ranges_from_array(event_numbers))
                    )
                    hit_table = in_hit_file_h5.root.Hits
                    analysis_utils.index_event_number(hit_table)
                    total_hits, total_hits_2, index = 0, 0, 0
                    chunk_size = max_chunk_size
                    # initialize the analysis and set settings
                    analyze_data = AnalyzeRawData()
                    analyze_data.create_cluster_size_hist = True
                    analyze_data.create_cluster_tot_hist = True
                    # one has to tell the histogramer the # of scan parameters for correct occupancy hist allocation
                    analyze_data.histograming.set_no_scan_parameter()
                    progress_bar = progressbar.ProgressBar(
                        widgets=[
                            "",
                            progressbar.Percentage(),
                            " ",
                            progressbar.Bar(marker="*", left="|", right="|"),
                            " ",
                            analysis_utils.ETA(),
                        ],
                        maxval=hit_table.shape[0],
                        term_width=80,
                    )
                    progress_bar.start()
                    for parameter_index, parameter_range in enumerate(parameter_ranges):  # loop over the selected events
                        analyze_data.reset()  # resets the data of the last analysis
                        # log the actual parameter name instead of a hard coded "GDAC"
                        logging.debug(
                            "Analyze %s = %s %d%%",
                            parameter,
                            parameter_range[0],
                            int(float(parameter_index) / float(len(parameter_ranges)) * 100.0),
                        )
                        start_event_number = parameter_range[1]
                        stop_event_number = parameter_range[2]
                        logging.debug(
                            "Data from events = [" + str(start_event_number) + "," + str(stop_event_number) + "["
                        )
                        actual_parameter_group = out_file_h5.createGroup(
                            parameter_group,
                            name=parameter + "_" + str(parameter_range[0]),
                            title=parameter + "_" + str(parameter_range[0]),
                        )
                        # loop over the hits in the actual selected events with optimizations: variable chunk size, start word index given
                        readout_hit_len = 0  # variable to calculate a optimal chunk size value from the number of hits for speed up
                        for hits, index in analysis_utils.data_aligned_at_events(
                            hit_table,
                            start_event_number=start_event_number,
                            stop_event_number=stop_event_number,
                            start=index,
                            chunk_size=chunk_size,
                        ):
                            total_hits += hits.shape[0]
                            analyze_data.analyze_hits(hits)  # analyze the selected hits in chunks
                            readout_hit_len += hits.shape[0]
                            progress_bar.update(index)
                        # to increase the readout speed, estimate the number of hits for one read instruction
                        # from the last scan step; never exceed max_chunk_size and never go below 50
                        # (there can always be a crazy event with more than 20 hits)
                        chunk_size = max(50, min(int(1.05 * readout_hit_len), max_chunk_size))
                        # get occupancy hist, only used to check that the histograming is consistent
                        occupancy = analyze_data.histograming.get_occupancy()

                        # store and plot cluster size hist
                        cluster_size_hist = analyze_data.clusterizer.get_cluster_size_hist()
                        cluster_size_hist_table = out_file_h5.createCArray(
                            actual_parameter_group,
                            name="HistClusterSize",
                            title="Cluster Size Histogram",
                            atom=tb.Atom.from_dtype(cluster_size_hist.dtype),
                            shape=cluster_size_hist.shape,
                            filters=filter_table,
                        )
                        cluster_size_hist_table[:] = cluster_size_hist
                        if output_pdf is not False:
                            plotting.plot_cluster_size(
                                hist=cluster_size_hist,
                                title="Cluster size ("
                                + str(np.sum(cluster_size_hist))
                                + " entries) for "
                                + parameter
                                + " = "
                                + str(scan_parameter_values[parameter_index]),
                                filename=output_pdf,
                            )
                        if cluster_size_total is None:  # true if no data was appended to the array yet
                            cluster_size_total = cluster_size_hist
                        else:
                            cluster_size_total = np.vstack([cluster_size_total, cluster_size_hist])

                        total_hits_2 += np.sum(occupancy)
                    progress_bar.finish()
                    if total_hits != total_hits_2:
                        logging.warning("Analysis shows inconsistent number of hits. Check needed!")
                    logging.info("Analyzed %d hits!", total_hits)

        # Nothing was analyzed (no scan parameter, only one value or no scan steps):
        # there is no histogram to store and accessing .dtype of None would raise.
        if cluster_size_total is None:
            logging.warning(
                "No cluster size histograms created for "
                + str(input_file_hits)
                + ". Omit the combined cluster size histogram."
            )
            return
        cluster_size_total_out = out_file_h5.createCArray(
            out_file_h5.root,
            name="AllHistClusterSize",
            title="All Cluster Size Histograms",
            atom=tb.Atom.from_dtype(cluster_size_total.dtype),
            shape=cluster_size_total.shape,
            filters=filter_table,
        )
        cluster_size_total_out[:] = cluster_size_total