Example #1
0
                    # Load the cached candidate table from parquet.
                    df_data = pd.read_parquet('df/data_dataset')

                    # Selection for this matter/centrality/ct bin plus fixed
                    # cuts on pt, TPC clusters and radius. The adjacent
                    # literals concatenate into a single query expression.
                    selection = (
                        f'matter {split_ineq_sign}'
                        f' and centrality > {cent_bins[0]} and centrality < {cent_bins[1]}'
                        ' and pt > 0.5 and pt < 3'
                        f' and ct > {ct_bins[0]} and ct < {ct_bins[1]}'
                        ' and tpcClV0Pi > 69 and tpcClV0Pr > 69 and radius > 3'
                    )
                    df_data_cent = df_data.query(selection)
                    # Drop the full table now that the selected subset exists.
                    del df_data

                    # Score the selected candidates and store the score as a
                    # new 'model_output' column.
                    data_y_score = model_hdl.predict(df_data_cent)
                    df_data_cent['model_output'] = data_y_score

                    # Keep only candidates whose score exceeds the threshold
                    # stored for this bin at the last eff_array index.
                    score_threshold = score_eff_arrays_dict[bin][len(eff_array) - 1]
                    df_data_cent = df_data_cent.query(
                        f'model_output > {score_threshold}')
                    df_data_cent.to_parquet(
                        f'df/{bin}.parquet.gzip',
                        compression='gzip',
                    )
                else:
                    # Read the "LambdaTree" tree from DATA_PATH through a
                    # TreeHandler, applying the bin selection while loading.
                    df_data = TreeHandler()
                    preselection = (
                        f'matter {split_ineq_sign}'
                        f' and centrality > {cent_bins[0]} and centrality < {cent_bins[1]}'
                        ' and pt > 0.5 and pt < 3'
                        f' and ct > {ct_bins[0]} and ct < {ct_bins[1]}'
                    )
                    df_data.get_handler_from_large_file(
                        DATA_PATH,
                        "LambdaTree",
                        preselection=preselection,
                        max_workers=8,
                    )

                    # Apply the model, then keep only candidates whose score
                    # exceeds the threshold stored for this bin at the last
                    # eff_array index.
                    df_data.apply_model_handler(model_hdl)
                    score_threshold = score_eff_arrays_dict[bin][len(eff_array) - 1]
                    df_data.apply_preselections(
                        f'model_output > {score_threshold}')
                    df_data.write_df_to_parquet_files(bin, "df/")
Example #2
0
            # Hand the selected candidates to a TreeHandler so they can be
            # sliced in ct and scored with one model per ct bin.
            data_tree_handler = TreeHandler()
            data_tree_handler.set_data_frame(df_data_cent)
            # The frame was passed to the handler above; drop the local name.
            del df_data_cent

            # Consecutive (low, high) edge pairs of the ct binning used for
            # this centrality class.
            ct_edges = CT_BINS[i_cent_bins]
            ct_ranges = list(zip(ct_edges[:-1], ct_edges[1:]))
            data_tree_handler.slice_data_frame('ct', ct_ranges)

            # One ModelHandler per ct slice, in slice order.
            model_hdl_array = np.empty((len(ct_ranges), ), dtype=object)
            model_suffix = '_optimized_trained' if OPTIMIZED else '_trained'

            # NOTE(review): `bin` and `slice` shadow builtins. The names are
            # kept because code outside this excerpt may still read them.
            bin_labels = []
            for i_ct_bins, (ct_low, ct_high) in enumerate(ct_ranges):
                bin = f'{split}_{cent_bins[0]}_{cent_bins[1]}_{ct_low}_{ct_high}'
                bin_labels.append(bin)
                model_hdl_array[i_ct_bins] = ModelHandler()
                model_hdl_array[i_ct_bins].load_model_handler(
                    f'models/{bin}{model_suffix}')

            data_tree_handler.apply_model_handler(list(model_hdl_array))
            eff_array = np.arange(0.10, 0.91, 0.01)

            for i_ct_bins, bin in enumerate(bin_labels):
                # Presumably a pandas DataFrame: it is used through
                # .query() and .to_parquet() below.
                slice = data_tree_handler.get_slice(i_ct_bins)
                # Threshold stored for this bin at the last eff_array index.
                score_threshold = score_eff_arrays_dict[bin][len(eff_array) - 1]
                # query() returns a new frame and leaves the original
                # untouched, so the result must be assigned back. Otherwise
                # the unfiltered slice is written to disk.
                slice = slice.query(f'model_output > {score_threshold}')
                slice.to_parquet(f'df/{bin}', compression='gzip')