df_data = pd.read_parquet('df/data_dataset') #df_data = uproot.open('../data/AnalysisResults.root')['LambdaTree'].arrays(library="pd") # df_data = df_data.append(df_data_r, ignore_index=True) df_data_cent = df_data.query( f'matter {split_ineq_sign} and centrality > {cent_bins[0]} and centrality < {cent_bins[1]} and pt > 0.5 and pt < 3 and ct > {ct_bins[0]} and ct < {ct_bins[1]} and tpcClV0Pi > 69 and tpcClV0Pr > 69 and radius > 3' ) del df_data data_y_score = model_hdl.predict(df_data_cent) df_data_cent['model_output'] = data_y_score df_data_cent = df_data_cent.query( f'model_output > {score_eff_arrays_dict[bin][len(eff_array)-1]}' ) df_data_cent.to_parquet(f'df/{bin}.parquet.gzip', compression='gzip') else: df_data = TreeHandler() df_data.get_handler_from_large_file( DATA_PATH, "LambdaTree", preselection= f'matter {split_ineq_sign} and centrality > {cent_bins[0]} and centrality < {cent_bins[1]} and pt > 0.5 and pt < 3 and ct > {ct_bins[0]} and ct < {ct_bins[1]}', max_workers=8) df_data.apply_model_handler(model_hdl) df_data.apply_preselections( f'model_output > {score_eff_arrays_dict[bin][len(eff_array)-1]}' ) df_data.write_df_to_parquet_files(bin, "df/")
# --- Apply the per-ct-bin trained BDTs to the selected data and write one
# --- score-filtered parquet file per ct bin.
data_tree_handler = TreeHandler()
data_tree_handler.set_data_frame(df_data_cent)
del df_data_cent  # the handler now owns the dataframe; drop the local ref
# Slice the data into the ct bins of this centrality class.
data_tree_handler.slice_data_frame(
    'ct', list(zip(CT_BINS[i_cent_bins][:-1], CT_BINS[i_cent_bins][1:])))
# Load one trained model per ct bin (optimized variant when available).
model_hdl_array = np.empty((len(CT_BINS[i_cent_bins]) - 1, ), dtype=object)
for i_ct_bins in range(len(CT_BINS[i_cent_bins]) - 1):
    bin = f'{split}_{cent_bins[0]}_{cent_bins[1]}_{CT_BINS[i_cent_bins][i_ct_bins]}_{CT_BINS[i_cent_bins][i_ct_bins+1]}'
    model_hdl_array[i_ct_bins] = ModelHandler()
    if OPTIMIZED:
        model_hdl_array[i_ct_bins].load_model_handler(
            f'models/{bin}_optimized_trained')
    else:
        model_hdl_array[i_ct_bins].load_model_handler(
            f'models/{bin}_trained')
# Score every slice with its own model (adds a 'model_output' column).
data_tree_handler.apply_model_handler(list(model_hdl_array))
eff_array = np.arange(0.10, 0.91, 0.01)
for i_ct_bins in range(len(CT_BINS[i_cent_bins]) - 1):
    bin = f'{split}_{cent_bins[0]}_{cent_bins[1]}_{CT_BINS[i_cent_bins][i_ct_bins]}_{CT_BINS[i_cent_bins][i_ct_bins+1]}'
    slice = data_tree_handler.get_slice(i_ct_bins)
    # BUG FIX: DataFrame.query returns a NEW frame (inplace defaults to
    # False); the original discarded the result and wrote the UNFILTERED
    # slice to disk. Rebind so the score cut actually applies.
    slice = slice.query(
        f'model_output > {score_eff_arrays_dict[bin][len(eff_array)-1]}'
    )
    # NOTE(review): output path lacks the '.parquet.gzip' suffix used by the
    # skimming branch elsewhere in this file — confirm which name downstream
    # readers expect before unifying.
    slice.to_parquet(f'df/{bin}', compression='gzip')