def main():
    # Read arguments
    parser = argparse.ArgumentParser(description='Make performance files')
    parser.add_argument('--config_file', type=str, required=True,
                        help='Configuration file')
    parser.add_argument(
        '--obs_time',
        type=str,
        required=True,
        help='Observation time, should be given as a string: '
             'value and astropy unit separated by an empty space')
    mode_group = parser.add_mutually_exclusive_group()
    mode_group.add_argument('--wave', dest="mode", action='store_const',
                            const="wave", default="tail",
                            help="if set, use wavelet cleaning")
    mode_group.add_argument('--tail', dest="mode", action='store_const',
                            const="tail",
                            help="if set, use tail cleaning, otherwise wavelets")
    args = parser.parse_args()

    # Read configuration file
    cfg = load_config(args.config_file)

    # Add obs. time to the configuration
    str_obs_time = args.obs_time.split()
    cfg['analysis']['obs_time'] = {'value': float(str_obs_time[0]),
                                   'unit': str(str_obs_time[-1])}

    # Create output directory if necessary
    outdir = os.path.join(cfg['general']['outdir'],
                          'irf_{}_ThSq_{}_Time{:.2f}{}'.format(
                              args.mode,
                              cfg['analysis']['thsq_opt']['type'],
                              cfg['analysis']['obs_time']['value'],
                              cfg['analysis']['obs_time']['unit']))
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    indir = cfg['general']['indir']
    template_input_file = cfg['general']['template_input_file']

    # Load data
    particles = ['gamma', 'electron', 'proton']
    evt_dict = dict()  # Contains the DL2 table for each particle type
    for particle in particles:
        # template looks like dl2_{}_{}_merged.h5
        infile = os.path.join(indir,
                              template_input_file.format(args.mode, particle))
        evt_dict[particle] = pd.read_hdf(infile, key='reco_events')

    # Apply the offset cut to protons and electrons
    for particle in ['electron', 'proton']:
        evt_dict[particle] = evt_dict[particle].query('offset <= {}'.format(
            cfg['particle_information'][particle]['offset_cut']))

    # Add required data to the configuration for future computation
    for particle in particles:
        cfg['particle_information'][particle]['n_files'] = \
            len(np.unique(evt_dict[particle]['obs_id']))
        cfg['particle_information'][particle]['n_simulated'] = \
            cfg['particle_information'][particle]['n_files'] * \
            cfg['particle_information'][particle]['n_events_per_file']

    # Define the spectral model for each particle type
    model_dict = {'gamma': CrabSpectrum('hegra').model,
                  'proton': cosmic_ray_flux,
                  'electron': cosmic_ray_flux}

    # Reconstructed-energy binning
    cfg_binning = cfg['analysis']['ereco_binning']
    ereco = np.logspace(np.log10(cfg_binning['emin']),
                        np.log10(cfg_binning['emax']),
                        cfg_binning['nbin'] + 1) * u.TeV

    # Handle theta-square cut optimisation
    # (compute the 68% containment radius of the PSF if necessary)
    thsq_opt_type = cfg['analysis']['thsq_opt']['type']
    if thsq_opt_type == 'fixed':
        thsq_values = np.array([cfg['analysis']['thsq_opt']['value']]) * u.deg
        print('Using fixed theta cut: {}'.format(thsq_values))
    elif thsq_opt_type == 'opti':
        thsq_values = np.arange(0.05, 0.40, 0.01) * u.deg
        print('Optimising theta cut for: {}'.format(thsq_values))
    elif thsq_opt_type == 'r68':
        print('Using R68% theta cut')
        print('Computing...')
        radius = 68

        thsq_values = list()
        for ibin in range(len(ereco) - 1):
            emin = ereco[ibin]
            emax = ereco[ibin + 1]

            energy_query = 'reco_energy > {} and reco_energy <= {}'.format(
                emin.value, emax.value)
            data = evt_dict['gamma'].query(energy_query).copy()

            min_stat = 0
            if len(data) <= min_stat:
                print(' ==> Not enough statistics:')
                print('To be handled...')
                thsq_values.append(0.3)
                continue

            psf = np.percentile(data['offset'], radius)
            psf_err = psf / np.sqrt(len(data))

            thsq_values.append(psf)
        thsq_values = np.array(thsq_values) * u.deg
        # Set 0.05 deg as a lower bound
        idx = np.where(thsq_values.value < 0.05)
        thsq_values[idx] = 0.05 * u.deg
        print('Using theta cut: {}'.format(thsq_values))

    # Cuts optimisation
    print('### Finding best cuts...')
    cut_optimiser = CutsOptimisation(config=cfg,
                                     evt_dict=evt_dict,
                                     verbose_level=0)

    # Weight events
    print('- Weighting events...')
    cut_optimiser.weight_events(
        model_dict=model_dict,
        colname_mc_energy=cfg['column_definition']['mc_energy'])

    # Find the best cutoff to reach the best sensitivity
    print('- Estimating cutoffs...')
    cut_optimiser.find_best_cutoff(energy_values=ereco,
                                   angular_values=thsq_values)

    # Save results and auxiliary data for diagnostics
    print('- Saving results to disk...')
    cut_optimiser.write_results(
        outdir, '{}.fits'.format(cfg['general']['output_table_name']),
        format='fits')

    # Cuts diagnostics
    print('### Building cut diagnostics...')
    cut_diagnostic = CutsDiagnostic(config=cfg, indir=outdir)
    cut_diagnostic.plot_optimisation_summary()
    cut_diagnostic.plot_diagnostics()

    # Apply cuts and save data
    print('### Applying cuts to data...')
    cut_applicator = CutsApplicator(config=cfg,
                                    evt_dict=evt_dict,
                                    outdir=outdir)
    cut_applicator.apply_cuts()

    # IRF maker
    print('### Building IRF...')
    irf_maker = IrfMaker(config=cfg, evt_dict=evt_dict, outdir=outdir)
    irf_maker.build_irf()

    # Sensitivity maker
    print('### Estimating sensitivity...')
    sensitivity_maker = SensitivityMaker(config=cfg, outdir=outdir)
    sensitivity_maker.load_irf()
    sensitivity_maker.estimate_sensitivity()
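
# --- Illustrative sketch (not part of the pipeline) ---
# A minimal, self-contained version of the R68 theta-cut computation above:
# for each reconstructed-energy bin, the cut is the 68th percentile of the
# gamma-ray 'offset' distribution, floored at 0.05 deg. The function name
# `r68_theta_cuts` and the toy DataFrame in the usage comment are
# hypothetical stand-ins for the real DL2 gamma table.
import numpy as np
import pandas as pd


def r68_theta_cuts(gammas: pd.DataFrame, energy_edges: np.ndarray,
                   floor_deg: float = 0.05) -> np.ndarray:
    """Return one containment-based theta cut (deg) per energy bin."""
    cuts = []
    for emin, emax in zip(energy_edges[:-1], energy_edges[1:]):
        sel = gammas.query(f'reco_energy > {emin} and reco_energy <= {emax}')
        if len(sel) == 0:
            cuts.append(0.3)  # fallback used above when statistics run out
            continue
        cuts.append(np.percentile(sel['offset'], 68))
    return np.maximum(np.asarray(cuts), floor_deg)


# Usage with toy data:
# rng = np.random.default_rng(0)
# toy = pd.DataFrame({'reco_energy': rng.uniform(0.1, 100, 10000),
#                     'offset': rng.exponential(0.1, 10000)})
# print(r68_theta_cuts(toy, np.logspace(-1, 2, 5)))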
def main():

    # INITIALIZE CLI arguments
    args = initialize_script_arguments()

    # LOAD CONFIGURATION FILE
    cfg = load_config(args.config_file)

    # INPUT CONFIGURATION

    # Import parameters
    if args.indir is None:
        data_dir = cfg["General"]["data_dir"]
    else:
        data_dir = args.indir

    if args.outdir is None:
        outdir = cfg["General"]["outdir"]
    else:
        outdir = args.outdir
    if not os.path.exists(outdir):
        os.makedirs(outdir)

    # Get the file containing the gammas (signal)
    if args.infile_signal is None:
        data_sig_file = cfg["General"]["data_sig_file"].format(args.mode)
    else:
        data_sig_file = args.infile_signal
    filename_sig = path.join(data_dir, data_sig_file)
    print(f"INPUT SIGNAL FILE PATH = {filename_sig}")

    # Cameras to use
    if args.cameras_from_config:
        print("GETTING CAMERAS FROM CONFIGURATION FILE")
        cam_ids = cfg["General"]["cam_id_list"]
    elif args.cameras_from_file:
        print("GETTING CAMERAS FROM SIGNAL TRAINING FILE")
        # in the same analysis all particle types are analyzed in the
        # same way, so we can just use the gammas
        cam_ids = get_camera_names(filename_sig)
    else:
        print("GETTING CAMERAS FROM CLI")
        cam_ids = args.cam_id_lists.split()

    # The names of the tables inside the HDF5 file are the cameras' names
    table_name = list(cam_ids)

    # Dataset split train-test fraction
    train_fraction = cfg["Split"]["train_fraction"]

    # Name of the target quantity
    target_name = cfg["Method"]["target_name"]

    # Get the list of features
    features_basic = cfg["FeatureList"]["Basic"]
    features_derived = cfg["FeatureList"]["Derived"]
    feature_list = features_basic + list(features_derived)
    print("Going to use the following features to train the model:")
    print(feature_list)
    # sort the features alphabetically to ensure order
    # preservation with model.predict in protopipe.scripts
    feature_list = sorted(feature_list)

    # GridSearchCV
    use_GridSearchCV = cfg["GridSearchCV"]["use"]
    scoring = cfg["GridSearchCV"]["scoring"]
    cv = cfg["GridSearchCV"]["cv"]

    # Hyper-parameters of the main model
    tuned_parameters = cfg["Method"]["tuned_parameters"]

    # Initialize the model dynamically:
    # there is always at least one (main) model to initialize
    model_to_use = cfg['Method']['name']
    module_name = '.'.join(model_to_use.split('.', 2)[:-1])
    class_name = model_to_use.split('.')[-1]
    module = importlib.import_module(module_name)  # sklearn.XXX
    model = getattr(module, class_name)
    print(f"Going to use {module_name}.{class_name}...")

    # Check for any base estimator, in case the main model is a meta-estimator
    if "base_estimator" in cfg['Method']:

        base_estimator_cfg = cfg['Method']['base_estimator']
        base_estimator_name = base_estimator_cfg['name']
        base_estimator_pars = base_estimator_cfg['parameters']

        base_estimator_module_name = '.'.join(
            base_estimator_name.split('.', 2)[:-1])
        base_estimator_class_name = base_estimator_name.split('.')[-1]
        base_estimator_module = importlib.import_module(
            base_estimator_module_name)  # sklearn.XXX
        base_estimator_model = getattr(base_estimator_module,
                                       base_estimator_class_name)

        initialized_base_estimator = base_estimator_model(
            **base_estimator_pars)

        print(f"...based on {base_estimator_module_name}."
              f"{base_estimator_class_name}")

        initialized_model = model(base_estimator=initialized_base_estimator,
                                  **cfg['Method']['tuned_parameters'])
    else:
        initialized_model = model(**cfg['Method']['tuned_parameters'])

    # Map model types to the models supported by the script
    model_types = {
        "regressor": ["RandomForestRegressor", "AdaBoostRegressor"],
        "classifier": ["RandomForestClassifier"]
    }

    if class_name in model_types["regressor"]:
        # Get the selection cuts
        cuts = make_cut_list(cfg["SigFiducialCuts"])
    elif class_name in model_types["classifier"]:
        # read the background file from either the config file or the CLI
        if args.infile_background is None:
            data_bkg_file = cfg["General"]["data_bkg_file"].format(args.mode)
        else:
            data_bkg_file = args.infile_background
        filename_bkg = path.join(data_dir, data_bkg_file)

        # Get the selection cuts
        sig_cuts = make_cut_list(cfg["SigFiducialCuts"])
        bkg_cuts = make_cut_list(cfg["BkgFiducialCuts"])
        use_same_number_of_sig_and_bkg_for_training = cfg["Split"][
            "use_same_number_of_sig_and_bkg_for_training"]
    else:
        raise ValueError("ERROR: not a supported model")

    print("### Using {} for model construction".format(model_to_use))

    print(f"LIST OF CAMERAS TO USE = {cam_ids}")

    models = dict()
    for idx, cam_id in enumerate(cam_ids):

        print("### Building model for {}".format(cam_id))

        if class_name in model_types["regressor"]:

            # Load data
            data_sig = pd.read_hdf(filename_sig, table_name[idx], mode="r")

            # Add any derived feature and apply fiducial cuts
            data_sig = prepare_data(ds=data_sig,
                                    derived_features=features_derived,
                                    select_data=True,
                                    cuts=cuts)

            if args.max_events:
                data_sig = data_sig[0:args.max_events]

            print(f"Going to split {len(data_sig)} SIGNAL images...")

            # Initialize the model
            factory = TrainModel(case="regressor",
                                 target_name=target_name,
                                 feature_name_list=feature_list)

            # Split the TRAINING dataset in train and test sub-datasets.
            # Useful to test the models before using them for DL2 production
            factory.split_data(data_sig=data_sig,
                               train_fraction=train_fraction)
            print("Training sample: sig {}".format(len(factory.data_train)))
            print("Test sample: sig {}".format(len(factory.data_test)))

        else:  # if it's not a regressor, it's a classifier

            # Load data
            data_sig = pd.read_hdf(filename_sig, table_name[idx], mode="r")
            data_bkg = pd.read_hdf(filename_bkg, table_name[idx], mode="r")

            # Add labels
            data_sig = prepare_data(ds=data_sig,
                                    label=1,
                                    cuts=sig_cuts,
                                    select_data=True,
                                    derived_features=features_derived)
            data_bkg = prepare_data(ds=data_bkg,
                                    label=0,
                                    cuts=bkg_cuts,
                                    select_data=True,
                                    derived_features=features_derived)

            if args.max_events:
                data_sig = data_sig[0:args.max_events]
                data_bkg = data_bkg[0:args.max_events]

            print(f"Going to split {len(data_sig)} SIGNAL images and "
                  f"{len(data_bkg)} BACKGROUND images")

            # Initialize the model
            factory = TrainModel(case="classifier",
                                 target_name=target_name,
                                 feature_name_list=feature_list)

            # Split the TRAINING dataset in train and test sub-datasets.
            # Useful to test the models before using them for DL2 production
            factory.split_data(
                data_sig=data_sig,
                data_bkg=data_bkg,
                train_fraction=train_fraction,
                force_same_nsig_nbkg=use_same_number_of_sig_and_bkg_for_training,
            )
            print("Training sample: sig {} and bkg {}".format(
                len(factory.data_train.query("label==1")),
                len(factory.data_train.query("label==0"))))
            print("Test sample: sig {} and bkg {}".format(
                len(factory.data_test.query("label==1")),
                len(factory.data_test.query("label==0"))))

        if use_GridSearchCV:
            # Apply optimization of the hyper-parameters via grid search
            # and return the best model
            best_model = factory.get_optimal_model(initialized_model,
                                                   tuned_parameters,
                                                   scoring=scoring,
                                                   cv=cv)
        else:  # otherwise use directly the initial model
            best_model = initialized_model

        # Fit the chosen model on the train data
        best_model.fit(
            factory.data_scikit["X_train"],
            factory.data_scikit["y_train"],
            sample_weight=factory.data_scikit["w_train"],
        )

        if class_name in model_types["classifier"]:

            print(classification_report(
                factory.data_scikit["y_test"],
                best_model.predict(factory.data_scikit["X_test"])))

            # Calibrate the model on test data if necessary (GridSearchCV)
            if use_GridSearchCV and cfg["Method"]["calibrate_output"]:
                print("==> Calibrate classifier...")
                best_model = CalibratedClassifierCV(best_model,
                                                    method="sigmoid",
                                                    cv="prefit")
                best_model.fit(factory.data_scikit["X_test"],
                               factory.data_scikit["y_test"])

        save_output(models, cam_id, factory, best_model, model_types,
                    class_name, outdir)
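
# --- Illustrative sketch (not part of the pipeline) ---
# The dynamic initialization above resolves a dotted class path from the
# configuration (e.g. "sklearn.ensemble.RandomForestRegressor") into a class
# object via importlib + getattr. A minimal, self-contained version follows;
# the helper name `model_from_name` is hypothetical. Note that rpartition
# handles dotted paths of any depth, which the split('.', 2) idiom above
# does not.
import importlib


def model_from_name(dotted_name, parameters):
    """Instantiate a class from its dotted path, e.g. a scikit-learn model."""
    module_name, _, class_name = dotted_name.rpartition('.')
    cls = getattr(importlib.import_module(module_name), class_name)
    return cls(**parameters)


# Usage (assumes scikit-learn is installed):
# reg = model_from_name("sklearn.ensemble.RandomForestRegressor",
#                       {"n_estimators": 100, "max_depth": 10})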
def main(): # Read arguments parser = argparse.ArgumentParser(description="Make diagnostic plot") parser.add_argument("--config_file", type=str, required=True) mode_group = parser.add_mutually_exclusive_group() mode_group.add_argument( "--wave", dest="mode", action="store_const", const="wave", default="tail", help="if set, use wavelet cleaning", ) mode_group.add_argument( "--tail", dest="mode", action="store_const", const="tail", help="if set, use tail cleaning, otherwise wavelets", ) args = parser.parse_args() # Read configuration file cfg = load_config(args.config_file) model_type = cfg["General"]["model_type"] # Import parameters indir = cfg["General"]["outdir"] cam_ids = cfg["General"]["cam_id_list"] # Model method_name = cfg["Method"]["name"] target_name = cfg["Method"]["target_name"] if model_type in "classifier": use_proba = cfg["Method"]["use_proba"] # Diagnostic nbins = cfg["Diagnostic"]["energy"]["nbins"] energy_edges = np.logspace( np.log10(cfg["Diagnostic"]["energy"]["min"]), np.log10(cfg["Diagnostic"]["energy"]["max"]), nbins + 1, True, ) # Will be further used to get model output of events diagnostic = dict() for idx, cam_id in enumerate(cam_ids): print("### Model diagnostic for {}".format(cam_id)) # Load data data_scikit = load_obj( path.join( indir, "data_scikit_{}_{}_{}_{}.pkl.gz".format( model_type, method_name, args.mode, cam_id ), ) ) data_train = pd.read_pickle( path.join( indir, "data_train_{}_{}_{}_{}.pkl.gz".format( model_type, method_name, args.mode, cam_id ), ) ) data_test = pd.read_pickle( path.join( indir, "data_test_{}_{}_{}_{}.pkl.gz".format( model_type, method_name, args.mode, cam_id ), ) ) # Load model outname = "{}_{}_{}_{}.pkl.gz".format( model_type, args.mode, cam_id, method_name ) model = joblib.load(path.join(indir, outname)) outdir = os.path.join( indir, "diagnostic_{}_{}_{}_{}".format(model_type, method_name, args.mode, cam_id), ) if not os.path.exists(outdir): os.makedirs(outdir) if model_type in "regressor": diagnostic[cam_id] = RegressorDiagnostic( model=model, feature_name_list=cfg["FeatureList"], target_name=target_name, data_train=data_train, data_test=data_test, output_name="reco_energy", ) elif model_type in "classifier": if use_proba is True: ouput_model_name = "gammaness" else: ouput_model_name = "score" diagnostic[cam_id] = ClassifierDiagnostic( model=model, feature_name_list=cfg["FeatureList"], target_name=target_name, data_train=data_train, data_test=data_test, model_output_name=ouput_model_name, is_output_proba=use_proba, ) # Image-level diagnostic - feature importance plt.figure(figsize=(5, 5)) ax = plt.gca() ax = diagnostic[cam_id].plot_feature_importance( ax, **{"alpha": 0.7, "edgecolor": "black", "linewidth": 2, "color": "darkgreen"} ) ax.set_ylabel("Feature importance") ax.grid() plt.title(cam_id) plt.tight_layout() save_fig(outdir, "feature_importances") # Diagnostic for regressor if model_type in "regressor": # Image-level diagnostic[cam_id] - features fig, axes = diagnostic[cam_id].plot_features( data_list=[data_train, data_test], nbin=30, hist_kwargs_list=[ { "edgecolor": "blue", "color": "blue", "label": "Gamma training", "alpha": 0.2, "fill": True, "ls": "-", "lw": 2, }, { "edgecolor": "blue", "color": "blue", "label": "Gamma test", "alpha": 1, "fill": False, "ls": "--", "lw": 2, }, ], error_kw_list=[ dict(ecolor="blue", lw=2, capsize=2, capthick=2, alpha=0.2), dict(ecolor="blue", lw=2, capsize=2, capthick=2, alpha=0.2), ], ncols=3, ) plt.title(cam_id) fig.tight_layout() save_fig(outdir, "features", fig=fig) # Compute averaged 
energy print("Process test sample...") data_test_evt = get_evt_subarray_model_output( data_test, weight_name="sum_signal_cam", keep_cols=["mc_energy"], model_output_name="reco_energy_img", model_output_name_evt="reco_energy", ) ncols = 5 nrows = ( int(nbins / ncols) if nbins % ncols == 0 else int((nbins + 1) / ncols) ) if nrows == 0: nrows = 1 ncols = 1 fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(5 * 5, 10)) try: axes = axes.flatten() except: axes = [axes] bias = [] resolution = [] energy_centres = [] for ibin in range(len(energy_edges) - 1): ax = axes[ibin] data = data_test_evt.query( "mc_energy >= {} and mc_energy < {}".format( energy_edges[ibin], energy_edges[ibin + 1] ) ) print("Estimate energy for {} evts".format(len(data))) er = data["reco_energy"] emc = data["mc_energy"] opt_hist = { "edgecolor": "black", "color": "darkgreen", "label": "data", "alpha": 0.7, "fill": True, } opt_fit = {"c": "red", "lw": 2, "label": "Best fit"} ax, fit_param, cov = diagnostic[cam_id].plot_resolution_distribution( ax=ax, y_true=emc, y_reco=er, nbin=50, fit_range=[-2, 2], hist_kwargs=opt_hist, fit_kwargs=opt_fit, ) if fit_param[2] < 0: # negative value are allowed for the fit fit_param[2] *= -1 label = "[{:.2f},{:.2f}] TeV\n#Evts={}\nmean={:.2f}\nstd={:.2f}".format( energy_edges[ibin], energy_edges[ibin + 1], len(er), fit_param[1], fit_param[2], ) ax.set_ylabel("# Evts") ax.set_xlabel("(ereco-emc) / emc") ax.set_xlim([-2, 2]) ax.grid() evt_patch = mpatches.Patch(color="white", label=label) data_patch = mpatches.Patch(color="blue", label="data") fit_patch = mpatches.Patch(color="red", label="best fit") ax.legend(loc="best", handles=[evt_patch, data_patch, fit_patch]) plt.tight_layout() print( " Fit results: ({:.3f},{:.3f} TeV)".format( energy_edges[ibin], energy_edges[ibin + 1] ) ) try: print(" - A : {:.3f} +/- {:.3f}".format(fit_param[0], cov[0][0])) print(" - mean : {:.3f} +/- {:.3f}".format(fit_param[1], cov[1][1])) print(" - std : {:.3f} +/- {:.3f}".format(fit_param[2], cov[2][2])) except: print(" ==> Problem with fit, no covariance...".format()) continue bias.append(fit_param[1]) resolution.append(fit_param[2]) energy_centres.append( (energy_edges[ibin] + energy_edges[ibin + 1]) / 2.0 ) save_fig(outdir, "migration_distribution", fig=fig) plt.figure(figsize=(5, 5)) ax = plt.gca() ax.plot( energy_centres, resolution, marker="s", color="darkorange", label="Resolution", ) ax.plot(energy_centres, bias, marker="s", color="darkgreen", label="Bias") ax.set_xlabel("True energy [TeV]") ax.set_ylabel("Energy resolution") ax.set_xscale("log") ax.grid() ax.legend() ax.set_ylim([-0.2, 1.2]) plt.title(cam_id) plt.tight_layout() save_fig(outdir, "energy_resolution") # Write results t = Table() t["ENERGY"] = Column( energy_centres, unit="TeV", description="Energy centers" ) t["BIAS"] = Column(bias, unit="", description="Bias from gauusian fit") t["RESOL"] = Column( bias, unit="", description="Resolution from gauusian fit" ) t.write( os.path.join(outdir, "energy_resolution.fits"), format="fits", overwrite=True, ) elif model_type in "classifier": # Image-level diagnostic - features fig, axes = diagnostic[cam_id].plot_features( data_list=[ data_train.query("label==1"), data_test.query("label==1"), data_train.query("label==0"), data_test.query("label==0"), ], nbin=30, hist_kwargs_list=[ { "edgecolor": "blue", "color": "blue", "label": "Gamma training sample", "alpha": 0.2, "fill": True, "ls": "-", "lw": 2, }, { "edgecolor": "blue", "color": "blue", "label": "Gamma test sample", "alpha": 1, "fill": False, 
"ls": "--", "lw": 2, }, { "edgecolor": "red", "color": "red", "label": "Proton training sample", "alpha": 0.2, "fill": True, "ls": "-", "lw": 2, }, { "edgecolor": "red", "color": "red", "label": "Proton test sample", "alpha": 1, "fill": False, "ls": "--", "lw": 2, }, ], error_kw_list=[ dict(ecolor="blue", lw=2, capsize=3, capthick=2, alpha=0.2), dict(ecolor="blue", lw=2, capsize=3, capthick=2, alpha=1), dict(ecolor="red", lw=2, capsize=3, capthick=2, alpha=0.2), dict(ecolor="red", lw=2, capsize=3, capthick=2, alpha=1), ], ncols=3, ) plt.title(cam_id) fig.tight_layout() save_fig(outdir, "features", fig=fig) if method_name in "AdaBoostClassifier": # Image-level diagnostic - method plt.figure(figsize=(5, 5)) ax = plt.gca() opt = {"color": "darkgreen", "ls": "-", "lw": 2} BoostedDecisionTreeDiagnostic.plot_error_rate( ax, model, data_scikit, **opt ) plt.title(cam_id) plt.tight_layout() save_fig(path, outdir, "bdt_diagnostic_error_rate") plt.figure(figsize=(5, 5)) ax = plt.gca() BoostedDecisionTreeDiagnostic.plot_tree_error_rate(ax, model, **opt) plt.title(cam_id) plt.tight_layout() save_fig(path, outdir, "bdt_diagnostic_tree_error_rate") # Image-level diagnostic - model output fig, ax = diagnostic[cam_id].plot_image_model_output_distribution(nbin=50) ax[0].set_xlim([0, 1]) plt.title(cam_id) fig.tight_layout() save_fig(outdir, "image_distribution", fig=fig) # Image-level diagnostic - ROC curve on train and test samples plt.figure(figsize=(5, 5)) ax = plt.gca() plot_roc_curve( ax, diagnostic[cam_id].data_train[diagnostic[cam_id].model_output_name], diagnostic[cam_id].data_train["label"], **dict(color="darkgreen", lw=2, label="Training sample") ) plot_roc_curve( ax, data_test[diagnostic[cam_id].model_output_name], diagnostic[cam_id].data_test["label"], **dict(color="darkorange", lw=2, label="Test sample") ) ax.set_xlabel("False Positive Rate") ax.set_ylabel("True Positive Rate") ax.plot([0, 1], [0, 1], color="navy", lw=2, linestyle="--") ax.legend(loc="lower right") plt.title(cam_id) plt.tight_layout() save_fig(outdir, "image_roc_curve") # Parameters for energy variation cut_list = [ "reco_energy >= {:.2f} and reco_energy <= {:.2f}".format( energy_edges[i], energy_edges[i + 1] ) for i in range(len(energy_edges) - 1) ] hist_kwargs_list = [ { "edgecolor": "blue", "color": "blue", "label": "Gamma training sample", "alpha": 0.2, "fill": True, "ls": "-", "lw": 2, }, { "edgecolor": "blue", "color": "blue", "label": "Gamma test sample", "alpha": 1, "fill": False, "ls": "--", "lw": 2, }, { "edgecolor": "red", "color": "red", "label": "Proton training sample", "alpha": 0.2, "fill": True, "ls": "-", "lw": 2, }, { "edgecolor": "red", "color": "red", "label": "Proton test sample", "alpha": 1, "fill": False, "ls": "--", "lw": 2, }, ] error_kw_list = [ dict(ecolor="blue", lw=2, capsize=3, capthick=2, alpha=0.2), dict(ecolor="blue", lw=2, capsize=3, capthick=2, alpha=1), dict(ecolor="red", lw=2, capsize=3, capthick=2, alpha=0.2), dict(ecolor="red", lw=2, capsize=3, capthick=2, alpha=1), ] # Image-level diagnostic - model output distribution variation n_feature = len(cut_list) ncols = 2 nrows = ( int(n_feature / ncols) if n_feature % ncols == 0 else int((n_feature + 1) / ncols) ) fig, axes = plt.subplots( nrows=nrows, ncols=ncols, figsize=(5 * ncols, 3 * nrows) ) if nrows == 1 and ncols == 1: axes = [axes] else: axes = axes.flatten() data_list = [ data_train.query("label==1"), data_test.query("label==1"), data_train.query("label==0"), data_test.query("label==0"), ] for i, colname in enumerate(cut_list): ax = 
axes[i] # Range for binning the_range = [0, 1] for j, data in enumerate(data_list): if len(data) == 0: continue ax = plot_hist( ax=ax, data=data.query(cut_list[i])[ouput_model_name], nbin=30, limit=the_range, norm=True, yerr=True, hist_kwargs=hist_kwargs_list[j], error_kw=error_kw_list[j], ) ax.set_xlim(the_range) ax.set_xlabel(ouput_model_name) ax.set_ylabel("Arbitrary units") ax.legend(loc="best", fontsize="x-small") ax.set_title(cut_list[i]) ax.grid() fig.tight_layout() save_fig(outdir, "image_distribution_variation", fig=fig) # Image-level diagnostic - ROC curve variation on test sample plt.figure(figsize=(5, 5)) ax = plt.gca() color = 1.0 step_color = 1.0 / (len(cut_list)) for i, cut in enumerate(cut_list): c = color - (i + 1) * step_color data = data_test.query(cut) if len(data) == 0: continue opt = dict( color=str(c), lw=2, label="{}".format(cut.replace("reco_energy", "E")), ) plot_roc_curve(ax, data[ouput_model_name], data["label"], **opt) ax.plot([0, 1], [0, 1], color="navy", lw=2, linestyle="--") ax.set_title(cam_id) ax.set_xlabel("False Positive Rate") ax.set_ylabel("True Positive Rate") ax.legend(loc="lower right", fontsize="x-small") plt.tight_layout() save_fig(outdir, "image_roc_curve_variation")
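
# --- Illustrative sketch (not part of the pipeline) ---
# plot_resolution_distribution (used above) presumably histograms the
# relative error (ereco - emc) / emc and fits a gaussian to it; the bias and
# resolution are then read off as the fitted mean and sigma. The parameter
# order [amplitude, mean, sigma] is inferred from how fit_param is indexed
# above. A minimal stand-alone version of that fit, assuming scipy is
# available; the helper names are hypothetical:
import numpy as np
from scipy.optimize import curve_fit


def gauss(x, amplitude, mean, sigma):
    return amplitude * np.exp(-0.5 * ((x - mean) / sigma) ** 2)


def fit_migration(e_reco, e_true, nbin=50, fit_range=(-2, 2)):
    """Fit a gaussian to (e_reco - e_true) / e_true; return (bias, resolution)."""
    rel_err = (np.asarray(e_reco) - np.asarray(e_true)) / np.asarray(e_true)
    counts, edges = np.histogram(rel_err, bins=nbin, range=fit_range)
    centres = 0.5 * (edges[:-1] + edges[1:])
    popt, _ = curve_fit(gauss, centres, counts, p0=[counts.max(), 0.0, 0.1])
    return popt[1], abs(popt[2])  # sigma can come out negative; take |sigma|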
def main():

    # Argument parser
    parser = make_argparser()
    parser.add_argument("--regressor_dir", default="./",
                        help="regressors directory")
    parser.add_argument("--classifier_dir", default="./",
                        help="classifiers directory")
    parser.add_argument(
        "--force_tailcut_for_extended_cleaning",
        type=str2bool,
        default=False,
        help="Force tailcut cleaning for energy/score estimation",
    )
    parser.add_argument(
        "--save_images",
        action="store_true",
        help="Save images in images.h5 (one file testing)",
    )
    args = parser.parse_args()

    # Read configuration file
    cfg = load_config(args.config_file)

    # Read site layout
    site = cfg["General"]["site"]
    array = cfg["General"]["array"]
    cameras = cfg["General"]["cam_id_list"]

    # Add force_tailcut_for_extended_cleaning to the configuration
    cfg["General"]["force_tailcut_for_extended_cleaning"] = \
        args.force_tailcut_for_extended_cleaning
    cfg["General"]["force_mode"] = "tail"
    force_mode = args.mode
    if cfg["General"]["force_tailcut_for_extended_cleaning"] is True:
        force_mode = "tail"
    print("force_mode={}".format(force_mode))
    print("mode={}".format(args.mode))

    if args.infile_list:
        filenamelist = []
        for f in args.infile_list:
            filenamelist += glob("{}/{}".format(args.indir, f))
        filenamelist.sort()

    if not filenamelist:
        print("no files found; check indir: {}".format(args.indir))
        exit(-1)

    # keeping track of events and where they were rejected
    evt_cutflow = CutFlow("EventCutFlow")
    img_cutflow = CutFlow("ImageCutFlow")

    # Event preparer
    preper = EventPreparer(config=cfg,
                           mode=args.mode,
                           event_cutflow=evt_cutflow,
                           image_cutflow=img_cutflow)

    # Regressor and classifier methods
    regressor_method = cfg["EnergyRegressor"]["method_name"]
    classifier_method = cfg["GammaHadronClassifier"]["method_name"]
    use_proba_for_classifier = cfg["GammaHadronClassifier"]["use_proba"]

    if regressor_method in ["None", "none", None]:
        use_regressor = False
    else:
        use_regressor = True

    if classifier_method in ["None", "none", None]:
        use_classifier = False
    else:
        use_classifier = True

    # Classifiers
    if use_classifier:
        classifier_files = (
            args.classifier_dir +
            "/classifier_{mode}_{cam_id}_{classifier}.pkl.gz"
        )
        clf_file = classifier_files.format(
            **{
                "mode": force_mode,
                "wave_args": "mixed",
                "classifier": classifier_method,
                "cam_id": "{cam_id}",
            })
        classifier = EventClassifier.load(clf_file, cam_id_list=cameras)

    # Regressors
    if use_regressor:
        regressor_files = (
            args.regressor_dir +
            "/regressor_{mode}_{cam_id}_{regressor}.pkl.gz"
        )
        reg_file = regressor_files.format(
            **{
                "mode": force_mode,
                "wave_args": "mixed",
                "regressor": regressor_method,
                "cam_id": "{cam_id}",
            })
        regressor = EnergyRegressor.load(reg_file, cam_id_list=cameras)

    # catch ctrl-c signal to exit the current loop and still display results
    signal_handler = SignalHandler()
    signal.signal(signal.SIGINT, signal_handler)

    # Declaration of the column descriptor for the (possible) images file
    class StoredImages(tb.IsDescription):
        event_id = tb.Int32Col(dflt=1, pos=0)
        tel_id = tb.Int16Col(dflt=1, pos=1)
        dl1_phe_image = tb.Float32Col(shape=(1855), pos=2)
        mc_phe_image = tb.Float32Col(shape=(1855), pos=3)

    # this class defines the reconstruction parameters to keep track of
    class RecoEvent(tb.IsDescription):
        obs_id = tb.Int16Col(dflt=-1, pos=0)
        event_id = tb.Int32Col(dflt=-1, pos=1)
        NTels_trig = tb.Int16Col(dflt=0, pos=2)
        NTels_reco = tb.Int16Col(dflt=0, pos=3)
        NTels_reco_lst = tb.Int16Col(dflt=0, pos=4)
        NTels_reco_mst = tb.Int16Col(dflt=0, pos=5)
        NTels_reco_sst = tb.Int16Col(dflt=0, pos=6)
        mc_energy = tb.Float32Col(dflt=np.nan, pos=7)
        reco_energy = tb.Float32Col(dflt=np.nan, pos=8)
        reco_alt = tb.Float32Col(dflt=np.nan, pos=9)
        reco_az = tb.Float32Col(dflt=np.nan, pos=10)
        offset = tb.Float32Col(dflt=np.nan, pos=11)
        xi = tb.Float32Col(dflt=np.nan, pos=12)
        ErrEstPos = tb.Float32Col(dflt=np.nan, pos=13)
        ErrEstDir = tb.Float32Col(dflt=np.nan, pos=14)
        gammaness = tb.Float32Col(dflt=np.nan, pos=15)
        success = tb.BoolCol(dflt=False, pos=16)
        score = tb.Float32Col(dflt=np.nan, pos=17)
        h_max = tb.Float32Col(dflt=np.nan, pos=18)
        reco_core_x = tb.Float32Col(dflt=np.nan, pos=19)
        reco_core_y = tb.Float32Col(dflt=np.nan, pos=20)
        mc_core_x = tb.Float32Col(dflt=np.nan, pos=21)
        mc_core_y = tb.Float32Col(dflt=np.nan, pos=22)

    reco_outfile = tb.open_file(
        mode="w",
        # if no outfile name is given (i.e. we don't want to write the event
        # list to disk), we need to specify two "driver" arguments
        **({"filename": args.outfile} if args.outfile else {
            "filename": "no_outfile.h5",
            "driver": "H5FD_CORE",
            "driver_core_backing_store": False,
        }))

    reco_table = reco_outfile.create_table("/", "reco_events", RecoEvent)
    reco_event = reco_table.row

    # Create the images file only if the user wants to store the images
    if args.save_images is True:
        images_outfile = tb.open_file("images.h5", mode="w")
        images_table = {}
        images_phe = {}

    # Telescopes in the analysis
    allowed_tels = set(prod3b_tel_ids(array, site=site))

    for i, filename in enumerate(filenamelist):

        source = event_source(input_url=filename,
                              allowed_tels=allowed_tels,
                              max_events=args.max_events)

        # loop that cleans and parametrises the images and performs the
        # reconstruction
        for (
            event,
            dl1_phe_image,
            mc_phe_image,
            n_pixel_dict,
            hillas_dict,
            hillas_dict_reco,
            n_tels,
            tot_signal,
            max_signals,
            n_cluster_dict,
            reco_result,
            impact_dict,
        ) in preper.prepare_event(source):

            # Angular quantities
            run_array_direction = event.mcheader.run_array_direction

            # Angular separation between the true and reco direction
            xi = angular_separation(event.mc.az, event.mc.alt,
                                    reco_result.az, reco_result.alt)

            # Angular separation between the center of the camera and
            # the reco direction
            offset = angular_separation(
                run_array_direction[0],  # az
                run_array_direction[1],  # alt
                reco_result.az,
                reco_result.alt,
            )

            # Height of shower maximum
            h_max = reco_result.h_max

            if hillas_dict is not None:

                # Estimate the particle energy
                if use_regressor is True:
                    energy_tel = np.zeros(len(hillas_dict.keys()))
                    weight_tel = np.zeros(len(hillas_dict.keys()))

                    for idx, tel_id in enumerate(hillas_dict.keys()):
                        cam_id = event.inst.subarray.tel[tel_id].camera.cam_id
                        moments = hillas_dict[tel_id]
                        model = regressor.model_dict[cam_id]

                        # Features to be fed to the regressor
                        features_img = np.array([
                            np.log10(moments.intensity),
                            np.log10(impact_dict[tel_id].value),
                            moments.width.value,
                            moments.length.value,
                            h_max.value,
                        ])

                        energy_tel[idx] = model.predict([features_img])
                        weight_tel[idx] = moments.intensity

                    reco_energy = np.sum(weight_tel * energy_tel) / sum(weight_tel)
                else:
                    reco_energy = np.nan

                # Estimate the particle score/gammaness
                if use_classifier is True:
                    score_tel = np.zeros(len(hillas_dict.keys()))
                    gammaness_tel = np.zeros(len(hillas_dict.keys()))
                    weight_tel = np.zeros(len(hillas_dict.keys()))

                    for idx, tel_id in enumerate(hillas_dict.keys()):
                        cam_id = event.inst.subarray.tel[tel_id].camera.cam_id
                        moments = hillas_dict[tel_id]
                        model = classifier.model_dict[cam_id]

                        # Features to be fed to the classifier
                        features_img = np.array([
                            np.log10(reco_energy),
                            moments.width.value,
                            moments.length.value,
                            moments.skewness,
                            moments.kurtosis,
                            h_max.value,
                        ])

                        # Output of the classifier according to its type
                        if use_proba_for_classifier is False:
                            score_tel[idx] = model.decision_function(
                                [features_img])
                        else:
                            gammaness_tel[idx] = model.predict_proba(
                                [features_img])[:, 1]

                        # Should test other weighting strategies
                        # (e.g. power of charge, impact, etc.)
                        # For now, weighting a la MARS
                        weight_tel[idx] = np.sqrt(moments.intensity)

                    # Weight the final decision/proba
                    if use_proba_for_classifier is True:
                        gammaness = np.sum(
                            weight_tel * gammaness_tel) / sum(weight_tel)
                    else:
                        score = np.sum(weight_tel * score_tel) / sum(weight_tel)
                else:
                    score = np.nan
                    gammaness = np.nan

                # Regardless of whether energy or gammaness is estimated,
                # if the user wants to save the images of the run we do it here
                # (probably not the most efficient way, but ok for one file)
                if args.save_images is True:
                    for idx, tel_id in enumerate(hillas_dict.keys()):
                        cam_id = event.inst.subarray.tel[tel_id].camera.cam_id
                        if cam_id not in images_phe:
                            images_table[cam_id] = images_outfile.create_table(
                                "/", "_".join(["images", cam_id]), StoredImages)
                            images_phe[cam_id] = images_table[cam_id].row

                shower = event.mc
                mc_core_x = shower.core_x
                mc_core_y = shower.core_y

                reco_core_x = reco_result.core_x
                reco_core_y = reco_result.core_y

                alt, az = reco_result.alt, reco_result.az

                # Fill the table's attributes
                reco_event["NTels_trig"] = len(event.dl0.tels_with_data)
                reco_event["NTels_reco"] = len(hillas_dict)
                reco_event["NTels_reco_lst"] = n_tels["LST_LST_LSTCam"]
                reco_event["NTels_reco_mst"] = n_tels["MST_MST_NectarCam"]
                reco_event["NTels_reco_sst"] = n_tels["SST"]  # will change
                reco_event["reco_energy"] = reco_energy
                reco_event["reco_alt"] = alt.to("deg").value
                reco_event["reco_az"] = az.to("deg").value
                reco_event["offset"] = offset.to("deg").value
                reco_event["xi"] = xi.to("deg").value
                reco_event["h_max"] = h_max.to("m").value
                reco_event["reco_core_x"] = reco_core_x.to("m").value
                reco_event["reco_core_y"] = reco_core_y.to("m").value
                reco_event["mc_core_x"] = mc_core_x.to("m").value
                reco_event["mc_core_y"] = mc_core_y.to("m").value
                if use_proba_for_classifier is True:
                    reco_event["gammaness"] = gammaness
                else:
                    reco_event["score"] = score
                reco_event["success"] = True
                reco_event["ErrEstPos"] = np.nan
                reco_event["ErrEstDir"] = np.nan
            else:
                reco_event["success"] = False

            # save basic event infos
            reco_event["mc_energy"] = event.mc.energy.to("TeV").value
            reco_event["event_id"] = event.r1.event_id
            reco_event["obs_id"] = event.r1.obs_id

            if args.save_images is True:
                images_phe[cam_id]["event_id"] = event.r0.event_id
                images_phe[cam_id]["tel_id"] = tel_id
                images_phe[cam_id]["dl1_phe_image"] = dl1_phe_image
                images_phe[cam_id]["mc_phe_image"] = mc_phe_image
                images_phe[cam_id].append()

            # Fill the table
            reco_event.append()
            reco_table.flush()

            if signal_handler.stop:
                break
        if signal_handler.stop:
            break

    # make sure everything gets written out nicely
    reco_table.flush()

    if args.save_images is True:
        for table in images_table.values():
            table.flush()

    # Add to the meta-data's table?
    try:
        print()
        evt_cutflow()
        print()
        img_cutflow()
    except ZeroDivisionError:
        pass

    print("Job done!")
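
# --- Illustrative sketch (not part of the pipeline) ---
# Both estimators above combine per-telescope outputs into one event-level
# value with a weighted average: the energy uses the image intensity as the
# weight, while the score/gammaness uses sqrt(intensity) ("a la MARS").
# A minimal stand-alone version of that combination; the helper name
# `combine_telescope_outputs` is hypothetical:
import numpy as np


def combine_telescope_outputs(values, intensities, use_sqrt_weights=False):
    """Intensity-weighted average of per-telescope estimates."""
    values = np.asarray(values, dtype=float)
    weights = np.asarray(intensities, dtype=float)
    if use_sqrt_weights:  # the weighting used for the classifier output above
        weights = np.sqrt(weights)
    return np.sum(weights * values) / np.sum(weights)


# Usage: three telescopes seeing the same shower,
# weighted towards the brightest image
# print(combine_telescope_outputs([1.2, 0.9, 1.1], [500, 80, 230]))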
def main():
    # Read arguments
    parser = argparse.ArgumentParser(description='Make diagnostic plot')
    parser.add_argument('--config_file', type=str, required=True)
    mode_group = parser.add_mutually_exclusive_group()
    mode_group.add_argument('--wave', dest="mode", action='store_const',
                            const="wave", default="tail",
                            help="if set, use wavelet cleaning")
    mode_group.add_argument('--tail', dest="mode", action='store_const',
                            const="tail",
                            help="if set, use tail cleaning, otherwise wavelets")
    args = parser.parse_args()

    # Read configuration file
    cfg = load_config(args.config_file)

    model_type = cfg['General']['model_type']

    # Import parameters
    indir = cfg['General']['outdir']

    cam_ids = cfg['General']['cam_id_list']

    # Model
    method_name = cfg['Method']['name']
    target_name = cfg['Method']['target_name']

    if model_type == 'classifier':
        use_proba = cfg['Method']['use_proba']

    # Diagnostic
    nbins = cfg['Diagnostic']['energy']['nbins']
    energy_edges = np.logspace(np.log10(cfg['Diagnostic']['energy']['min']),
                               np.log10(cfg['Diagnostic']['energy']['max']),
                               nbins + 1, True)

    # Will be further used to get the model output of the events
    diagnostic = dict()

    for idx, cam_id in enumerate(cam_ids):
        print('### Model diagnostic for {}'.format(cam_id))

        # Load data
        data_scikit = load_obj(
            path.join(
                indir,
                'data_scikit_{}_{}_{}_{}.pkl.gz'.format(
                    model_type, method_name, args.mode, cam_id)))
        data_train = pd.read_pickle(
            path.join(
                indir,
                'data_train_{}_{}_{}_{}.pkl.gz'.format(
                    model_type, method_name, args.mode, cam_id)))
        data_test = pd.read_pickle(
            path.join(
                indir,
                'data_test_{}_{}_{}_{}.pkl.gz'.format(
                    model_type, method_name, args.mode, cam_id)))

        # Load model
        outname = '{}_{}_{}_{}.pkl.gz'.format(model_type, args.mode, cam_id,
                                              method_name)
        model = joblib.load(path.join(indir, outname))

        outdir = os.path.join(
            indir, 'diagnostic_{}_{}_{}_{}'.format(model_type, method_name,
                                                   args.mode, cam_id))
        if not os.path.exists(outdir):
            os.makedirs(outdir)

        if model_type == 'regressor':
            diagnostic[cam_id] = RegressorDiagnostic(
                model=model,
                feature_name_list=cfg['FeatureList'],
                target_name=target_name,
                data_train=data_train,
                data_test=data_test,
                output_name='reco_energy')
        elif model_type == 'classifier':
            if use_proba is True:
                output_model_name = 'gammaness'
            else:
                output_model_name = 'score'
            diagnostic[cam_id] = ClassifierDiagnostic(
                model=model,
                feature_name_list=cfg['FeatureList'],
                target_name=target_name,
                data_train=data_train,
                data_test=data_test,
                model_output_name=output_model_name,
                is_output_proba=use_proba)

        # Image-level diagnostic - feature importance
        plt.figure(figsize=(5, 5))
        ax = plt.gca()
        ax = diagnostic[cam_id].plot_feature_importance(
            ax,
            **{
                'alpha': 0.7,
                'edgecolor': 'black',
                'linewidth': 2,
                'color': 'darkgreen'
            })
        ax.set_ylabel('Feature importance')
        ax.grid()
        plt.title(cam_id)
        plt.tight_layout()
        plt.savefig(path.join(outdir, 'feature_importances.pdf'))

        # Diagnostic for regressor
        if model_type == 'regressor':

            # Image-level diagnostic - features
            fig, axes = diagnostic[cam_id].plot_features(
                data_list=[data_train, data_test],
                nbin=30,
                hist_kwargs_list=[{
                    'edgecolor': 'blue',
                    'color': 'blue',
                    'label': 'Gamma training',
                    'alpha': 0.2,
                    'fill': True,
                    'ls': '-',
                    'lw': 2
                }, {
                    'edgecolor': 'blue',
                    'color': 'blue',
                    'label': 'Gamma test',
                    'alpha': 1,
                    'fill': False,
                    'ls': '--',
                    'lw': 2
                }],
                error_kw_list=[
                    dict(ecolor='blue', lw=2, capsize=2, capthick=2, alpha=0.2),
                    dict(ecolor='blue', lw=2, capsize=2, capthick=2, alpha=0.2)
                ],
                ncols=3)
            plt.title(cam_id)
            fig.tight_layout()
            fig.savefig(path.join(outdir, 'features.pdf'))

            # Compute averaged energy
            print('Process test sample...')
            data_test_evt = get_evt_subarray_model_output(
                data_test,
                weight_name='sum_signal_cam',
                keep_cols=['mc_energy'],
                model_output_name='reco_energy_img',
                model_output_name_evt='reco_energy')

            ncols = 5
            nrows = int(nbins / ncols) if nbins % ncols == 0 else int(
                (nbins + 1) / ncols)
            if nrows == 0:
                nrows = 1
                ncols = 1
            fig, axes = plt.subplots(nrows=nrows, ncols=ncols,
                                     figsize=(5 * 5, 10))
            try:
                axes = axes.flatten()
            except AttributeError:  # single Axes object, not an array
                axes = [axes]

            bias = []
            resolution = []
            energy_centres = []

            for ibin in range(len(energy_edges) - 1):
                ax = axes[ibin]

                data = data_test_evt.query(
                    'mc_energy >= {} and mc_energy < {}'.format(
                        energy_edges[ibin], energy_edges[ibin + 1]))
                print('Estimate energy for {} evts'.format(len(data)))

                er = data['reco_energy']
                emc = data['mc_energy']

                opt_hist = {
                    'edgecolor': 'black',
                    'color': 'darkgreen',
                    'label': 'data',
                    'alpha': 0.7,
                    'fill': True
                }
                opt_fit = {'c': 'red', 'lw': 2, 'label': 'Best fit'}
                ax, fit_param, cov = \
                    diagnostic[cam_id].plot_resolution_distribution(
                        ax=ax,
                        y_true=emc,
                        y_reco=er,
                        nbin=50,
                        fit_range=[-2, 2],
                        hist_kwargs=opt_hist,
                        fit_kwargs=opt_fit)
                if fit_param[2] < 0:
                    # the fit can return a negative sigma;
                    # take its absolute value
                    fit_param[2] *= -1

                label = '[{:.2f},{:.2f}] TeV\n#Evts={}\nmean={:.2f}\nstd={:.2f}'.format(
                    energy_edges[ibin], energy_edges[ibin + 1], len(er),
                    fit_param[1], fit_param[2])

                ax.set_ylabel('# Evts')
                ax.set_xlabel('(ereco-emc) / emc')
                ax.set_xlim([-2, 2])
                ax.grid()

                evt_patch = mpatches.Patch(color='white', label=label)
                data_patch = mpatches.Patch(color='blue', label='data')
                fit_patch = mpatches.Patch(color='red', label='best fit')
                ax.legend(loc='best',
                          handles=[evt_patch, data_patch, fit_patch])
                plt.tight_layout()

                print(' Fit results: ({:.3f},{:.3f} TeV)'.format(
                    energy_edges[ibin], energy_edges[ibin + 1]))

                try:
                    print(' - A    : {:.3f} +/- {:.3f}'.format(
                        fit_param[0], cov[0][0]))
                    print(' - mean : {:.3f} +/- {:.3f}'.format(
                        fit_param[1], cov[1][1]))
                    print(' - std  : {:.3f} +/- {:.3f}'.format(
                        fit_param[2], cov[2][2]))
                except (TypeError, IndexError):
                    print(' ==> Problem with fit, no covariance...')
                    continue

                bias.append(fit_param[1])
                resolution.append(fit_param[2])
                energy_centres.append(
                    (energy_edges[ibin] + energy_edges[ibin + 1]) / 2.)

            plt.savefig(path.join(outdir, 'migration_distribution.pdf'))

            plt.figure(figsize=(5, 5))
            ax = plt.gca()
            ax.plot(energy_centres,
                    resolution,
                    marker='s',
                    color='darkorange',
                    label='Resolution')
            ax.plot(energy_centres,
                    bias,
                    marker='s',
                    color='darkgreen',
                    label='Bias')
            ax.set_xlabel('True energy [TeV]')
            ax.set_ylabel('Energy resolution')
            ax.set_xscale('log')
            ax.grid()
            ax.legend()
            ax.set_ylim([-0.2, 1.2])
            plt.title(cam_id)
            plt.tight_layout()
            plt.savefig(path.join(outdir, 'energy_resolution.pdf'))

            # Write results
            t = Table()
            t['ENERGY'] = Column(energy_centres,
                                 unit='TeV',
                                 description='Energy centres')
            t['BIAS'] = Column(bias, unit='',
                               description='Bias from gaussian fit')
            t['RESOL'] = Column(resolution,
                                unit='',
                                description='Resolution from gaussian fit')
            t.write(os.path.join(outdir, 'energy_resolution.fits'),
                    format='fits',
                    overwrite=True)

        elif model_type == 'classifier':
            # Image-level diagnostic - features
            fig, axes = diagnostic[cam_id].plot_features(
                data_list=[
                    data_train.query('label==1'),
                    data_test.query('label==1'),
                    data_train.query('label==0'),
                    data_test.query('label==0')
                ],
                nbin=30,
                hist_kwargs_list=[{
                    'edgecolor': 'blue',
                    'color': 'blue',
                    'label': 'Gamma training sample',
                    'alpha': 0.2,
                    'fill': True,
                    'ls': '-',
                    'lw': 2
                }, {
                    'edgecolor': 'blue',
                    'color': 'blue',
                    'label': 'Gamma test sample',
                    'alpha': 1,
                    'fill': False,
                    'ls': '--',
                    'lw': 2
                }, {
                    'edgecolor': 'red',
                    'color': 'red',
                    'label': 'Proton training sample',
                    'alpha': 0.2,
                    'fill': True,
                    'ls': '-',
                    'lw': 2
                }, {
                    'edgecolor': 'red',
                    'color': 'red',
                    'label': 'Proton test sample',
                    'alpha': 1,
                    'fill': False,
                    'ls': '--',
                    'lw': 2
                }],
                error_kw_list=[
                    dict(ecolor='blue', lw=2, capsize=3, capthick=2, alpha=0.2),
                    dict(ecolor='blue', lw=2, capsize=3, capthick=2, alpha=1),
                    dict(ecolor='red', lw=2, capsize=3, capthick=2, alpha=0.2),
                    dict(ecolor='red', lw=2, capsize=3, capthick=2, alpha=1)
                ],
                ncols=3)
            plt.title(cam_id)
            fig.tight_layout()
            fig.savefig(path.join(outdir, 'features.pdf'))

            if method_name == 'AdaBoostClassifier':
                # Image-level diagnostic - method
                plt.figure(figsize=(5, 5))
                ax = plt.gca()
                opt = {'color': 'darkgreen', 'ls': '-', 'lw': 2}
                BoostedDecisionTreeDiagnostic.plot_error_rate(
                    ax, model, data_scikit, **opt)
                plt.title(cam_id)
                plt.tight_layout()
                plt.savefig(
                    os.path.join(outdir, 'bdt_diagnostic_error_rate.pdf'))

                plt.figure(figsize=(5, 5))
                ax = plt.gca()
                BoostedDecisionTreeDiagnostic.plot_tree_error_rate(
                    ax, model, **opt)
                plt.title(cam_id)
                plt.tight_layout()
                plt.savefig(
                    os.path.join(outdir, 'bdt_diagnostic_tree_error_rate.pdf'))

            # Image-level diagnostic - model output
            fig, ax = diagnostic[cam_id].plot_image_model_output_distribution(
                nbin=50)
            ax[0].set_xlim([0, 1])
            plt.title(cam_id)
            fig.tight_layout()
            fig.savefig(os.path.join(outdir, 'image_distribution.pdf'))

            # Image-level diagnostic - ROC curve on train and test samples
            plt.figure(figsize=(5, 5))
            ax = plt.gca()
            plot_roc_curve(
                ax,
                diagnostic[cam_id].data_train[
                    diagnostic[cam_id].model_output_name],
                diagnostic[cam_id].data_train['label'],
                **dict(color='darkgreen', lw=2, label='Training sample'))
            plot_roc_curve(
                ax,
                data_test[diagnostic[cam_id].model_output_name],
                diagnostic[cam_id].data_test['label'],
                **dict(color='darkorange', lw=2, label='Test sample'))
            ax.set_xlabel('False Positive Rate')
            ax.set_ylabel('True Positive Rate')
            ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
            ax.legend(loc='lower right')
            plt.title(cam_id)
            plt.tight_layout()
            plt.savefig(os.path.join(outdir, 'image_roc_curve.pdf'))

            # Parameters for energy variation
            cut_list = [
                'reco_energy >= {:.2f} and reco_energy <= {:.2f}'.format(
                    energy_edges[i], energy_edges[i + 1])
                for i in range(len(energy_edges) - 1)
            ]
            hist_kwargs_list = [{
                'edgecolor': 'blue',
                'color': 'blue',
                'label': 'Gamma training sample',
                'alpha': 0.2,
                'fill': True,
                'ls': '-',
                'lw': 2
            }, {
                'edgecolor': 'blue',
                'color': 'blue',
                'label': 'Gamma test sample',
                'alpha': 1,
                'fill': False,
                'ls': '--',
                'lw': 2
            }, {
                'edgecolor': 'red',
                'color': 'red',
                'label': 'Proton training sample',
                'alpha': 0.2,
                'fill': True,
                'ls': '-',
                'lw': 2
            }, {
                'edgecolor': 'red',
                'color': 'red',
                'label': 'Proton test sample',
                'alpha': 1,
                'fill': False,
                'ls': '--',
                'lw': 2
            }]
            error_kw_list = [
                dict(ecolor='blue', lw=2, capsize=3, capthick=2, alpha=0.2),
                dict(ecolor='blue', lw=2, capsize=3, capthick=2, alpha=1),
                dict(ecolor='red', lw=2, capsize=3, capthick=2, alpha=0.2),
                dict(ecolor='red', lw=2, capsize=3, capthick=2, alpha=1)
            ]

            # Image-level diagnostic - model output distribution variation
            n_feature = len(cut_list)
            ncols = 2
            nrows = int(n_feature / ncols) if n_feature % ncols == 0 else int(
                (n_feature + 1) / ncols)
            fig, axes = plt.subplots(nrows=nrows,
                                     ncols=ncols,
                                     figsize=(5 * ncols, 3 * nrows))
            if nrows == 1 and ncols == 1:
                axes = [axes]
            else:
                axes = axes.flatten()

            data_list = [
                data_train.query('label==1'),
                data_test.query('label==1'),
                data_train.query('label==0'),
                data_test.query('label==0')
            ]

            for i, colname in enumerate(cut_list):
                ax = axes[i]

                # Range for binning
                the_range = [0, 1]

                for j, data in enumerate(data_list):
                    if len(data) == 0:
                        continue
                    ax = plot_hist(
                        ax=ax,
                        data=data.query(cut_list[i])[output_model_name],
                        nbin=30,
                        limit=the_range,
                        norm=True,
                        yerr=True,
                        hist_kwargs=hist_kwargs_list[j],
                        error_kw=error_kw_list[j])

                ax.set_xlim(the_range)
                ax.set_xlabel(output_model_name)
                ax.set_ylabel('Arbitrary units')
                ax.legend(loc='best', fontsize='x-small')
                ax.set_title(cut_list[i])
                ax.grid()
            fig.tight_layout()
            fig.savefig(path.join(outdir, 'image_distribution_variation.pdf'))

            # Image-level diagnostic - ROC curve variation on test sample
            plt.figure(figsize=(5, 5))
            ax = plt.gca()
            color = 1.
            step_color = 1. / (len(cut_list))
            for i, cut in enumerate(cut_list):
                c = color - (i + 1) * step_color

                data = data_test.query(cut)
                if len(data) == 0:
                    continue

                opt = dict(color=str(c),
                           lw=2,
                           label='{}'.format(cut.replace('reco_energy', 'E')))
                plot_roc_curve(ax, data[output_model_name], data['label'],
                               **opt)
            ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
            ax.set_title(cam_id)
            ax.set_xlabel('False Positive Rate')
            ax.set_ylabel('True Positive Rate')
            ax.legend(loc="lower right", fontsize='x-small')
            plt.tight_layout()
            plt.savefig(os.path.join(outdir, 'image_roc_curve_variation.pdf'))
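
# --- Illustrative sketch (not part of the pipeline) ---
# plot_roc_curve (used above) presumably wraps the standard ROC computation:
# rank events by the model output, then trace the true-positive rate against
# the false-positive rate. A minimal version assuming scikit-learn is
# available; the helper name `draw_roc` is hypothetical:
import matplotlib.pyplot as plt
from sklearn.metrics import auc, roc_curve


def draw_roc(ax, scores, labels, **plot_kwargs):
    """Plot a ROC curve with its AUC for binary labels (1=gamma, 0=proton)."""
    fpr, tpr, _ = roc_curve(labels, scores)
    label = plot_kwargs.pop('label', '') + ' (AUC={:.3f})'.format(auc(fpr, tpr))
    ax.plot(fpr, tpr, label=label, **plot_kwargs)
    return ax


# Usage (data_test here stands for the test DataFrame loaded above):
# fig, ax = plt.subplots()
# draw_roc(ax, data_test['gammaness'], data_test['label'],
#          color='darkorange', lw=2, label='Test sample')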
def main(): # Argument parser parser = make_argparser() parser.add_argument( "--debug", action="store_true", help="Print debugging information", ) parser.add_argument( "--save_images", action="store_true", help="Save also all images", ) parser.add_argument( "--estimate_energy", type=str2bool, default=False, help="Estimate the events' energy with a regressor from\ protopipe.scripts.build_model", ) parser.add_argument("--regressor_dir", type=str, default="./", help="regressors directory") args = parser.parse_args() # Read configuration file cfg = load_config(args.config_file) try: # If the user didn't specify a site and/or and array... site = cfg["General"]["site"] array = cfg["General"]["array"] except KeyError: # ...raise an error and exit. print("\033[91m ERROR: make sure that both 'site' and 'array' are " "specified in the analysis configuration file! \033[0m") exit() if args.infile_list: filenamelist = [] for f in args.infile_list: filenamelist += glob("{}/{}".format(args.indir, f)) filenamelist.sort() else: raise ValueError("don't know which input to use...") if not filenamelist: print("no files found; check indir: {}".format(args.indir)) exit(-1) else: print("found {} files".format(len(filenamelist))) # Get the IDs of the involved telescopes and associated cameras together # with the equivalent focal lengths from the first event allowed_tels, cams_and_foclens, subarray = prod3b_array( filenamelist[0], site, array) # keeping track of events and where they were rejected evt_cutflow = CutFlow("EventCutFlow") img_cutflow = CutFlow("ImageCutFlow") preper = EventPreparer( config=cfg, subarray=subarray, cams_and_foclens=cams_and_foclens, mode=args.mode, event_cutflow=evt_cutflow, image_cutflow=img_cutflow, ) # catch ctr-c signal to exit current loop and still display results signal_handler = SignalHandler() signal.signal(signal.SIGINT, signal_handler) # Regressor method regressor_method = cfg["EnergyRegressor"]["method_name"] # wrapper for the scikit-learn regressor if args.estimate_energy is True: regressor_files = (args.regressor_dir + "/regressor_{mode}_{cam_id}_{regressor}.pkl.gz") reg_file = regressor_files.format( **{ "mode": args.mode, "wave_args": "mixed", # ToDo, control "regressor": regressor_method, "cam_id": "{cam_id}", }) regressor = EnergyRegressor.load(reg_file, cam_id_list=cams_and_foclens.keys()) # COLUMN DESCRIPTOR AS DICTIONARY # Column descriptor for the file containing output training data.""" DataTrainingOutput = dict( # ====================================================================== # ARRAY obs_id=tb.Int16Col(dflt=1, pos=0), event_id=tb.Int32Col(dflt=1, pos=1), tel_id=tb.Int16Col(dflt=1, pos=2), N_LST=tb.Int16Col(dflt=1, pos=3), N_MST=tb.Int16Col(dflt=1, pos=4), N_SST=tb.Int16Col(dflt=1, pos=5), n_tel_reco=tb.FloatCol(dflt=1, pos=6), n_tel_discri=tb.FloatCol(dflt=1, pos=7), # ====================================================================== # DL1 hillas_intensity_reco=tb.Float32Col(dflt=1, pos=8), hillas_intensity=tb.Float32Col(dflt=1, pos=9), hillas_x_reco=tb.Float32Col(dflt=1, pos=10), hillas_y_reco=tb.Float32Col(dflt=1, pos=11), hillas_x=tb.Float32Col(dflt=1, pos=12), hillas_y=tb.Float32Col(dflt=1, pos=13), hillas_r_reco=tb.Float32Col(dflt=1, pos=14), hillas_r=tb.Float32Col(dflt=1, pos=15), hillas_phi_reco=tb.Float32Col(dflt=1, pos=16), hillas_phi=tb.Float32Col(dflt=1, pos=17), hillas_length_reco=tb.Float32Col(dflt=1, pos=18), hillas_length=tb.Float32Col(dflt=1, pos=19), hillas_width_reco=tb.Float32Col(dflt=1, pos=20), hillas_width=tb.Float32Col(dflt=1, 
pos=21), hillas_psi_reco=tb.Float32Col(dflt=1, pos=22), hillas_psi=tb.Float32Col(dflt=1, pos=23), hillas_skewness_reco=tb.Float32Col(dflt=1, pos=24), hillas_skewness=tb.Float32Col(dflt=1, pos=25), hillas_kurtosis=tb.Float32Col(dflt=1, pos=26), hillas_kurtosis_reco=tb.Float32Col(dflt=1, pos=27), leakage_intensity_width_1_reco=tb.Float32Col(dflt=np.nan, pos=28), leakage_intensity_width_2_reco=tb.Float32Col(dflt=np.nan, pos=29), leakage_intensity_width_1=tb.Float32Col(dflt=np.nan, pos=30), leakage_intensity_width_2=tb.Float32Col(dflt=np.nan, pos=31), # The following are missing from current ctapipe DL1 output # Not sure if it's worth adding them hillas_ellipticity_reco=tb.FloatCol(dflt=1, pos=32), hillas_ellipticity=tb.FloatCol(dflt=1, pos=33), max_signal_cam=tb.Float32Col(dflt=1, pos=34), pixels=tb.Int16Col(dflt=1, pos=35), clusters=tb.Int16Col(dflt=-1, pos=36), # ====================================================================== # DL2 - DIRECTION RECONSTRUCTION impact_dist=tb.Float32Col(dflt=1, pos=37), h_max=tb.Float32Col(dflt=1, pos=38), alt=tb.Float32Col(dflt=np.nan, pos=39), az=tb.Float32Col(dflt=np.nan, pos=40), err_est_pos=tb.Float32Col(dflt=1, pos=41), err_est_dir=tb.Float32Col(dflt=1, pos=42), xi=tb.Float32Col(dflt=np.nan, pos=43), offset=tb.Float32Col(dflt=np.nan, pos=44), mc_core_x=tb.FloatCol(dflt=1, pos=45), mc_core_y=tb.FloatCol(dflt=1, pos=46), reco_core_x=tb.FloatCol(dflt=1, pos=47), reco_core_y=tb.FloatCol(dflt=1, pos=48), mc_h_first_int=tb.FloatCol(dflt=1, pos=49), mc_x_max=tb.Float32Col(dflt=np.nan, pos=50), is_valid=tb.BoolCol(dflt=False, pos=51), good_image=tb.Int16Col(dflt=1, pos=52), # ====================================================================== # DL2 - ENERGY ESTIMATION true_energy=tb.FloatCol(dflt=1, pos=53), reco_energy=tb.FloatCol(dflt=np.nan, pos=54), reco_energy_tel=tb.Float32Col(dflt=np.nan, pos=55), # ====================================================================== # DL1 IMAGES # this is optional data saved by the user # since these data declarations require knowing how many pixels # each saved image will have, # we add them later on, right before creating the table # We list them here for reference # true_image=tb.Float32Col(shape=(1855), pos=56), # reco_image=tb.Float32Col(shape=(1855), pos=57), # cleaning_mask_reco=tb.BoolCol(shape=(1855), pos=58), # not in ctapipe ) outfile = tb.open_file(args.outfile, mode="w") outTable = {} outData = {} for i, filename in enumerate(filenamelist): print("file: {} filename = {}".format(i, filename)) source = event_source(input_url=filename, allowed_tels=allowed_tels, max_events=args.max_events) # loop that cleans and parametrises the images and performs the # reconstruction for each event for ( event, reco_image, cleaning_mask_reco, cleaning_mask_clusters, true_image, n_pixel_dict, hillas_dict, hillas_dict_reco, leakage_dict, n_tels, max_signals, n_cluster_dict, reco_result, impact_dict, good_event, good_for_reco, ) in preper.prepare_event(source, save_images=args.save_images, debug=args.debug): # Angular quantities run_array_direction = event.mcheader.run_array_direction if good_event: xi = angular_separation(event.mc.az, event.mc.alt, reco_result.az, reco_result.alt) offset = angular_separation( run_array_direction[0], # az run_array_direction[1], # alt reco_result.az, reco_result.alt, ) # Impact parameter reco_core_x = reco_result.core_x reco_core_y = reco_result.core_y # Height of shower maximum h_max = reco_result.h_max # TODO: add conversion to number of radiation lengths, # need an atmosphere
profile is_valid = True else: # something went wrong and the shower's reconstruction failed xi = np.nan * u.deg offset = np.nan * u.deg reco_core_x = np.nan * u.m reco_core_y = np.nan * u.m h_max = np.nan * u.m reco_result.alt = np.nan * u.deg reco_result.az = np.nan * u.deg is_valid = False reco_energy = np.nan reco_energy_tel = dict() # Not optimal at all, two loops over telescopes!!! # For energy estimation # Estimate energy only if the shower was reconstructed if (args.estimate_energy is True) and is_valid: weight_tel = np.zeros(len(hillas_dict.keys())) energy_tel = np.zeros(len(hillas_dict.keys())) for idx, tel_id in enumerate(hillas_dict.keys()): # use only images that survived cleaning and # parametrization if not good_for_reco[tel_id]: # bad images will get an undetermined energy # this is a per-telescope energy # NOT the estimated energy for the shower reco_energy_tel[tel_id] = np.nan continue cam_id = source.subarray.tel[tel_id].camera.camera_name moments = hillas_dict[tel_id] model = regressor.model_dict[cam_id] features_img = np.array([ np.log10(moments.intensity), np.log10(impact_dict[tel_id].value), moments.width.value, moments.length.value, h_max.value, ]) energy_tel[idx] = model.predict([features_img]) weight_tel[idx] = moments.intensity reco_energy_tel[tel_id] = energy_tel[idx] reco_energy = np.sum(weight_tel * energy_tel) / sum(weight_tel) else: for idx, tel_id in enumerate(hillas_dict.keys()): reco_energy_tel[tel_id] = np.nan for idx, tel_id in enumerate(hillas_dict.keys()): cam_id = source.subarray.tel[tel_id].camera.camera_name if cam_id not in outData: if args.save_images is True: # we define and save images content here, to make it # adaptive to different cameras n_pixels = source.subarray.tel[ tel_id].camera.geometry.n_pixels DataTrainingOutput["true_image"] = tb.Float32Col( shape=(n_pixels), pos=56) DataTrainingOutput["reco_image"] = tb.Float32Col( shape=(n_pixels), pos=57) DataTrainingOutput["cleaning_mask_reco"] = tb.BoolCol( shape=(n_pixels), pos=58) # not in ctapipe DataTrainingOutput[ "cleaning_mask_clusters"] = tb.BoolCol( shape=(n_pixels), pos=59) # not in ctapipe outTable[cam_id] = outfile.create_table( "/", cam_id, DataTrainingOutput, ) outData[cam_id] = outTable[cam_id].row moments = hillas_dict[tel_id] ellipticity = moments.width / moments.length # Write to file also the Hillas parameters that have been used # to calculate reco_results moments_reco = hillas_dict_reco[tel_id] ellipticity_reco = moments_reco.width / moments_reco.length outData[cam_id]["good_image"] = good_for_reco[tel_id] outData[cam_id]["is_valid"] = is_valid outData[cam_id]["impact_dist"] = impact_dict[tel_id].to( "m").value outData[cam_id]["max_signal_cam"] = max_signals[tel_id] outData[cam_id]["hillas_intensity"] = moments.intensity outData[cam_id]["N_LST"] = n_tels["LST_LST_LSTCam"] outData[cam_id]["N_MST"] = (n_tels["MST_MST_NectarCam"] + n_tels["MST_MST_FlashCam"] + n_tels["MST_SCT_SCTCam"]) outData[cam_id]["N_SST"] = (n_tels["SST_1M_DigiCam"] + n_tels["SST_ASTRI_ASTRICam"] + n_tels["SST_GCT_CHEC"]) outData[cam_id]["hillas_width"] = moments.width.to("deg").value outData[cam_id]["hillas_length"] = moments.length.to( "deg").value outData[cam_id]["hillas_psi"] = moments.psi.to("deg").value outData[cam_id]["hillas_skewness"] = moments.skewness outData[cam_id]["hillas_kurtosis"] = moments.kurtosis outData[cam_id]["h_max"] = h_max.to("m").value outData[cam_id]["err_est_pos"] = np.nan outData[cam_id]["err_est_dir"] = np.nan outData[cam_id]["true_energy"] = event.mc.energy.to( "TeV").value
outData[cam_id]["hillas_x"] = moments.x.to("deg").value outData[cam_id]["hillas_y"] = moments.y.to("deg").value outData[cam_id]["hillas_phi"] = moments.phi.to("deg").value outData[cam_id]["hillas_r"] = moments.r.to("deg").value outData[cam_id]["pixels"] = n_pixel_dict[tel_id] outData[cam_id]["obs_id"] = event.index.obs_id outData[cam_id]["event_id"] = event.index.event_id outData[cam_id]["tel_id"] = tel_id outData[cam_id]["xi"] = xi.to("deg").value outData[cam_id]["reco_energy"] = reco_energy outData[cam_id]["hillas_ellipticity"] = ellipticity.value outData[cam_id]["clusters"] = n_cluster_dict[tel_id] outData[cam_id]["n_tel_discri"] = n_tels["GOOD images"] outData[cam_id]["mc_core_x"] = event.mc.core_x.to("m").value outData[cam_id]["mc_core_y"] = event.mc.core_y.to("m").value outData[cam_id]["reco_core_x"] = reco_core_x.to("m").value outData[cam_id]["reco_core_y"] = reco_core_y.to("m").value outData[cam_id]["mc_h_first_int"] = event.mc.h_first_int.to( "m").value outData[cam_id]["offset"] = offset.to("deg").value outData[cam_id]["mc_x_max"] = event.mc.x_max.value # g / cm2 outData[cam_id]["alt"] = reco_result.alt.to("deg").value outData[cam_id]["az"] = reco_result.az.to("deg").value outData[cam_id]["reco_energy_tel"] = reco_energy_tel[tel_id] # Variables from hillas_dict_reco outData[cam_id]["n_tel_reco"] = n_tels["GOOD images"] outData[cam_id]["hillas_x_reco"] = moments_reco.x.to( "deg").value outData[cam_id]["hillas_y_reco"] = moments_reco.y.to( "deg").value outData[cam_id]["hillas_phi_reco"] = moments_reco.phi.to( "deg").value outData[cam_id][ "hillas_ellipticity_reco"] = ellipticity_reco.value outData[cam_id]["hillas_r_reco"] = moments_reco.r.to( "deg").value outData[cam_id]["hillas_skewness_reco"] = moments_reco.skewness outData[cam_id]["hillas_kurtosis_reco"] = moments_reco.kurtosis outData[cam_id]["hillas_width_reco"] = moments_reco.width.to( "deg").value outData[cam_id]["hillas_length_reco"] = moments_reco.length.to( "deg").value outData[cam_id]["hillas_psi_reco"] = moments_reco.psi.to( "deg").value outData[cam_id][ "hillas_intensity_reco"] = moments_reco.intensity outData[cam_id][ "leakage_intensity_width_1_reco"] = leakage_dict[tel_id][ "leak1_reco"] outData[cam_id][ "leakage_intensity_width_2_reco"] = leakage_dict[tel_id][ "leak2_reco"] outData[cam_id]["leakage_intensity_width_1"] = leakage_dict[ tel_id]["leak1"] outData[cam_id]["leakage_intensity_width_2"] = leakage_dict[ tel_id]["leak2"] # ======================= # IMAGES INFORMATION # ======================= if args.save_images is True: # we save the images content here, adapted to the # different cameras outData[cam_id]["true_image"] = true_image[tel_id] outData[cam_id]["reco_image"] = reco_image[tel_id] outData[cam_id]["cleaning_mask_reco"] = cleaning_mask_reco[ tel_id] outData[cam_id][ "cleaning_mask_clusters"] = cleaning_mask_clusters[ tel_id] # ======================= outData[cam_id].append() if signal_handler.stop: break if signal_handler.stop: break # make sure that all the events are properly stored for table in outTable.values(): table.flush() print(bcolors.BOLD + "\n\n==================================================\n" + "Statistical summary of processed events and images\n" + "==================================================\n" # + bcolors.ENDC ) evt_cutflow() # Catch specific cases triggered_events = evt_cutflow.cuts["min2Tels trig"][1] reconstructed_events = evt_cutflow.cuts["min2Tels reco"][1] if triggered_events == 0: print("\033[93mWARNING: No events have been triggered" " by the selected
telescopes! \033[0m") else: print("\n") img_cutflow() if reconstructed_events == 0: print("\033[93m WARNING: None of the triggered events have been " "properly reconstructed by the selected telescopes!\n" "DL1 file will be empty! \033[0m") print(bcolors.ENDC)
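# A minimal, self-contained sketch of the intensity-weighted average used
# above to combine the per-telescope energies into one shower energy.
# `weighted_shower_energy` is an illustrative helper, not part of the
# pipeline; images without a valid estimate are assumed to carry NaN here,
# while the script achieves the same exclusion via zero weights.
import numpy as np

def weighted_shower_energy(energy_tel, weight_tel):
    """Intensity-weighted mean of the per-image energies, ignoring NaNs."""
    valid = ~np.isnan(energy_tel)
    if not valid.any():
        return np.nan
    return np.sum(weight_tel[valid] * energy_tel[valid]) / np.sum(weight_tel[valid])

# Three images, one of which failed cleaning/parametrisation:
energies = np.array([1.2, np.nan, 0.9])  # TeV
weights = np.array([350.0, 0.0, 150.0])  # Hillas intensities
print(weighted_shower_energy(energies, weights))  # (350*1.2 + 150*0.9) / 500 = 1.11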
def main(): # Read arguments parser = argparse.ArgumentParser(description='Make performance files') parser.add_argument('--config_file', type=str, required=True, help='') mode_group = parser.add_mutually_exclusive_group() mode_group.add_argument('--wave', dest="mode", action='store_const', const="wave", default="tail", help="if set, use wavelet cleaning") mode_group.add_argument('--tail', dest="mode", action='store_const', const="tail", help="if set, use tail cleaning (default)") args = parser.parse_args() # Read configuration file cfg = load_config(args.config_file) # Create output directory if necessary outdir = os.path.join( cfg['general']['outdir'], 'performance_protopipe_{}_CTA{}_{}_Zd{}_{}_Time{:.2f}{}'.format( cfg['general']['prod'], cfg['general']['site'], cfg['general']['array'], cfg['general']['zenith'], cfg['general']['azimuth'], cfg['analysis']['obs_time']['value'], cfg['analysis']['obs_time']['unit']), ) indir = cfg['general']['indir'] template_input_file = cfg['general']['template_input_file'] T_OBS = cfg['analysis']['obs_time']['value'] * u.Unit( cfg['analysis']['obs_time']['unit']) # scaling between on and off region. # Make the off region 5 times larger than the on region for better # background statistics ALPHA = cfg['analysis']['alpha'] # Radius to use for calculating bg rate MAX_BG_RADIUS = cfg['analysis']['max_bg_radius'] * u.deg particles = { "gamma": { "file": os.path.join(indir, template_input_file.format(args.mode, "gamma")), "target_spectrum": CRAB_HEGRA, "run_header": cfg['particle_information']['gamma'] }, "proton": { "file": os.path.join(indir, template_input_file.format(args.mode, "proton")), "target_spectrum": IRFDOC_PROTON_SPECTRUM, "run_header": cfg['particle_information']['proton'] }, "electron": { "file": os.path.join(indir, template_input_file.format(args.mode, "electron")), "target_spectrum": IRFDOC_ELECTRON_SPECTRUM, "run_header": cfg['particle_information']['electron'] }, } logging.basicConfig(level=logging.INFO) logging.getLogger("pyirf").setLevel(logging.DEBUG) for particle_type, p in particles.items(): log.info(f"Simulated {particle_type.title()} Events:") p["events"], p["simulation_info"] = read_DL2_pyirf( p["file"], p["run_header"]) # Multiplicity cut p["events"] = p["events"][ p["events"]["multiplicity"] >= cfg['analysis'] ['cut_on_multiplicity']].copy() p["simulated_spectrum"] = PowerLaw.from_simulation( p["simulation_info"], T_OBS) # Weight events p["events"]["weight"] = calculate_event_weights( p["events"]["true_energy"], p["target_spectrum"], p["simulated_spectrum"]) for prefix in ('true', 'reco'): k = f"{prefix}_source_fov_offset" p["events"][k] = calculate_source_fov_offset(p["events"], prefix=prefix) # calculate theta / distance between reco and assumed source position # we handle only ON observations here, so the assumed source pos # is the pointing position p["events"]["theta"] = calculate_theta( p["events"], assumed_source_az=p["events"]["pointing_az"], assumed_source_alt=p["events"]["pointing_alt"], ) log.info(p["simulation_info"]) log.info("") gammas = particles["gamma"]["events"] # background table composed of both electrons and protons background = table.vstack( [particles["proton"]["events"], particles["electron"]["events"]]) MAX_GH_CUT_EFFICIENCY = 0.8 GH_CUT_EFFICIENCY_STEP = 0.01 # gh cut used for first calculation of the binned theta cuts INITIAL_GH_CUT_EFFICIENCY = 0.4 INITIAL_GH_CUT = np.quantile(gammas['gh_score'], (1 - INITIAL_GH_CUT_EFFICIENCY)) log.info( f"Using fixed G/H cut of {INITIAL_GH_CUT} to calculate theta cuts") # event
display uses much finer bins for the theta cut than # for the sensitivity theta_bins = add_overflow_bins( create_bins_per_decade( 10**(-1.9) * u.TeV, 10**2.3005 * u.TeV, 50, )) # theta cut is 68 percent containment of the gammas # for now with a fixed global, unoptimized score cut mask_theta_cuts = gammas["gh_score"] >= INITIAL_GH_CUT theta_cuts = calculate_percentile_cut( gammas["theta"][mask_theta_cuts], gammas["reco_energy"][mask_theta_cuts], bins=theta_bins, min_value=0.05 * u.deg, fill_value=0.32 * u.deg, max_value=0.32 * u.deg, percentile=68, ) # same bins as event display uses sensitivity_bins = add_overflow_bins( create_bins_per_decade(10**-1.9 * u.TeV, 10**2.31 * u.TeV, bins_per_decade=5)) log.info("Optimizing G/H separation cut for best sensitivity") gh_cut_efficiencies = np.arange( GH_CUT_EFFICIENCY_STEP, MAX_GH_CUT_EFFICIENCY + GH_CUT_EFFICIENCY_STEP / 2, GH_CUT_EFFICIENCY_STEP) sensitivity_step_2, gh_cuts = optimize_gh_cut( gammas, background, reco_energy_bins=sensitivity_bins, gh_cut_efficiencies=gh_cut_efficiencies, op=operator.ge, theta_cuts=theta_cuts, alpha=ALPHA, background_radius=MAX_BG_RADIUS, ) # now that we have the optimized gh cuts, we recalculate the theta # cut as 68 percent containment on the events surviving these cuts. log.info('Recalculating theta cut for optimized GH Cuts') for tab in (gammas, background): tab["selected_gh"] = evaluate_binned_cut(tab["gh_score"], tab["reco_energy"], gh_cuts, operator.ge) theta_cuts_opt = calculate_percentile_cut( gammas[gammas['selected_gh']]["theta"], gammas[gammas['selected_gh']]["reco_energy"], theta_bins, percentile=68, fill_value=0.32 * u.deg, max_value=0.32 * u.deg, min_value=0.05 * u.deg, ) gammas["selected_theta"] = evaluate_binned_cut(gammas["theta"], gammas["reco_energy"], theta_cuts_opt, operator.le) gammas["selected"] = gammas["selected_theta"] & gammas["selected_gh"] # calculate sensitivity signal_hist = create_histogram_table(gammas[gammas["selected"]], bins=sensitivity_bins) background_hist = estimate_background( background[background["selected_gh"]], reco_energy_bins=sensitivity_bins, theta_cuts=theta_cuts_opt, alpha=ALPHA, background_radius=MAX_BG_RADIUS, ) sensitivity = calculate_sensitivity(signal_hist, background_hist, alpha=ALPHA) # scale relative sensitivity by Crab flux to get the flux sensitivity spectrum = particles['gamma']['target_spectrum'] for s in (sensitivity_step_2, sensitivity): s["flux_sensitivity"] = (s["relative_sensitivity"] * spectrum(s['reco_energy_center'])) log.info('Calculating IRFs') hdus = [ fits.PrimaryHDU(), fits.BinTableHDU(sensitivity, name="SENSITIVITY"), fits.BinTableHDU(sensitivity_step_2, name="SENSITIVITY_STEP_2"), fits.BinTableHDU(theta_cuts, name="THETA_CUTS"), fits.BinTableHDU(theta_cuts_opt, name="THETA_CUTS_OPT"), fits.BinTableHDU(gh_cuts, name="GH_CUTS"), ] masks = { "": gammas["selected"], "_NO_CUTS": slice(None), "_ONLY_GH": gammas["selected_gh"], "_ONLY_THETA": gammas["selected_theta"], } # binnings for the irfs true_energy_bins = add_overflow_bins( create_bins_per_decade(10**-1.9 * u.TeV, 10**2.31 * u.TeV, 10)) reco_energy_bins = add_overflow_bins( create_bins_per_decade(10**-1.9 * u.TeV, 10**2.31 * u.TeV, 5)) fov_offset_bins = [0, 0.5] * u.deg source_offset_bins = np.arange(0, 1 + 1e-4, 1e-3) * u.deg energy_migration_bins = np.geomspace(0.2, 5, 200) for label, mask in masks.items(): effective_area = effective_area_per_energy( gammas[mask], particles["gamma"]["simulation_info"], true_energy_bins=true_energy_bins, ) hdus.append( create_aeff2d_hdu(
effective_area[..., np.newaxis], # +1 dimension for FOV offset true_energy_bins, fov_offset_bins, extname="EFFECTIVE_AREA" + label, )) edisp = energy_dispersion( gammas[mask], true_energy_bins=true_energy_bins, fov_offset_bins=fov_offset_bins, migration_bins=energy_migration_bins, ) hdus.append( create_energy_dispersion_hdu( edisp, true_energy_bins=true_energy_bins, migration_bins=energy_migration_bins, fov_offset_bins=fov_offset_bins, extname="ENERGY_DISPERSION" + label, )) # Here we use reconstructed energy instead of true energy for the sake of # comparisons with current pipelines bias_resolution = energy_bias_resolution(gammas[gammas["selected"]], reco_energy_bins, energy_type="reco") # Here we use reconstructed energy instead of true energy for the sake of # comparisons with current pipelines ang_res = angular_resolution(gammas[gammas["selected_gh"]], reco_energy_bins, energy_type="reco") psf = psf_table( gammas[gammas["selected_gh"]], true_energy_bins, fov_offset_bins=fov_offset_bins, source_offset_bins=source_offset_bins, ) background_rate = background_2d( background[background['selected_gh']], reco_energy_bins, fov_offset_bins=np.arange(0, 11) * u.deg, t_obs=T_OBS, ) hdus.append( create_background_2d_hdu( background_rate, reco_energy_bins, fov_offset_bins=np.arange(0, 11) * u.deg, )) hdus.append( create_psf_table_hdu( psf, true_energy_bins, source_offset_bins, fov_offset_bins, )) hdus.append( create_rad_max_hdu(theta_cuts_opt["cut"][:, np.newaxis], theta_bins, fov_offset_bins)) hdus.append(fits.BinTableHDU(ang_res, name="ANGULAR_RESOLUTION")) hdus.append( fits.BinTableHDU(bias_resolution, name="ENERGY_BIAS_RESOLUTION")) log.info('Writing output file') fits.HDUList(hdus).writeto(outdir + '.fits.gz', overwrite=True)
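# Worked example (toy numbers) of the efficiency-to-cut mapping used above:
# keeping a fraction `eff` of the gammas means cutting at the (1 - eff)
# quantile of their gh_score distribution. The `+ GH_CUT_EFFICIENCY_STEP / 2`
# in the efficiency grid is a float-safety margin so np.arange does not drop
# the last efficiency value.
import numpy as np

gh_score = np.array([0.1, 0.3, 0.5, 0.7, 0.8, 0.9, 0.95, 0.99])  # toy scores
eff = 0.4
cut = np.quantile(gh_score, 1 - eff)  # events with gh_score >= cut pass
print(cut)                       # 0.82 for these toy scores
print(np.mean(gh_score >= cut))  # 0.375, i.e. close to eff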
def main(): # Argument parser parser = make_argparser() parser.add_argument( "--debug", action="store_true", help="Print debugging information", ) parser.add_argument("--regressor_dir", default="./", help="regressors directory") parser.add_argument("--classifier_dir", default="./", help="classifiers directory") parser.add_argument( "--force_tailcut_for_extended_cleaning", type=str2bool, default=False, help="Force tailcut cleaning for energy/score estimation", ) parser.add_argument( "--save_images", action="store_true", help="Save images in images.h5 (one file testing)", ) parser.add_argument( "--regressor_config", type=str, default=None, help="Configuration file used to produce regressor model") parser.add_argument( "--classifier_config", type=str, default=None, help="Configuration file used to produce classification model") args = parser.parse_args() # Read configuration file cfg = load_config(args.config_file) try: # If the user didn't specify a site and/or an array... site = cfg["General"]["site"] array = cfg["General"]["array"] except KeyError: # ...raise an error and exit. print(bcolors.FAIL + "ERROR: make sure that both 'site' and 'array' are " + "specified in the analysis configuration file!" + bcolors.ENDC) exit() # Add force_tailcut_for_extended_cleaning in configuration cfg["General"][ "force_tailcut_for_extended_cleaning"] = args.force_tailcut_for_extended_cleaning cfg["General"]["force_mode"] = "tail" force_mode = args.mode if cfg["General"]["force_tailcut_for_extended_cleaning"] is True: force_mode = "tail" print("force_mode={}".format(force_mode)) print("mode={}".format(args.mode)) if args.infile_list: filenamelist = [] for f in args.infile_list: filenamelist += glob("{}/{}".format(args.indir, f)) filenamelist.sort() if not filenamelist: print("no files found; check indir: {}".format(args.indir)) exit(-1) # Get the IDs of the involved telescopes and associated cameras together # with the equivalent focal lengths from the first event allowed_tels, cams_and_foclens, subarray = prod3b_array( filenamelist[0], site, array) # keeping track of events and where they were rejected evt_cutflow = CutFlow("EventCutFlow") img_cutflow = CutFlow("ImageCutFlow") # Event preparer preper = EventPreparer( config=cfg, subarray=subarray, cams_and_foclens=cams_and_foclens, mode=args.mode, event_cutflow=evt_cutflow, image_cutflow=img_cutflow, ) # Regressor and classifier methods regressor_method = cfg["EnergyRegressor"]["method_name"] classifier_method = cfg["GammaHadronClassifier"]["method_name"] use_proba_for_classifier = cfg["GammaHadronClassifier"]["use_proba"] if regressor_method in ["None", "none", None]: print(bcolors.OKBLUE + "The energy of the event will NOT be estimated." + bcolors.ENDC) use_regressor = False else: use_regressor = True if classifier_method in ["None", "none", None]: if args.debug: print(bcolors.OKBLUE + "The particle type of the event will NOT be estimated."
+ bcolors.ENDC) use_classifier = False else: use_classifier = True # Classifiers if use_classifier: # Read configuration file classifier_config = load_config(args.classifier_config) classifier_files = (args.classifier_dir + "/classifier_{cam_id}_{classifier}.pkl.gz") clf_file = classifier_files.format( **{ "mode": force_mode, "wave_args": "mixed", "classifier": classifier_method, "cam_id": "{cam_id}", }) classifiers = load_models(clf_file, cam_id_list=cams_and_foclens.keys()) if args.debug: print(bcolors.OKBLUE + "The particle type of the event will be estimated" + " using the models stored in" + f" {args.classifier_dir}\n" + bcolors.ENDC) # Regressors if use_regressor: # Read configuration file regressor_config = load_config(args.regressor_config) regressor_files = (args.regressor_dir + "/regressor_{cam_id}_{regressor}.pkl.gz") reg_file = regressor_files.format( **{ "mode": force_mode, "wave_args": "mixed", "regressor": regressor_method, "cam_id": "{cam_id}", }) regressors = load_models(reg_file, cam_id_list=cams_and_foclens.keys()) if args.debug: print(bcolors.OKBLUE + "The energy of the event will be estimated" + " using the models stored in" + f" {args.regressor_dir}\n" + bcolors.ENDC) # catch ctrl-c signal to exit current loop and still display results signal_handler = SignalHandler() signal.signal(signal.SIGINT, signal_handler) # Declaration of the column descriptor for the (possible) images file StoredImages = dict( event_id=tb.Int32Col(dflt=1, pos=0), tel_id=tb.Int16Col(dflt=1, pos=1) # reco_image, true_image and cleaning_mask_reco # are defined later since they depend on the number of pixels ) # this class defines the reconstruction parameters to keep track of class RecoEvent(tb.IsDescription): obs_id = tb.Int16Col(dflt=-1, pos=0) event_id = tb.Int32Col(dflt=-1, pos=1) NTels_trig = tb.Int16Col(dflt=0, pos=2) NTels_reco = tb.Int16Col(dflt=0, pos=3) NTels_reco_lst = tb.Int16Col(dflt=0, pos=4) NTels_reco_mst = tb.Int16Col(dflt=0, pos=5) NTels_reco_sst = tb.Int16Col(dflt=0, pos=6) pointing_az = tb.Float32Col(dflt=np.nan, pos=7) pointing_alt = tb.Float32Col(dflt=np.nan, pos=8) true_az = tb.Float32Col(dflt=np.nan, pos=9) true_alt = tb.Float32Col(dflt=np.nan, pos=10) true_energy = tb.Float32Col(dflt=np.nan, pos=11) reco_energy = tb.Float32Col(dflt=np.nan, pos=12) reco_alt = tb.Float32Col(dflt=np.nan, pos=13) reco_az = tb.Float32Col(dflt=np.nan, pos=14) offset = tb.Float32Col(dflt=np.nan, pos=15) xi = tb.Float32Col(dflt=np.nan, pos=16) ErrEstPos = tb.Float32Col(dflt=np.nan, pos=17) ErrEstDir = tb.Float32Col(dflt=np.nan, pos=18) gammaness = tb.Float32Col(dflt=np.nan, pos=19) success = tb.BoolCol(dflt=False, pos=20) score = tb.Float32Col(dflt=np.nan, pos=21) h_max = tb.Float32Col(dflt=np.nan, pos=22) reco_core_x = tb.Float32Col(dflt=np.nan, pos=23) reco_core_y = tb.Float32Col(dflt=np.nan, pos=24) true_core_x = tb.Float32Col(dflt=np.nan, pos=25) true_core_y = tb.Float32Col(dflt=np.nan, pos=26) is_valid = tb.BoolCol(dflt=False, pos=27) reco_outfile = tb.open_file( mode="w", # if no outfile name is given (i.e. we
don't want to write the event list to disk), # we need to specify two "driver" arguments to keep the file in memory **({ "filename": args.outfile } if args.outfile else { "filename": "no_outfile.h5", "driver": "H5FD_CORE", "driver_core_backing_store": False, })) reco_table = reco_outfile.create_table("/", "reco_events", RecoEvent) reco_event = reco_table.row # Create the images file only if the user wants to store the images if args.save_images is True: images_outfile = tb.open_file("images.h5", mode="w") images_table = {} images_phe = {} for i, filename in enumerate(filenamelist): source = EventSource(input_url=filename, allowed_tels=allowed_tels, max_events=args.max_events) # loop that cleans and parametrises the images and performs the reconstruction for ( event, reco_image, cleaning_mask_reco, cleaning_mask_clusters, true_image, n_pixel_dict, hillas_dict, hillas_dict_reco, leakage_dict, n_tels, max_signals, n_cluster_dict, reco_result, impact_dict, good_event, good_for_reco, ) in preper.prepare_event(source, save_images=args.save_images, debug=args.debug): # True direction true_az = event.simulation.shower.az true_alt = event.simulation.shower.alt # Array pointing in AltAz frame pointing_az = event.pointing.array_azimuth pointing_alt = event.pointing.array_altitude if good_event: # aka it has been successfully reconstructed # Angular separation between # - true direction # - reconstructed direction xi = angular_separation(event.simulation.shower.az, event.simulation.shower.alt, reco_result.az, reco_result.alt) # Angular separation between # - center of the array's FoV # - reconstructed direction offset = angular_separation( pointing_az, pointing_alt, reco_result.az, reco_result.alt, ) # Reconstructed height of shower maximum h_max = reco_result.h_max # Reconstructed position of the shower's core on the ground reco_core_x = reco_result.core_x reco_core_y = reco_result.core_y # Reconstructed direction of the shower in the sky alt, az = reco_result.alt, reco_result.az # Successfully reconstructed shower is_valid = True else: # no successful reconstruction: assign dummy values xi = np.nan * u.deg offset = np.nan * u.deg reco_core_x = np.nan * u.m reco_core_y = np.nan * u.m h_max = np.nan * u.m alt = np.nan * u.deg az = np.nan * u.deg is_valid = False reco_energy = np.nan score = np.nan gammaness = np.nan reco_event["success"] = False # Estimate particle energy if use_regressor and is_valid: energy_tel = np.zeros(len(hillas_dict.keys())) energy_tel_classifier = {} weight_tel = np.zeros(len(hillas_dict.keys())) for idx, tel_id in enumerate(hillas_dict.keys()): cam_id = source.subarray.tel[tel_id].camera.camera_name moments = hillas_dict[tel_id] model = regressors[cam_id] ############################################################ # GET FEATURES ############################################################ # Read feature list from model configuration file features_basic = regressor_config["FeatureList"]["Basic"] features_derived = regressor_config["FeatureList"][ "Derived"] features = features_basic + list(features_derived) # Create a pandas Dataframe with basic quantities # This is needed in order to connect the I/O system of the # model inputs to the in-memory computation of this script data = pd.DataFrame({ "hillas_intensity": [moments.intensity], "hillas_width": [moments.width.to("deg").value], "hillas_length": [moments.length.to("deg").value], "hillas_x": [moments.x.to("deg").value], "hillas_y": [moments.y.to("deg").value], "hillas_phi": [moments.phi.to("deg").value], "hillas_r": [moments.r.to("deg").value],
"leakage_intensity_width_1_reco": [leakage_dict[tel_id]['leak1_reco']], "leakage_intensity_width_2_reco": [leakage_dict[tel_id]['leak2_reco']], "leakage_intensity_width_1": [leakage_dict[tel_id]['leak1']], "leakage_intensity_width_2": [leakage_dict[tel_id]['leak2']], "az": [reco_result.az.to("deg").value], "alt": [reco_result.alt.to("deg").value], "h_max": [h_max.value], "impact_dist": [impact_dict[tel_id].to("m").value], }) # Compute derived features and add them to the dataframe for key, expression in features_derived.items(): if key not in data: data.eval(f'{key} = {expression}', inplace=True) # sort features_to_use alphabetically to ensure order # preservation with model.fit in protopipe.mva features = sorted(features) # Select the values for the full set of features features_values = data[features].to_numpy() ############################################################ if good_for_reco[tel_id] == 1: energy_tel[idx] = model.predict(features_values) else: energy_tel[idx] = np.nan weight_tel[idx] = moments.intensity # Record the values regardless of the validity # We don't use this now, but it should be recorded energy_tel_classifier[tel_id] = energy_tel[idx] # Use only images with valid estimated energies to calculate # the average energy_tel_selected = energy_tel[~np.isnan(energy_tel)] weight_tel_selected = weight_tel[~np.isnan(energy_tel)] # Try getting the average weighted energy of the shower # If no image had a valid estimated energy, record it as NaN if len(energy_tel_selected) == 0: reco_energy = np.nan energy_estimated = False else: reco_energy = np.sum( weight_tel_selected * energy_tel_selected) / sum(weight_tel_selected) energy_estimated = True else: reco_energy = np.nan energy_estimated = False # Estimate particle score/gammaness if use_classifier and is_valid: score_tel = np.zeros(len(hillas_dict.keys())) gammaness_tel = np.zeros(len(hillas_dict.keys())) weight_tel = np.zeros(len(hillas_dict.keys())) for idx, tel_id in enumerate(hillas_dict.keys()): cam_id = source.subarray.tel[tel_id].camera.camera_name moments = hillas_dict[tel_id] model = classifiers[cam_id] ############################################################ # GET FEATURES ############################################################ # Read feature list from model configuration file features_basic = classifier_config["FeatureList"]["Basic"] features_derived = classifier_config["FeatureList"][ "Derived"] features = features_basic + list(features_derived) # Create a pandas Dataframe with basic quantities # This is needed in order to connect the I/O system of the # model inputs to the in-memory computation of this script data = pd.DataFrame({ "hillas_intensity": [moments.intensity], "hillas_width": [moments.width.to("deg").value], "hillas_length": [moments.length.to("deg").value], "hillas_x": [moments.x.to("deg").value], "hillas_y": [moments.y.to("deg").value], "hillas_phi": [moments.phi.to("deg").value], "hillas_r": [moments.r.to("deg").value], "leakage_intensity_width_1_reco": [leakage_dict[tel_id]['leak1_reco']], "leakage_intensity_width_2_reco": [leakage_dict[tel_id]['leak2_reco']], "leakage_intensity_width_1": [leakage_dict[tel_id]['leak1']], "leakage_intensity_width_2": [leakage_dict[tel_id]['leak2']], "az": [reco_result.az.to("deg").value], "alt": [reco_result.alt.to("deg").value], "h_max": [h_max.value], "impact_dist": [impact_dict[tel_id].to("m").value], "reco_energy": reco_energy, "reco_energy_tel": energy_tel_classifier[tel_id], }) # Compute derived features and add them to the dataframe for key,
expression in features_derived.items(): if key not in data: data.eval(f'{key} = {expression}', inplace=True) # sort features_to_use alphabetically to ensure order # preservation with model.fit in protopipe.mva features = sorted(features) # Select the values for the full set of features features_values = data[features].to_numpy() ############################################################ # Here we check for valid telescope-wise energies # Because it means that it's a good image # WARNING: currently we REQUIRE both the energy AND the # particle type to be estimated if not np.isnan(energy_tel_classifier[tel_id]): # Output of classifier according to type of classifier if use_proba_for_classifier is False: score_tel[idx] = model.decision_function( features_values) else: gammaness_tel[idx] = model.predict_proba( features_values)[:, 1] weight_tel[idx] = np.sqrt(moments.intensity) else: # WARNING: # this is true only because we use telescope-wise # energies as a feature of the model!!! score_tel[idx] = np.nan gammaness_tel[idx] = np.nan # Use only images with valid estimated energies to calculate # the average if use_proba_for_classifier is False: score_tel_selected = score_tel[~np.isnan(score_tel)] weight_tel_selected = weight_tel[~np.isnan(score_tel)] else: gammaness_tel_selected = gammaness_tel[ ~np.isnan(gammaness_tel)] weight_tel_selected = weight_tel[~np.isnan(gammaness_tel)] # Try getting the average weighted score or gammaness # If no image had a valid estimate, record it as NaN if len(weight_tel_selected) > 0: # Weight the final decision/proba if use_proba_for_classifier is True: gammaness = np.sum( weight_tel_selected * gammaness_tel_selected) / sum(weight_tel_selected) else: score = np.sum( weight_tel_selected * score_tel_selected) / sum(weight_tel_selected) particle_type_estimated = True else: score = np.nan gammaness = np.nan particle_type_estimated = False else: score = np.nan gammaness = np.nan particle_type_estimated = False if energy_estimated and particle_type_estimated: reco_event["success"] = True else: if args.debug: print( bcolors.WARNING + f"energy_estimated = {energy_estimated}\n" + f"particle_type_estimated = {particle_type_estimated}\n" + bcolors.ENDC) reco_event["success"] = False # If the user wants to save the images of the run if args.save_images is True: for idx, tel_id in enumerate(hillas_dict.keys()): cam_id = source.subarray.tel[tel_id].camera.camera_name if cam_id not in images_phe: n_pixels = source.subarray.tel[ tel_id].camera.geometry.n_pixels StoredImages["true_image"] = tb.Float32Col( shape=(n_pixels), pos=2) StoredImages["reco_image"] = tb.Float32Col( shape=(n_pixels), pos=3) StoredImages["cleaning_mask_reco"] = tb.BoolCol( shape=(n_pixels), pos=4) # not in ctapipe StoredImages["cleaning_mask_clusters"] = tb.BoolCol( shape=(n_pixels), pos=5) # not in ctapipe images_table[cam_id] = images_outfile.create_table( "/", "_".join(["images", cam_id]), StoredImages) images_phe[cam_id] = images_table[cam_id].row images_phe[cam_id]["event_id"] = event.index.event_id images_phe[cam_id]["tel_id"] = tel_id images_phe[cam_id]["reco_image"] = reco_image[tel_id] images_phe[cam_id]["true_image"] = true_image[tel_id] images_phe[cam_id][ "cleaning_mask_reco"] = cleaning_mask_reco[tel_id] images_phe[cam_id][ "cleaning_mask_clusters"] = cleaning_mask_clusters[ tel_id] images_phe[cam_id].append() # Now we start recording the data to file reco_event["event_id"] = event.index.event_id reco_event["obs_id"] = event.index.obs_id reco_event["NTels_trig"] = len(event.r1.tel.keys())
reco_event["NTels_reco"] = len(hillas_dict) reco_event["NTels_reco_lst"] = n_tels["LST_LST_LSTCam"] reco_event["NTels_reco_mst"] = (n_tels["MST_MST_NectarCam"] + n_tels["MST_MST_FlashCam"] + n_tels["MST_SCT_SCTCam"]) reco_event["NTels_reco_sst"] = (n_tels["SST_1M_DigiCam"] + n_tels["SST_ASTRI_ASTRICam"] + n_tels["SST_GCT_CHEC"]) reco_event["pointing_az"] = pointing_az.to("deg").value reco_event["pointing_alt"] = pointing_alt.to("deg").value reco_event["reco_energy"] = reco_energy reco_event["reco_alt"] = alt.to("deg").value reco_event["reco_az"] = az.to("deg").value reco_event["offset"] = offset.to("deg").value reco_event["xi"] = xi.to("deg").value reco_event["h_max"] = h_max.to("m").value reco_event["reco_core_x"] = reco_core_x.to("m").value reco_event["reco_core_y"] = reco_core_y.to("m").value reco_event["is_valid"] = is_valid if use_proba_for_classifier is True: reco_event["gammaness"] = gammaness else: reco_event["score"] = score reco_event["ErrEstPos"] = np.nan reco_event["ErrEstDir"] = np.nan # Simulated information shower = event.simulation.shower mc_core_x = shower.core_x mc_core_y = shower.core_y reco_event["true_energy"] = shower.energy.to("TeV").value reco_event["true_az"] = true_az.to("deg").value reco_event["true_alt"] = true_alt.to("deg").value reco_event["true_core_x"] = mc_core_x.to("m").value reco_event["true_core_y"] = mc_core_y.to("m").value # Fill table reco_event.append() reco_table.flush() if signal_handler.stop: break if signal_handler.stop: break # make sure everything gets written out nicely reco_table.flush() if args.save_images is True: for table in images_table.values(): table.flush() try: print() evt_cutflow() print() img_cutflow() except ZeroDivisionError: pass print("Job done!")
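# A minimal sketch of the "Derived" feature mechanism used above: the model
# configuration maps a feature name to a pandas expression, which is then
# evaluated on the DataFrame of basic features. The entry below is
# hypothetical; the real mapping comes from regressor_config/classifier_config.
import pandas as pd

features_derived = {"hillas_ellipticity": "hillas_width / hillas_length"}
data = pd.DataFrame({
    "hillas_intensity": [250.0],
    "hillas_width": [0.04],
    "hillas_length": [0.11],
})
for key, expression in features_derived.items():
    if key not in data:  # only add columns that are not already present
        data.eval(f"{key} = {expression}", inplace=True)
print(data["hillas_ellipticity"].iloc[0])  # 0.04 / 0.11 ≈ 0.364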
def main(): # Argument parser parser = make_argparser() parser.add_argument( "--estimate_energy", type=str2bool, default=False, help="Make estimation of energy", ) parser.add_argument("--regressor_dir", type=str, default="./", help="regressors directory") args = parser.parse_args() # Read configuration file cfg = load_config(args.config_file) # Read site layout site = cfg["General"]["site"] array = cfg["General"]["array"] if args.infile_list: filenamelist = [] for f in args.infile_list: filenamelist += glob("{}/{}".format(args.indir, f)) filenamelist.sort() else: raise ValueError("don't know which input to use...") if not filenamelist: print("no files found; check indir: {}".format(args.indir)) exit(-1) else: print("found {} files".format(len(filenamelist))) # keeping track of events and where they were rejected evt_cutflow = CutFlow("EventCutFlow") img_cutflow = CutFlow("ImageCutFlow") preper = EventPreparer(config=cfg, mode=args.mode, event_cutflow=evt_cutflow, image_cutflow=img_cutflow) # catch ctrl-c signal to exit current loop and still display results signal_handler = SignalHandler() signal.signal(signal.SIGINT, signal_handler) # Regressor method regressor_method = cfg["EnergyRegressor"]["method_name"] # wrapper for the scikit-learn regressor if args.estimate_energy is True: regressor_files = (args.regressor_dir + "/regressor_{mode}_{cam_id}_{regressor}.pkl.gz") reg_file = regressor_files.format( **{ "mode": args.mode, "wave_args": "mixed", # ToDo, control "regressor": regressor_method, "cam_id": "{cam_id}", }) regressor = EnergyRegressor.load(reg_file, cam_id_list=args.cam_ids) class EventFeatures(tb.IsDescription): impact_dist = tb.Float32Col(dflt=1, pos=0) sum_signal_evt = tb.Float32Col(dflt=1, pos=1) max_signal_cam = tb.Float32Col(dflt=1, pos=2) sum_signal_cam = tb.Float32Col(dflt=1, pos=3) N_LST = tb.Int16Col(dflt=1, pos=4) N_MST = tb.Int16Col(dflt=1, pos=5) N_SST = tb.Int16Col(dflt=1, pos=6) width = tb.Float32Col(dflt=1, pos=7) length = tb.Float32Col(dflt=1, pos=8) skewness = tb.Float32Col(dflt=1, pos=9) kurtosis = tb.Float32Col(dflt=1, pos=10) h_max = tb.Float32Col(dflt=1, pos=11) err_est_pos = tb.Float32Col(dflt=1, pos=12) err_est_dir = tb.Float32Col(dflt=1, pos=13) mc_energy = tb.FloatCol(dflt=1, pos=14) local_distance = tb.Float32Col(dflt=1, pos=15) n_pixel = tb.Int16Col(dflt=1, pos=16) n_cluster = tb.Int16Col(dflt=-1, pos=17) obs_id = tb.Int16Col(dflt=1, pos=18) event_id = tb.Int32Col(dflt=1, pos=19) tel_id = tb.Int16Col(dflt=1, pos=20) xi = tb.Float32Col(dflt=np.nan, pos=21) reco_energy = tb.FloatCol(dflt=np.nan, pos=22) ellipticity = tb.FloatCol(dflt=1, pos=23) n_tel_reco = tb.FloatCol(dflt=1, pos=24) n_tel_discri = tb.FloatCol(dflt=1, pos=25) mc_core_x = tb.FloatCol(dflt=1, pos=26) mc_core_y = tb.FloatCol(dflt=1, pos=27) reco_core_x = tb.FloatCol(dflt=1, pos=28) reco_core_y = tb.FloatCol(dflt=1, pos=29) mc_h_first_int = tb.FloatCol(dflt=1, pos=30) offset = tb.Float32Col(dflt=np.nan, pos=31) mc_x_max = tb.Float32Col(dflt=np.nan, pos=32) alt = tb.Float32Col(dflt=np.nan, pos=33) az = tb.Float32Col(dflt=np.nan, pos=34) reco_energy_tel = tb.Float32Col(dflt=np.nan, pos=35) # from hillas_reco ellipticity_reco = tb.FloatCol(dflt=1, pos=36) local_distance_reco = tb.Float32Col(dflt=1, pos=37) skewness_reco = tb.Float32Col(dflt=1, pos=38) kurtosis_reco = tb.Float32Col(dflt=1, pos=39) width_reco = tb.Float32Col(dflt=1, pos=40) length_reco =
tb.Float32Col(dflt=1, pos=41) psi = tb.Float32Col(dflt=1, pos=42) psi_reco = tb.Float32Col(dflt=1, pos=43) sum_signal_cam_reco = tb.Float32Col(dflt=1, pos=44) feature_outfile = tb.open_file(args.outfile, mode="w") feature_table = {} feature_events = {} # Telescopes in analysis allowed_tels = set(prod3b_tel_ids(array, site=site)) for i, filename in enumerate(filenamelist): print("file: {} filename = {}".format(i, filename)) source = event_source(input_url=filename, allowed_tels=allowed_tels, max_events=args.max_events) # loop that cleans and parametrises the images and performs the reconstruction # for each event for ( event, n_pixel_dict, hillas_dict, hillas_dict_reco, n_tels, tot_signal, max_signals, n_cluster_dict, reco_result, impact_dict, ) in preper.prepare_event(source): # Angular quantities run_array_direction = event.mcheader.run_array_direction xi = angular_separation(event.mc.az, event.mc.alt, reco_result.az, reco_result.alt) offset = angular_separation( run_array_direction[0], # az run_array_direction[1], # alt reco_result.az, reco_result.alt, ) # Impact parameter reco_core_x = reco_result.core_x reco_core_y = reco_result.core_y # Height of shower maximum h_max = reco_result.h_max # TODO: add conversion to number of radiation lengths, need an atmosphere profile reco_energy = np.nan reco_energy_tel = dict() # Not optimal at all, two loops over telescopes!!! # For energy estimation if args.estimate_energy is True: weight_tel = np.zeros(len(hillas_dict.keys())) energy_tel = np.zeros(len(hillas_dict.keys())) for idx, tel_id in enumerate(hillas_dict.keys()): cam_id = event.inst.subarray.tel[tel_id].camera.cam_id moments = hillas_dict[tel_id] model = regressor.model_dict[cam_id] features_img = np.array([ np.log10(moments.intensity), np.log10(impact_dict[tel_id].value), moments.width.value, moments.length.value, h_max.value, ]) energy_tel[idx] = model.predict([features_img]) weight_tel[idx] = moments.intensity reco_energy_tel[tel_id] = energy_tel[idx] reco_energy = np.sum(weight_tel * energy_tel) / sum(weight_tel) else: for idx, tel_id in enumerate(hillas_dict.keys()): reco_energy_tel[tel_id] = np.nan for idx, tel_id in enumerate(hillas_dict.keys()): cam_id = event.inst.subarray.tel[tel_id].camera.cam_id if cam_id not in feature_events: feature_table[cam_id] = feature_outfile.create_table( "/", "_".join(["feature_events", cam_id]), EventFeatures) feature_events[cam_id] = feature_table[cam_id].row moments = hillas_dict[tel_id] ellipticity = moments.width / moments.length # Write to file also the Hillas parameters that have been used # to calculate reco_results moments_reco = hillas_dict_reco[tel_id] ellipticity_reco = moments_reco.width / moments_reco.length feature_events[cam_id]["impact_dist"] = ( impact_dict[tel_id].to("m").value) feature_events[cam_id]["sum_signal_evt"] = tot_signal feature_events[cam_id]["max_signal_cam"] = max_signals[tel_id] feature_events[cam_id]["sum_signal_cam"] = moments.intensity feature_events[cam_id]["N_LST"] = n_tels["LST"] feature_events[cam_id]["N_MST"] = n_tels["MST"] feature_events[cam_id]["N_SST"] = n_tels["SST"] feature_events[cam_id]["width"] = moments.width.to("m").value feature_events[cam_id]["length"] = moments.length.to("m").value feature_events[cam_id]["psi"] = moments.psi.to("deg").value feature_events[cam_id]["skewness"] = moments.skewness feature_events[cam_id]["kurtosis"] = moments.kurtosis feature_events[cam_id]["h_max"] = h_max.to("m").value feature_events[cam_id]["err_est_pos"] = np.nan feature_events[cam_id]["err_est_dir"] = np.nan
feature_events[cam_id]["mc_energy"] = event.mc.energy.to( "TeV").value feature_events[cam_id]["local_distance"] = moments.r.to( "m").value feature_events[cam_id]["n_pixel"] = n_pixel_dict[tel_id] feature_events[cam_id]["obs_id"] = event.r0.obs_id feature_events[cam_id]["event_id"] = event.r0.event_id feature_events[cam_id]["tel_id"] = tel_id feature_events[cam_id]["xi"] = xi.to("deg").value feature_events[cam_id]["reco_energy"] = reco_energy feature_events[cam_id]["ellipticity"] = ellipticity.value feature_events[cam_id]["n_cluster"] = n_cluster_dict[tel_id] feature_events[cam_id]["n_tel_reco"] = n_tels["reco"] feature_events[cam_id]["n_tel_discri"] = n_tels["discri"] feature_events[cam_id]["mc_core_x"] = event.mc.core_x.to( "m").value feature_events[cam_id]["mc_core_y"] = event.mc.core_y.to( "m").value feature_events[cam_id]["reco_core_x"] = reco_core_x.to( "m").value feature_events[cam_id]["reco_core_y"] = reco_core_y.to( "m").value feature_events[cam_id][ "mc_h_first_int"] = event.mc.h_first_int.to("m").value feature_events[cam_id]["offset"] = offset.to("deg").value feature_events[cam_id][ "mc_x_max"] = event.mc.x_max.value # g / cm2 feature_events[cam_id]["alt"] = reco_result.alt.to("deg").value feature_events[cam_id]["az"] = reco_result.az.to("deg").value feature_events[cam_id]["reco_energy_tel"] = reco_energy_tel[ tel_id] # Variables from hillas_dict_reco feature_events[cam_id][ "ellipticity_reco"] = ellipticity_reco.value feature_events[cam_id][ "local_distance_reco"] = moments_reco.r.to("m").value feature_events[cam_id]["skewness_reco"] = moments_reco.skewness feature_events[cam_id]["kurtosis_reco"] = moments_reco.kurtosis feature_events[cam_id]["width_reco"] = moments_reco.width.to( "m").value feature_events[cam_id]["length_reco"] = moments_reco.length.to( "m").value feature_events[cam_id]["psi_reco"] = moments_reco.psi.to( "deg").value feature_events[cam_id][ "sum_signal_cam_reco"] = moments_reco.intensity feature_events[cam_id].append() if signal_handler.stop: break if signal_handler.stop: break # make sure that all the events are properly stored for table in feature_table.values(): table.flush() img_cutflow() evt_cutflow()
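# For reference, a stand-alone version (toy numbers) of the per-image
# feature vector fed to the energy regressor above; `model` stands for the
# camera-specific scikit-learn regressor loaded from disk.
import numpy as np

features_img = np.array([
    np.log10(250.0),  # np.log10(moments.intensity)
    np.log10(120.0),  # np.log10(impact_dict[tel_id].value), impact distance in m
    0.04,             # moments.width.value
    0.11,             # moments.length.value
    9500.0,           # h_max.value, height of shower maximum in m
])
# energy_tel[idx] = model.predict([features_img])  # one estimate per image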
def main(): # Read arguments parser = argparse.ArgumentParser( description="Build model for regression/classification") parser.add_argument("--config_file", type=str, required=True) parser.add_argument( "--max_events", type=int, default=-1, help="maximum number of events for training", ) mode_group = parser.add_mutually_exclusive_group() mode_group.add_argument( "--wave", dest="mode", action="store_const", const="wave", default="tail", help="if set, use wavelet cleaning", ) mode_group.add_argument( "--tail", dest="mode", action="store_const", const="tail", help="if set, use tail cleaning, otherwise wavelets", ) args = parser.parse_args() # Read configuration file cfg = load_config(args.config_file) # Type of model (regression or classification) model_type = cfg["General"]["model_type"] # Import parameters data_dir = cfg["General"]["data_dir"] outdir = cfg["General"]["outdir"] if not os.path.exists(outdir): os.makedirs(outdir) cam_ids = cfg["General"]["cam_id_list"] table_name_template = cfg["General"]["table_name_template"] table_name = [table_name_template + cam_id for cam_id in cam_ids] # List of features feature_list = cfg["FeatureList"] # Optimisation parameters method_name = cfg["Method"]["name"] tuned_parameters = [cfg["Method"]["tuned_parameters"]] scoring = "explained_variance" cv = cfg["Method"]["cv"] # Split fraction train_fraction = cfg["Split"]["train_fraction"] if model_type in "regressor": data_file = cfg["General"]["data_file"].format(args.mode) filename = path.join(data_dir, data_file) # List of cuts cuts = make_cut_list(cfg["SigFiducialCuts"]) init_model = AdaBoostRegressor(DecisionTreeRegressor(max_depth=None)) # Name of target target_name = cfg["Method"]["target_name"] elif model_type in "classifier": data_sig_file = cfg["General"]["data_sig_file"].format(args.mode) data_bkg_file = cfg["General"]["data_bkg_file"].format(args.mode) filename_sig = path.join(data_dir, data_sig_file) filename_bkg = path.join(data_dir, data_bkg_file) # List of cuts sig_cuts = make_cut_list(cfg["SigFiducialCuts"]) bkg_cuts = make_cut_list(cfg["BkgFiducialCuts"]) # Model if method_name in "AdaBoostClassifier": init_model = AdaBoostClassifier( DecisionTreeClassifier(max_depth=4)) elif method_name in "RandomForestClassifier": init_model = RandomForestClassifier( n_estimators=500, max_depth=None, min_samples_split=0.05, max_features="sqrt", bootstrap=True, random_state=None, criterion="gini", class_weight= "balanced_subsample", # Reweight events for each tree ) use_same_number_of_sig_and_bkg_for_training = cfg["Split"][ "use_same_number_of_sig_and_bkg_for_training"] print("### Using {} for model construction".format(method_name)) models = dict() for idx, cam_id in enumerate(cam_ids): print("### Building model for {}".format(cam_id)) if model_type in "regressor": # Load data data = pd.read_hdf(filename, table_name[idx], mode="r") data = prepare_data(ds=data, cuts=cuts)[0:args.max_events] # Init model factory factory = TrainModel(case=model_type, target_name=target_name, feature_name_list=feature_list) # Split data factory.split_data(data_sig=data, train_fraction=train_fraction) print("Training sample: sig {}".format(len(factory.data_train))) print("Test sample: sig {}".format(len(factory.data_test))) elif model_type in "classifier": # Load data data_sig = pd.read_hdf(filename_sig, table_name[idx], mode="r") data_bkg = pd.read_hdf(filename_bkg, table_name[idx], mode="r") # Add label data_sig = prepare_data(ds=data_sig, label=1, cuts=sig_cuts) data_bkg = prepare_data(ds=data_bkg, label=0, 
cuts=bkg_cuts) data_sig = data_sig[0:args.max_events] data_bkg = data_bkg[0:args.max_events] # Init model factory factory = TrainModel(case=model_type, target_name="label", feature_name_list=feature_list) # Split data factory.split_data( data_sig=data_sig, data_bkg=data_bkg, train_fraction=train_fraction, force_same_nsig_nbkg= use_same_number_of_sig_and_bkg_for_training, ) print("Training sample: sig {} and bkg {}".format( len(factory.data_train.query("label==1")), len(factory.data_train.query("label==0")), )) print("Test sample: sig {} and bkg {}".format( len(factory.data_test.query("label==1")), len(factory.data_test.query("label==0")), )) # Build model best_model = factory.get_optimal_model(init_model, tuned_parameters, scoring=scoring, cv=cv) if model_type in "classifier": # print report print( classification_report( factory.data_scikit["y_test"], best_model.predict(factory.data_scikit["X_test"]), )) # Calibrate model if necessary on test data if cfg["Method"]["calibrate_output"] is True: print("==> Calibrate classifier...") best_model = CalibratedClassifierCV(best_model, method="sigmoid", cv="prefit") best_model.fit(factory.data_scikit["X_test"], factory.data_scikit["y_test"]) # save model models[cam_id] = best_model outname = "{}_{}_{}_{}.pkl.gz".format(model_type, args.mode, cam_id, method_name) joblib.dump(best_model, path.join(outdir, outname)) # save data save_obj( factory.data_scikit, path.join( outdir, "data_scikit_{}_{}_{}_{}.pkl.gz".format( model_type, method_name, args.mode, cam_id), ), ) factory.data_train.to_pickle( path.join( outdir, "data_train_{}_{}_{}_{}.pkl.gz".format(model_type, method_name, args.mode, cam_id), )) factory.data_test.to_pickle( path.join( outdir, "data_test_{}_{}_{}_{}.pkl.gz".format(model_type, method_name, args.mode, cam_id), ))
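# A minimal sketch, on synthetic data, of the output-calibration step above;
# in the script the held-out set is data_scikit["X_test"]/["y_test"].
# cv="prefit" follows the older scikit-learn API used here (newer releases
# deprecate it in favour of wrapping an already-fitted estimator).
from sklearn.calibration import CalibratedClassifierCV
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

X, y = make_classification(n_samples=500, random_state=0)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
clf = RandomForestClassifier(n_estimators=50, random_state=0).fit(X_train, y_train)
# cv="prefit" keeps the trained model and fits only the sigmoid mapping
# from raw scores to calibrated probabilities on the held-out set
calibrated = CalibratedClassifierCV(clf, method="sigmoid", cv="prefit")
calibrated.fit(X_test, y_test)
proba = calibrated.predict_proba(X_test)[:, 1]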
def main(): # Read arguments parser = argparse.ArgumentParser(description="Make performance files") parser.add_argument("--config_file", type=str, required=True, help="") parser.add_argument( "--obs_time", type=str, required=True, help="Observation time, should be given as a string, value and astropy unit separated by an empty space", ) mode_group = parser.add_mutually_exclusive_group() mode_group.add_argument( "--wave", dest="mode", action="store_const", const="wave", default="tail", help="if set, use wavelet cleaning", ) mode_group.add_argument( "--tail", dest="mode", action="store_const", const="tail", help="if set, use tail cleaning, otherwise wavelets", ) args = parser.parse_args() # Read configuration file cfg = load_config(args.config_file) # Add obs. time in configuration file str_obs_time = args.obs_time.split() cfg["analysis"]["obs_time"] = { "value": float(str_obs_time[0]), "unit": str(str_obs_time[-1]), } # Create output directory if necessary outdir = os.path.join( cfg["general"]["outdir"], "irf_{}_ThSq_{}_Time{:.2f}{}".format( args.mode, cfg["analysis"]["thsq_opt"]["type"], cfg["analysis"]["obs_time"]["value"], cfg["analysis"]["obs_time"]["unit"], ), ) if not os.path.exists(outdir): os.makedirs(outdir) indir = cfg["general"]["indir"] template_input_file = cfg["general"]["template_input_file"] # Load data particles = ["gamma", "electron", "proton"] evt_dict = dict() # Contain DL2 file for each type of particle for particle in particles: # template looks like dl2_{}_{}_merged.h5 infile = os.path.join(indir, template_input_file.format(args.mode, particle)) evt_dict[particle] = pd.read_hdf(infile, key="reco_events") # Apply offset cut to proton and electron for particle in ["electron", "proton"]: # print('Initial stat: {} {}'.format(len(evt_dict[particle]), particle)) evt_dict[particle] = evt_dict[particle].query('offset <= {}'.format( cfg['analysis']['max_bg_radius']) ) # Add required data in configuration file for future computation for particle in particles: cfg['particle_information'][particle]['n_files'] = \ len(np.unique(evt_dict[particle]['obs_id'])) cfg['particle_information'][particle]['n_simulated'] = \ cfg['particle_information'][particle]['n_files'] * cfg['particle_information'][particle]['num_showers'] * cfg['particle_information'][particle]['num_use'] # Define model for the particles model_dict = { "gamma": CrabSpectrum("hegra").model, "proton": cosmic_ray_flux, "electron": cosmic_ray_flux, } # Reco energy binning cfg_binning = cfg["analysis"]["ereco_binning"] ereco = ( np.logspace( np.log10(cfg_binning["emin"]), np.log10(cfg_binning["emax"]), cfg_binning["nbin"] + 1, ) * u.TeV ) # Handle theta square cut optimisation # (compute 68 % containment radius PSF if necessary) thsq_opt_type = cfg["analysis"]["thsq_opt"]["type"] if thsq_opt_type in "fixed": thsq_values = np.array([cfg["analysis"]["thsq_opt"]["value"]]) * u.deg print("Using fixed theta cut: {}".format(thsq_values)) elif thsq_opt_type in "opti": thsq_values = np.arange(0.05, 0.40, 0.01) * u.deg print("Optimising theta cut for: {}".format(thsq_values)) elif thsq_opt_type in "r68": print("Using R68% theta cut") print("Computing...") cfg_binning = cfg["analysis"]["ereco_binning"] ereco = ( np.logspace( np.log10(cfg_binning["emin"]), np.log10(cfg_binning["emax"]), cfg_binning["nbin"] + 1, ) * u.TeV ) radius = 68 thsq_values = list() for ibin in range(len(ereco) - 1): emin = ereco[ibin] emax = ereco[ibin + 1] energy_query = "reco_energy > {} and reco_energy <= {}".format( emin.value, emax.value ) data = 
evt_dict["gamma"].query(energy_query).copy() min_stat = 0 if len(data) <= min_stat: print(" ==> Not enough statistics:") print("To be handled...") thsq_values.append(0.3) continue # import sys # sys.exit() psf = np.percentile(data["offset"], radius) psf_err = psf / np.sqrt(len(data)) thsq_values.append(psf) thsq_values = np.array(thsq_values) * u.deg # Set 0.05 as a lower value idx = np.where(thsq_values.value < 0.05) thsq_values[idx] = 0.05 * u.deg print("Using theta cut: {}".format(thsq_values)) # Cuts optimisation print("### Finding best cuts...") cut_optimiser = CutsOptimisation(config=cfg, evt_dict=evt_dict, verbose_level=0) # Weight events print("- Weighting events...") cut_optimiser.weight_events( model_dict=model_dict, colname_mc_energy=cfg["column_definition"]["mc_energy"] ) # Find best cutoff to reach best sensitivity print("- Estimating cutoffs...") cut_optimiser.find_best_cutoff(energy_values=ereco, angular_values=thsq_values) # Save results and auxiliary data for diagnostic print("- Saving results to disk...") cut_optimiser.write_results( outdir, "{}.fits".format(cfg["general"]["output_table_name"]), format="fits" ) # Cuts diagnostic print("### Building cut diagnostics...") cut_diagnostic = CutsDiagnostic(config=cfg, indir=outdir) cut_diagnostic.plot_optimisation_summary() cut_diagnostic.plot_diagnostics() # Apply cuts and save data print("### Applying cuts to data...") cut_applicator = CutsApplicator(config=cfg, evt_dict=evt_dict, outdir=outdir) cut_applicator.apply_cuts() # Irf Maker print("### Building IRF...") irf_maker = IrfMaker(config=cfg, evt_dict=evt_dict, outdir=outdir) irf_maker.build_irf() # Sensitivity maker print("### Estimating sensitivity...") sensitivity_maker = SensitivityMaker(config=cfg, outdir=outdir) sensitivity_maker.load_irf() sensitivity_maker.estimate_sensitivity()
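# Worked example (toy offsets) of the R68 theta cut computed above: the 68th
# percentile of the gamma-ray offsets in one reco-energy bin, clamped to the
# same 0.05 deg floor.
import numpy as np
import astropy.units as u

offsets_deg = np.array([0.02, 0.03, 0.04, 0.05, 0.08, 0.12])
r68 = np.percentile(offsets_deg, 68)  # 0.062 for these values
theta_cut = max(r68, 0.05) * u.deg
print(theta_cut)  # 0.062 deg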