def main():
    """Apply trained RF models (g/h classifier, energy and disp-vector
    regressors) to a DL1 file and write the resulting DL2 dataframe.

    Reads the DL1 parameters from ``args.datafile``, filters events according
    to the (possibly user-overridden) configuration, loads the pickled models
    from ``args.path_models`` and writes ``dl2_<basename>`` into
    ``args.outdir``.
    """
    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(os.path.abspath(args.config_file))
        except Exception:
            # NOTE: the original `except("…message…")` caught a *string*,
            # which is not a valid exception class in Python 3 and would have
            # raised a TypeError instead of falling back to the standard config.
            print("Custom configuration could not be loaded !!!")

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.datafile, key=dl1_params_lstcam_key)
    data = filter_events(data, filters=config["events_filters"])

    # Load the trained RF for reconstruction:
    file_energy = os.path.join(args.path_models, "reg_energy.sav")
    file_disp = os.path.join(args.path_models, "reg_disp_vector.sav")
    file_gh = os.path.join(args.path_models, "cls_gh.sav")

    reg_energy = joblib.load(file_energy)
    reg_disp_vector = joblib.load(file_disp)
    cls_gh = joblib.load(file_gh)

    # Apply the models to the data
    dl2 = dl1_to_dl2.apply_models(data, cls_gh, reg_energy, reg_disp_vector,
                                  custom_config=config)

    os.makedirs(args.outdir, exist_ok=True)
    outfile = os.path.join(args.outdir, 'dl2_' + os.path.basename(args.datafile))

    # Start from a copy of the input file so non-DL2 datasets are preserved.
    shutil.copyfile(args.datafile, outfile)
    write_dl2_dataframe(dl2.astype(float), outfile)
def main():
    """Apply trained RF models to a DL1 file (optionally with source-dependent
    parameters) and write the resulting DL2 dataframe.

    Handles missing telescope-pointing values (seen when ``ucts_time`` was
    invalid) by interpolation when possible, otherwise by a sentinel value.
    """
    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(os.path.abspath(args.config_file))
        except Exception:
            # NOTE: `except("…message…")` caught a string — invalid in
            # Python 3 (TypeError at catch time); use a real exception class.
            print("Custom configuration could not be loaded !!!")

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.datafile, key=dl1_params_lstcam_key)

    if config['source_dependent']:
        # BUG FIX: the source-dependent parameters must be read from the input
        # *file*; the original passed the already-loaded DataFrame to read_hdf.
        data = pd.concat(
            [data, pd.read_hdf(args.datafile, key=dl1_params_src_dep_lstcam_key)],
            axis=1)

    # Dealing with pointing missing values. This happened when `ucts_time` was invalid.
    if 'alt_tel' in data.columns and 'az_tel' in data.columns \
            and (np.isnan(data.alt_tel).any() or np.isnan(data.az_tel).any()):
        # make sure there is at least one good pointing value to interp from.
        if np.isfinite(data.alt_tel).any() and np.isfinite(data.az_tel).any():
            data = impute_pointing(data)
        else:
            data.alt_tel = -np.pi / 2.
            data.az_tel = -np.pi / 2.

    data = filter_events(data, filters=config["events_filters"])

    # Load the trained RF for reconstruction:
    file_energy = os.path.join(args.path_models, "reg_energy.sav")
    file_disp = os.path.join(args.path_models, "reg_disp_vector.sav")
    file_gh = os.path.join(args.path_models, "cls_gh.sav")

    reg_energy = joblib.load(file_energy)
    reg_disp_vector = joblib.load(file_disp)
    cls_gh = joblib.load(file_gh)

    # Apply the models to the data
    dl2 = dl1_to_dl2.apply_models(data, cls_gh, reg_energy, reg_disp_vector,
                                  custom_config=config)

    os.makedirs(args.outdir, exist_ok=True)
    outfile = os.path.join(
        args.outdir,
        os.path.basename(args.datafile).replace('dl1', 'dl2'))

    # Start from a copy of the input file so non-DL2 datasets are preserved.
    shutil.copyfile(args.datafile, outfile)
    write_dl2_dataframe(dl2.astype(float), outfile)
def main():
    """Train the RF models on gamma/proton MC, apply them to the test samples
    and produce diagnostic plots (shown interactively unless ``args.batch``).
    """
    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(args.config_file)
        except Exception:
            # NOTE: the original `except("…message…")` caught a string, which
            # raises TypeError in Python 3 instead of being handled.
            log.warning("Custom configuration could not be loaded !!!")

    config = replace_config(standard_config, custom_config)

    reg_energy, reg_disp_vector, cls_gh = dl1_to_dl2.build_models(
        args.gammafile,
        args.protonfile,
        save_models=args.storerf,
        path_models=args.path_models,
        custom_config=config,
    )

    gammas = filter_events(
        pd.read_hdf(args.gammatest, key=dl1_params_lstcam_key),
        config["events_filters"],
    )
    proton = filter_events(
        pd.read_hdf(args.protontest, key=dl1_params_lstcam_key),
        config["events_filters"],
    )

    data = pd.concat([gammas, proton], ignore_index=True)

    dl2 = dl1_to_dl2.apply_models(data, cls_gh, reg_energy, reg_disp_vector,
                                  custom_config=config)

    # ---- PLOT SOME RESULTS ----
    selected_gammas = dl2.query('reco_type==0 & mc_type==0')

    if len(selected_gammas) == 0:
        log.warning('No gammas selected, I will not plot any output')
        sys.exit()

    plot_dl2.plot_features(dl2)
    if not args.batch:
        plt.show()

    plot_dl2.energy_results(selected_gammas)
    if not args.batch:
        plt.show()

    plot_dl2.direction_results(selected_gammas)
    if not args.batch:
        plt.show()

    plot_dl2.plot_disp_vector(selected_gammas)
    if not args.batch:
        plt.show()

    plot_dl2.plot_pos(dl2)
    if not args.batch:
        plt.show()

    plot_dl2.plot_roc_gamma(dl2)
    if not args.batch:
        plt.show()

    plot_dl2.plot_models_features_importances(args.path_models, args.config_file)
    if not args.batch:
        plt.show()

    plt.hist(dl2[dl2['mc_type'] == 101]['gammaness'], bins=100)
    plt.hist(dl2[dl2['mc_type'] == 0]['gammaness'], bins=100)
    if not args.batch:
        plt.show()
def main():
    """Run the full DL1 → DL2 reconstruction pipeline.

    Reads DL1 parameters (and optional likelihood-fit parameters), repairs
    missing pointing values, applies the trained models (source-independent
    and/or source-dependent analysis, with either disp-vector or
    disp-norm+sign reconstruction) and writes the DL2 file, copying over the
    non-DL1-parameter datasets from the input.
    """
    args = parser.parse_args()

    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(os.path.abspath(args.config_file))
        except Exception:
            # NOTE: `except("…message…")` caught a string — a TypeError in
            # Python 3, not the intended fallback; catch a real class.
            print("Custom configuration could not be loaded !!!")

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.input_file, key=dl1_params_lstcam_key)

    if 'lh_fit_config' in config.keys():
        lhfit_data = pd.read_hdf(args.input_file,
                                 key=dl1_likelihood_params_lstcam_key)
        if np.all(lhfit_data['obs_id'] == data['obs_id']) & np.all(
                lhfit_data['event_id'] == data['event_id']):
            lhfit_data.drop({'obs_id', 'event_id'}, axis=1, inplace=True)
            lhfit_keys = lhfit_data.keys()
            data = pd.concat([data, lhfit_data], axis=1)
        else:
            # Previously a mismatch silently skipped the merge and caused a
            # NameError on `lhfit_keys` only at write time — fail early instead.
            raise ValueError('Likelihood-fit parameters do not match the DL1 '
                             'parameters (obs_id/event_id mismatch).')

    # if real data, add delta t to dataframe keys
    data = add_delta_t_key(data)

    # Dealing with pointing missing values. This happened when `ucts_time` was invalid.
    if 'alt_tel' in data.columns and 'az_tel' in data.columns \
            and (np.isnan(data.alt_tel).any() or np.isnan(data.az_tel).any()):
        # make sure there is at least one good pointing value to interp from.
        if np.isfinite(data.alt_tel).any() and np.isfinite(data.az_tel).any():
            data = impute_pointing(data)
        else:
            data.alt_tel = -np.pi / 2.
            data.az_tel = -np.pi / 2.

    # Get trained RF path for reconstruction:
    file_reg_energy = os.path.join(args.path_models, 'reg_energy.sav')
    file_cls_gh = os.path.join(args.path_models, 'cls_gh.sav')

    if config['disp_method'] == 'disp_vector':
        file_disp_vector = os.path.join(args.path_models, 'reg_disp_vector.sav')
    elif config['disp_method'] == 'disp_norm_sign':
        file_disp_norm = os.path.join(args.path_models, 'reg_disp_norm.sav')
        file_disp_sign = os.path.join(args.path_models, 'cls_disp_sign.sav')

    subarray_info = SubarrayDescription.from_hdf(args.input_file)
    tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
    focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length

    # Apply the models to the data

    # Source-independent analysis
    if not config['source_dependent']:
        data = filter_events(
            data,
            filters=config["events_filters"],
            finite_params=config['energy_regression_features'] +
            config['disp_regression_features'] +
            config['particle_classification_features'] +
            config['disp_classification_features'],
        )

        if config['disp_method'] == 'disp_vector':
            dl2 = dl1_to_dl2.apply_models(data,
                                          file_cls_gh,
                                          file_reg_energy,
                                          reg_disp_vector=file_disp_vector,
                                          focal_length=focal_length,
                                          custom_config=config)
        elif config['disp_method'] == 'disp_norm_sign':
            dl2 = dl1_to_dl2.apply_models(data,
                                          file_cls_gh,
                                          file_reg_energy,
                                          reg_disp_norm=file_disp_norm,
                                          cls_disp_sign=file_disp_sign,
                                          focal_length=focal_length,
                                          custom_config=config)

    # Source-dependent analysis
    if config['source_dependent']:
        # if source-dependent parameters are already in dl1 data, just read those data.
        if dl1_params_src_dep_lstcam_key in get_dataset_keys(args.input_file):
            data_srcdep = get_srcdep_params(args.input_file)
        # if not, source-dependent parameters are added now
        else:
            data_srcdep = pd.concat(dl1_to_dl2.get_source_dependent_parameters(
                data, config, focal_length=focal_length), axis=1)

        dl2_srcdep_dict = {}
        srcindep_keys = data.keys()
        srcdep_assumed_positions = data_srcdep.columns.levels[0]

        # One reconstruction pass per assumed source position.
        for i, k in enumerate(srcdep_assumed_positions):
            data_with_srcdep_param = pd.concat([data, data_srcdep[k]], axis=1)
            data_with_srcdep_param = filter_events(
                data_with_srcdep_param,
                filters=config["events_filters"],
                finite_params=config['energy_regression_features'] +
                config['disp_regression_features'] +
                config['particle_classification_features'] +
                config['disp_classification_features'],
            )

            if config['disp_method'] == 'disp_vector':
                dl2_df = dl1_to_dl2.apply_models(
                    data_with_srcdep_param,
                    file_cls_gh,
                    file_reg_energy,
                    reg_disp_vector=file_disp_vector,
                    focal_length=focal_length,
                    custom_config=config)
            elif config['disp_method'] == 'disp_norm_sign':
                dl2_df = dl1_to_dl2.apply_models(data_with_srcdep_param,
                                                 file_cls_gh,
                                                 file_reg_energy,
                                                 reg_disp_norm=file_disp_norm,
                                                 cls_disp_sign=file_disp_sign,
                                                 focal_length=focal_length,
                                                 custom_config=config)

            dl2_srcdep = dl2_df.drop(srcindep_keys, axis=1)
            dl2_srcdep_dict[k] = dl2_srcdep

            # The source-independent columns are identical for every assumed
            # position — keep them from the first pass only.
            if i == 0:
                dl2_srcindep = dl2_df[srcindep_keys]

    os.makedirs(args.output_dir, exist_ok=True)
    output_file = os.path.join(
        args.output_dir,
        os.path.basename(args.input_file).replace('dl1', 'dl2', 1))

    if os.path.exists(output_file):
        raise IOError(output_file + ' exists, exiting.')

    dl1_keys = get_dataset_keys(args.input_file)

    # Drop the DL1 datasets that are superseded (or too large) in DL2.
    if dl1_images_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_images_lstcam_key)
    if dl1_params_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_params_lstcam_key)
    if dl1_params_src_dep_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_params_src_dep_lstcam_key)
    if dl1_likelihood_params_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_likelihood_params_lstcam_key)

    metadata = global_metadata()
    write_metadata(metadata, output_file)

    with open_file(args.input_file, 'r') as h5in:
        with open_file(output_file, 'a') as h5out:
            # Write the selected DL1 info
            for k in dl1_keys:
                if not k.startswith('/'):
                    k = '/' + k
                path, name = k.rsplit('/', 1)
                if path not in h5out:
                    grouppath, groupname = path.rsplit('/', 1)
                    g = h5out.create_group(grouppath, groupname,
                                           createparents=True)
                else:
                    g = h5out.get_node(path)
                h5in.copy_node(k, g, overwrite=True)

    # need container to use lstchain.io.add_global_metadata and lstchain.io.add_config_metadata
    if not config['source_dependent']:
        if 'lh_fit_config' not in config.keys():
            write_dl2_dataframe(dl2, output_file, config=config, meta=metadata)
        else:
            # Split the likelihood-fit columns into their own dataset.
            dl2_onlylhfit = dl2[lhfit_keys]
            dl2.drop(lhfit_keys, axis=1, inplace=True)
            write_dl2_dataframe(dl2, output_file, config=config, meta=metadata)
            write_dataframe(dl2_onlylhfit, output_file,
                            dl2_likelihood_params_lstcam_key,
                            config=config, meta=metadata)
    else:
        write_dl2_dataframe(dl2_srcindep, output_file, config=config,
                            meta=metadata)
        write_dataframe(pd.concat(dl2_srcdep_dict, axis=1), output_file,
                        dl2_params_src_dep_lstcam_key, config=config,
                        meta=metadata)
if __name__ == '__main__':
    set_figures()

    train_filename = args.train_file
    test_filename = args.test_file

    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(args.config_file)
        except Exception:
            # NOTE: the original `except("…message…")` caught a string, which
            # raises TypeError in Python 3 instead of being handled.
            print("Custom configuration could not be loaded !!!")

    config = replace_config(standard_config, custom_config)
    events_filters = config["events_filters"]

    param_train = data_prepare(train_filename,
                               key=args.dl1_params_camera_key,
                               filters=events_filters,
                               telescope=args.telescope,
                               quality=True,
                               intensity_cut=args.intensity_cut)
    param_test = data_prepare(test_filename,
                              key=args.dl1_params_camera_key,
                              filters=events_filters,
                              telescope=args.telescope,
                              quality=True,
                              intensity_cut=args.intensity_cut)

    # Apply an additional cut on the source distance from the camera centre
    # for the training dataset.
    src_center_distance = np.sqrt(param_train['src_x']**2 +
                                  param_train['src_y']**2)
    cut_deg = 1  # deg
    mask = src_center_distance < cut_deg / 2.0
    param_train = param_train[mask]
    print('Size of training dataset after source position cut:',
          param_train.shape[0])

    # source position in diffuse training data
    x_src = param_train['src_x']
    y_src = param_train['src_y']
def main():
    """Train RF models on gamma/proton MC, apply them to the test samples and
    show diagnostic plots (energy, direction, disp, ROC, importances)."""
    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(args.config_file)
        except Exception:
            # NOTE: `except("…message…")` caught a string — a TypeError in
            # Python 3, not the intended fallback; catch a real class.
            print("Custom configuration could not be loaded !!!")

    config = replace_config(standard_config, custom_config)

    reg_energy, reg_disp_vector, cls_gh = dl1_to_dl2.build_models(
        args.gammafile,
        args.protonfile,
        save_models=args.storerf,
        path_models=args.path_models,
        custom_config=config,
    )

    gammas = filter_events(
        pd.read_hdf(args.gammatest, key=dl1_params_lstcam_key),
        config["events_filters"],
    )
    proton = filter_events(
        pd.read_hdf(args.protontest, key=dl1_params_lstcam_key),
        config["events_filters"],
    )

    data = pd.concat([gammas, proton], ignore_index=True)

    dl2 = dl1_to_dl2.apply_models(data, cls_gh, reg_energy, reg_disp_vector,
                                  custom_config=config)

    # ---- PLOT SOME RESULTS ----
    # .copy() avoids pandas SettingWithCopy warnings/silent no-ops when
    # reco_type is assigned on the selections below.
    gammas = dl2[dl2.gammaness >= 0.5].copy()
    protons = dl2[dl2.gammaness < 0.5].copy()

    gammas.reco_type = 0
    protons.reco_type = 1

    focal_length = 28 * u.m
    src_pos_reco = utils.reco_source_position_sky(
        gammas.x.values * u.m,
        gammas.y.values * u.m,
        gammas.reco_disp_dx.values * u.m,
        gammas.reco_disp_dy.values * u.m,
        focal_length,
        gammas.mc_alt_tel.values * u.rad,
        gammas.mc_az_tel.values * u.rad)

    plot_dl2.plot_features(dl2)
    plt.show()

    plot_dl2.plot_e(gammas, 10, 1.5, 3.5)
    plt.show()

    plot_dl2.calc_resolution(gammas)
    plt.show()

    plot_dl2.plot_e_resolution(gammas, 10, 1.5, 3.5)
    plt.show()

    plot_dl2.plot_disp_vector(gammas)
    plt.show()

    try:
        ctaplot.plot_theta2(
            gammas.mc_alt,
            np.arctan(np.tan(gammas.mc_az)),
            src_pos_reco.alt.rad,
            np.arctan(np.tan(src_pos_reco.az.rad)),
            bins=50,
            range=(0, 1),
        )
        plt.show()
        ctaplot.plot_angular_res_per_energy(
            src_pos_reco.alt.rad,
            np.arctan(np.tan(src_pos_reco.az.rad)),
            gammas.mc_alt,
            np.arctan(np.tan(gammas.mc_az)),
            gammas.mc_energy)
        plt.show()
    except Exception:
        # Best-effort plotting; a bare `except:` would also swallow
        # KeyboardInterrupt/SystemExit.
        pass

    regression_features = config["regression_features"]
    classification_features = config["classification_features"]

    plt.show()

    plot_dl2.plot_pos(dl2)
    plt.show()

    plot_dl2.plot_ROC(cls_gh, dl2, classification_features, -1)
    plt.show()

    plot_dl2.plot_importances(cls_gh, classification_features)
    plt.show()

    plot_dl2.plot_importances(reg_energy, regression_features)
    plt.show()

    plot_dl2.plot_importances(reg_disp_vector, regression_features)
    plt.show()

    plt.hist(dl2[dl2['mc_type'] == 101]['gammaness'], bins=100)
    plt.hist(dl2[dl2['mc_type'] == 0]['gammaness'], bins=100)
    plt.show()
def main():
    """Apply trained RF models to a DL1 file and write a DL2 file, copying the
    remaining (non-parameter) DL1 datasets from the input into the output."""
    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(os.path.abspath(args.config_file))
        except Exception:
            # NOTE: `except("…message…")` caught a string — a TypeError in
            # Python 3, not the intended fallback; catch a real class.
            print("Custom configuration could not be loaded !!!")

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.input_file, key=dl1_params_lstcam_key)

    if config['source_dependent']:
        data_src_dep = pd.read_hdf(args.input_file,
                                   key=dl1_params_src_dep_lstcam_key)
        data = pd.concat([data, data_src_dep], axis=1)

    # Dealing with pointing missing values. This happened when `ucts_time` was invalid.
    if 'alt_tel' in data.columns and 'az_tel' in data.columns \
            and (np.isnan(data.alt_tel).any() or np.isnan(data.az_tel).any()):
        # make sure there is at least one good pointing value to interp from.
        if np.isfinite(data.alt_tel).any() and np.isfinite(data.az_tel).any():
            data = impute_pointing(data)
        else:
            data.alt_tel = -np.pi / 2.
            data.az_tel = -np.pi / 2.

    data = filter_events(
        data,
        filters=config["events_filters"],
        finite_params=config['regression_features'] +
        config['classification_features'],
    )

    # Load the trained RF for reconstruction:
    file_energy = os.path.join(args.path_models, "reg_energy.sav")
    file_disp = os.path.join(args.path_models, "reg_disp_vector.sav")
    file_gh = os.path.join(args.path_models, "cls_gh.sav")

    reg_energy = joblib.load(file_energy)
    reg_disp_vector = joblib.load(file_disp)
    cls_gh = joblib.load(file_gh)

    # Apply the models to the data
    dl2 = dl1_to_dl2.apply_models(data, cls_gh, reg_energy, reg_disp_vector,
                                  custom_config=config)

    os.makedirs(args.output_dir, exist_ok=True)
    output_file = os.path.join(
        args.output_dir,
        os.path.basename(args.input_file).replace('dl1', 'dl2'))

    if os.path.exists(output_file):
        raise IOError(output_file + ' exists, exiting.')

    dl1_keys = get_dataset_keys(args.input_file)

    # Drop the DL1 datasets that are superseded (or too large) in DL2.
    if dl1_images_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_images_lstcam_key)
    if dl1_params_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_params_lstcam_key)
    if dl1_params_src_dep_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_params_src_dep_lstcam_key)

    with open_file(args.input_file, 'r') as h5in:
        with open_file(output_file, 'a') as h5out:
            # Write the selected DL1 info
            for k in dl1_keys:
                if not k.startswith('/'):
                    k = '/' + k
                path, name = k.rsplit('/', 1)
                if path not in h5out:
                    grouppath, groupname = path.rsplit('/', 1)
                    g = h5out.create_group(grouppath, groupname,
                                           createparents=True)
                else:
                    g = h5out.get_node(path)
                h5in.copy_node(k, g, overwrite=True)

    write_dl2_dataframe(dl2, output_file)
def main():
    """Build the RF models (disp-norm + disp-sign scheme) from gamma/proton
    MC, run them on the test samples and produce the diagnostic plots.

    In batch mode (``args.batch``) the figures are generated but never shown.
    """
    args = parser.parse_args()

    custom_config = {}
    if args.config_file is not None:
        custom_config = read_configuration_file(args.config_file)
    config = replace_config(standard_config, custom_config)

    # Effective focal length of the telescope used for the reconstruction.
    subarray_info = SubarrayDescription.from_hdf(args.gammatest)
    tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
    focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length

    reg_energy, reg_disp_norm, cls_disp_sign, cls_gh = dl1_to_dl2.build_models(
        args.gammafile,
        args.protonfile,
        save_models=args.save_models,
        path_models=args.path_models,
        custom_config=config,
    )

    test_gammas = filter_events(
        pd.read_hdf(args.gammatest, key=dl1_params_lstcam_key),
        config["events_filters"],
    )
    test_protons = filter_events(
        pd.read_hdf(args.protontest, key=dl1_params_lstcam_key),
        config["events_filters"],
    )
    data = pd.concat([test_gammas, test_protons], ignore_index=True)

    dl2 = dl1_to_dl2.apply_models(data,
                                  cls_gh,
                                  reg_energy,
                                  reg_disp_norm=reg_disp_norm,
                                  cls_disp_sign=cls_disp_sign,
                                  focal_length=focal_length,
                                  custom_config=config)

    # ---- plot some results ----
    selected_gammas = dl2.query('reco_type==0 & mc_type==0')
    if len(selected_gammas) == 0:
        log.warning('No gammas selected, I will not plot any output')
        sys.exit()

    def _show():
        # Only display figures interactively outside batch mode.
        if not args.batch:
            plt.show()

    plot_dl2.plot_features(dl2)
    _show()
    plot_dl2.energy_results(selected_gammas)
    _show()
    plot_dl2.direction_results(selected_gammas)
    _show()
    plot_dl2.plot_disp_vector(selected_gammas)
    _show()
    plot_dl2.plot_pos(dl2)
    _show()
    plot_dl2.plot_roc_gamma(dl2)
    _show()
    plot_dl2.plot_models_features_importances(args.path_models,
                                              args.config_file)
    _show()

    plt.hist(dl2[dl2['mc_type'] == 101]['gammaness'], bins=100)
    plt.hist(dl2[dl2['mc_type'] == 0]['gammaness'], bins=100)
    _show()
def main():
    """Apply trained RF models to a DL1 file, supporting both
    source-independent and source-dependent analyses, and write the DL2 file
    together with the remaining DL1 datasets from the input."""
    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(os.path.abspath(args.config_file))
        except Exception:
            # NOTE: `except("…message…")` caught a string — a TypeError in
            # Python 3, not the intended fallback; catch a real class.
            print("Custom configuration could not be loaded !!!")

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.input_file, key=dl1_params_lstcam_key)

    # if real data, add delta t to dataframe keys
    data = add_delta_t_key(data)

    # Dealing with pointing missing values. This happened when `ucts_time` was invalid.
    if 'alt_tel' in data.columns and 'az_tel' in data.columns \
            and (np.isnan(data.alt_tel).any() or np.isnan(data.az_tel).any()):
        # make sure there is at least one good pointing value to interp from.
        if np.isfinite(data.alt_tel).any() and np.isfinite(data.az_tel).any():
            data = impute_pointing(data)
        else:
            data.alt_tel = -np.pi / 2.
            data.az_tel = -np.pi / 2.

    # Load the trained RF for reconstruction:
    file_energy = args.path_models + "/reg_energy.sav"
    file_disp = args.path_models + "/reg_disp_vector.sav"
    file_gh = args.path_models + "/cls_gh.sav"

    reg_energy = joblib.load(file_energy)
    reg_disp_vector = joblib.load(file_disp)
    cls_gh = joblib.load(file_gh)

    subarray_info = SubarrayDescription.from_hdf(args.input_file)
    tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
    focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length

    # Apply the models to the data

    # Source-independent analysis
    if not config['source_dependent']:
        data = filter_events(
            data,
            filters=config["events_filters"],
            finite_params=config['regression_features'] +
            config['classification_features'],
        )

        dl2 = dl1_to_dl2.apply_models(data,
                                      cls_gh,
                                      reg_energy,
                                      reg_disp_vector,
                                      focal_length=focal_length,
                                      custom_config=config)

    # Source-dependent analysis
    if config['source_dependent']:
        data_srcdep = pd.read_hdf(args.input_file,
                                  key=dl1_params_src_dep_lstcam_key)
        # Columns were stored as stringified tuples; rebuild the MultiIndex
        # (level 0 = assumed source position, level 1 = parameter name).
        data_srcdep.columns = pd.MultiIndex.from_tuples([
            tuple(col[1:-1].replace('\'', '').replace(' ', '').split(","))
            for col in data_srcdep.columns
        ])

        dl2_srcdep_dict = {}

        # One reconstruction pass per assumed source position.
        for i, k in enumerate(data_srcdep.columns.levels[0]):
            data_with_srcdep_param = pd.concat([data, data_srcdep[k]], axis=1)
            data_with_srcdep_param = filter_events(
                data_with_srcdep_param,
                filters=config["events_filters"],
                finite_params=config['regression_features'] +
                config['classification_features'],
            )

            dl2_df = dl1_to_dl2.apply_models(data_with_srcdep_param,
                                             cls_gh,
                                             reg_energy,
                                             reg_disp_vector,
                                             focal_length=focal_length,
                                             custom_config=config)

            dl2_srcdep = dl2_df.drop(data.keys(), axis=1)
            dl2_srcdep_dict[k] = dl2_srcdep

            # Source-independent columns are identical for every assumed
            # position — keep them from the first pass only.
            if i == 0:
                dl2_srcindep = dl2_df.drop(data_srcdep[k].keys(), axis=1)

    os.makedirs(args.output_dir, exist_ok=True)
    output_file = os.path.join(
        args.output_dir,
        os.path.basename(args.input_file).replace('dl1', 'dl2'))

    if os.path.exists(output_file):
        raise IOError(output_file + ' exists, exiting.')

    dl1_keys = get_dataset_keys(args.input_file)

    # Drop the DL1 datasets that are superseded (or too large) in DL2.
    if dl1_images_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_images_lstcam_key)
    if dl1_params_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_params_lstcam_key)
    if dl1_params_src_dep_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_params_src_dep_lstcam_key)

    with open_file(args.input_file, 'r') as h5in:
        with open_file(output_file, 'a') as h5out:
            # Write the selected DL1 info
            for k in dl1_keys:
                if not k.startswith('/'):
                    k = '/' + k
                path, name = k.rsplit('/', 1)
                if path not in h5out:
                    grouppath, groupname = path.rsplit('/', 1)
                    g = h5out.create_group(grouppath, groupname,
                                           createparents=True)
                else:
                    g = h5out.get_node(path)
                h5in.copy_node(k, g, overwrite=True)

    if not config['source_dependent']:
        write_dl2_dataframe(dl2, output_file)
    else:
        write_dl2_dataframe(dl2_srcindep, output_file)
        write_dataframe(pd.concat(dl2_srcdep_dict, axis=1), output_file,
                        dl2_params_src_dep_lstcam_key)