def test_level_populations():
    clf = EventClassifier(cam_id_list=None)
    g = np.array([[26, 70, 53], [97, 20, 56], [35, 38, 81], [48, 60, 40],
                  [73, 68, 63], [96, 86, 63], [73, 67, 6], [48, 66, 60],
                  [47, 82, 87], [60, 52, 74]])
    h = np.array([[18, 31, 47], [15, 81, 72], [75, 93, 45], [57, 50, 3],
                  [12, 80, 3], [82, 49, 31], [1, 21, 0], [79, 12, 29],
                  [19, 52, 42], [86, 49, 15]])
    # binning specification: column 0 in 4 bins, column 1 in 2 bins,
    # both over the full [0, 100] range
    dum_l = [{'maxf': 100, 'minf': 0, 'col': 0, 'nbins': 4},
             {'maxf': 100, 'minf': 0, 'col': 1, 'nbins': 2}]
    group_g = clf._hyperBinning(g, dum_l)
    group_h = clf._hyperBinning(h, dum_l)
    cleaned_g, cleaned_h = clf.level_populations(group_g, group_h, g, h)
    # after levelling, the two samples must contain the same number of events
    assert cleaned_g.shape == cleaned_h.shape
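# The assert above only checks that level_populations returns two samples of
# equal shape; presumably it equalises the per-bin populations of the gamma
# and hadron samples. The helper below is a HYPOTHETICAL sketch of such a
# per-bin down-sampling (assuming the _hyperBinning groups expose a pandas
# GroupBy-style `.groups` mapping); it is not the actual implementation.
def level_populations_sketch(group_g, group_h, g, h):
    idx_g, idx_h = [], []
    # only bins populated in both samples can be compared
    common_bins = set(group_g.groups) & set(group_h.groups)
    for key in common_bins:
        rows_g = list(group_g.groups[key])
        rows_h = list(group_h.groups[key])
        n = min(len(rows_g), len(rows_h))  # keep equal counts per bin
        idx_g += rows_g[:n]
        idx_h += rows_h[:n]
    return g[np.sort(idx_g)], h[np.sort(idx_h)]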
def test_pipeline_classifier():
    cam_id_list = ["FlashCam", "ASTRICam"]
    feature_list = {
        "FlashCam": [[1, 10], [2, 20], [3, 30], [0.9, 9],
                     [10, 1], [20, 2], [30, 3], [9, 0.9]],
        "ASTRICam": [[10, 1], [20, 2], [30, 3], [9, 0.9],
                     [1, 10], [2, 20], [3, 30], [0.9, 9]],
    }
    target_list = {
        "FlashCam": ["a", "a", "a", "a", "b", "b", "b", "b"],
        "ASTRICam": ["a", "a", "a", "a", "b", "b", "b", "b"],
    }
    estimators = [("scaler", StandardScaler()),
                  ("clf", MLPClassifier(max_iter=400))]
    clf = EventClassifier(classifier=Pipeline,
                          steps=estimators,
                          cam_id_list=cam_id_list)
    clf.fit(feature_list, target_list)

    prediction = clf.predict_by_event([{"ASTRICam": [[10, 1]]},
                                       {"ASTRICam": [[2, 20]]},
                                       {"ASTRICam": [[3, 30]]}])
    assert (prediction == ["a", "b", "b"]).all()

    prediction = clf.predict_by_event([{"FlashCam": [[10, 1]]},
                                       {"FlashCam": [[2, 20]]},
                                       {"FlashCam": [[3, 30]]}])
    assert (prediction == ["b", "a", "a"]).all()
def test_Qfactor():
    """TODO: how to test the validity of the Q-factor values?"""
    cam_id = ["ASTRICam"]
    features = {"ASTRICam": [[10, 1], [20, 2], [30, 3], [9, 0.9],
                             [1, 10], [2, 20], [3, 30], [0.9, 9]]}
    target = {"ASTRICam": [1, 1, 1, 1, 0, 0, 0, 0]}
    clf = EventClassifier(cam_id_list=cam_id, n_estimators=10)
    clf.fit(features, target)

    # Now predict
    ev_feat = [{"ASTRICam": [[10, 1]]},
               {"ASTRICam": [[2, 20]]},
               {"ASTRICam": [[3, 30]]},
               {"ASTRICam": [[100, 10]]},
               {"ASTRICam": [[4, 40]]},
               {"ASTRICam": [[0.5, 5]]}]
    true_labels = np.array([1, 0, 0, 1, 0, 0], dtype=np.int8)
    prediction = clf.predict_proba_by_event(X=ev_feat)

    # prediction is a two-column array:
    # the first column is the probability to belong to class "0" --> hadron,
    # the second column is the probability to belong to class "1" --> gamma.
    # We are interested in the probability to be a gamma.
    proba_to_be_gamma = prediction[:, 1]

    Q, gammaness = clf.compute_Qfactor(proba=proba_to_be_gamma,
                                       labels=true_labels,
                                       nbins=2)
    assert Q.size != 0
    assert Q.size == gammaness.size
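# For reference: in IACT analyses the quality factor is conventionally
# defined as Q = eps_gamma / sqrt(eps_hadron), where eps_gamma (eps_hadron)
# is the fraction of gammas (hadrons) surviving a given gammaness cut.
# The helper below is a minimal, hypothetical re-implementation of that
# textbook formula; it illustrates the quantity being reported but is NOT
# guaranteed to match the internals of EventClassifier.compute_Qfactor.
def reference_qfactor(proba, labels, cuts):
    proba, labels = np.asarray(proba), np.asarray(labels)
    q_values = []
    for cut in cuts:
        eps_gamma = np.mean(proba[labels == 1] > cut)   # gamma efficiency
        eps_hadron = np.mean(proba[labels == 0] > cut)  # hadron efficiency
        # Q is undefined when no hadron survives the cut
        q_values.append(eps_gamma / np.sqrt(eps_hadron)
                        if eps_hadron > 0 else np.nan)
    return np.array(q_values)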
def test_prepare_model():
    cam_id_list = ["FlashCam", "ASTRICam"]
    feature_list = {
        "FlashCam": [[1, 10], [2, 20], [3, 30], [0.9, 9],
                     [10, 1], [20, 2], [30, 3], [9, 0.9]],
        "ASTRICam": [[10, 1], [20, 2], [30, 3], [9, 0.9],
                     [1, 10], [2, 20], [3, 30], [0.9, 9]],
    }
    target_list = {
        "FlashCam": ["a", "a", "a", "a", "b", "b", "b", "b"],
        "ASTRICam": ["a", "a", "a", "a", "b", "b", "b", "b"],
    }
    clf = EventClassifier(cam_id_list=cam_id_list, n_estimators=10)
    clf.fit(feature_list, target_list)
    return clf, cam_id_list
def test_prepare_model_MLP():
    cam_id_list = ["FlashCam", "ASTRICam"]
    feature_list = {
        "FlashCam": [[1, 10], [2, 20], [3, 30], [0.9, 9],
                     [10, 1], [20, 2], [30, 3], [9, 0.9]],
        "ASTRICam": [[10, 1], [20, 2], [30, 3], [9, 0.9],
                     [1, 10], [2, 20], [3, 30], [0.9, 9]],
    }
    target_list = {
        "FlashCam": ["a", "a", "a", "a", "b", "b", "b", "b"],
        "ASTRICam": ["a", "a", "a", "a", "b", "b", "b", "b"],
    }
    clf = EventClassifier(classifier=MLPClassifier,
                          cam_id_list=cam_id_list,
                          max_iter=400)
    # MLPs are sensitive to the feature scale, so scale before fitting
    scaled_features, scaler = EventClassifier.scale_features(cam_id_list,
                                                             feature_list)
    clf.fit(scaled_features, target_list)
    return clf, cam_id_list, scaler
def test_fit_save_load_MLP():
    clf, cam_id_list, scaler = test_prepare_model_MLP()
    with TemporaryDirectory() as d:
        temp_path = "/".join([d, "clf_{cam_id}.pkl"])
        clf.save(temp_path)
        clf = EventClassifier.load(temp_path, cam_id_list)
    return clf, cam_id_list, scaler
def test_fit_save_load():
    clf, cam_id_list = test_prepare_model()
    with TemporaryDirectory() as d:
        temp_path = "/".join([d, "clf_{cam_id}.pkl"])
        clf.save(temp_path)
        clf = EventClassifier.load(temp_path, cam_id_list)
    return clf, cam_id_list
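# A reloaded classifier can be used exactly like a freshly trained one.
# Minimal usage sketch, reusing the toy setup above (for the MLP variant the
# features would first have to go through the returned scaler); it assumes
# predict_by_event returns one label per event, as the tests above imply.
def example_predict_after_reload():
    clf, cam_id_list = test_fit_save_load()
    prediction = clf.predict_by_event([{"FlashCam": [[1, 10]]},
                                       {"ASTRICam": [[10, 1]]}])
    assert len(prediction) == 2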
def test_hyperBinning():
    clf = EventClassifier(cam_id_list=None)
    x = np.array([[26, 70, 53], [97, 20, 56], [35, 38, 81], [48, 60, 40],
                  [73, 68, 63], [96, 86, 63], [73, 67, 6], [48, 66, 60],
                  [47, 82, 87], [60, 52, 74]])
    # bin column 0 in 4 bins and column 1 in 2 bins,
    # each over the observed range of the data
    dum_l = [{'maxf': max(x[:, 0]), 'minf': min(x[:, 0]),
              'col': 0, 'nbins': 4},
             {'maxf': max(x[:, 1]), 'minf': min(x[:, 1]),
              'col': 1, 'nbins': 2}]
    dum_g = clf._hyperBinning(x, dum_l)
    assert np.all(dum_g.size() == (1, 1, 3, 2, 1))
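# The `.size()` call above suggests _hyperBinning returns something like a
# pandas GroupBy over per-axis bin indices. Below is a minimal sketch of
# that idea, assuming equal-width bins per axis; it is illustrative only and
# not the actual implementation.
import pandas as pd

def hyper_binning_sketch(data, bin_specs):
    df = pd.DataFrame(data)
    keys = []
    for spec in bin_specs:
        edges = np.linspace(spec['minf'], spec['maxf'], spec['nbins'] + 1)
        # integer bin index of the requested column along this axis
        keys.append(np.digitize(data[:, spec['col']], edges[1:-1]))
    # group rows by their multi-dimensional bin index;
    # .size() then counts the population of each occupied hyper-bin
    return df.groupby(keys)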
def main():

    # Argument parser
    parser = make_argparser()
    parser.add_argument("--regressor_dir", default="./",
                        help="regressors directory")
    parser.add_argument("--classifier_dir", default="./",
                        help="classifiers directory")
    parser.add_argument(
        "--force_tailcut_for_extended_cleaning",
        type=str2bool,
        default=False,
        help="Force tailcut cleaning for energy/score estimation",
    )
    parser.add_argument(
        "--save_images",
        action="store_true",
        help="Save images in images.h5 (one file testing)",
    )
    args = parser.parse_args()

    # Read configuration file
    cfg = load_config(args.config_file)

    # Read site layout
    site = cfg["General"]["site"]
    array = cfg["General"]["array"]
    cameras = cfg["General"]["cam_id_list"]

    # Add force_tailcut_for_extended_cleaning in configuration
    cfg["General"]["force_tailcut_for_extended_cleaning"] = \
        args.force_tailcut_for_extended_cleaning
    cfg["General"]["force_mode"] = "tail"
    force_mode = args.mode
    if cfg["General"]["force_tailcut_for_extended_cleaning"] is True:
        force_mode = "tail"
    print("force_mode={}".format(force_mode))
    print("mode={}".format(args.mode))

    filenamelist = []
    if args.infile_list:
        for f in args.infile_list:
            filenamelist += glob("{}/{}".format(args.indir, f))
        filenamelist.sort()

    if not filenamelist:
        print("no files found; check indir: {}".format(args.indir))
        exit(-1)

    # keeping track of events and where they were rejected
    evt_cutflow = CutFlow("EventCutFlow")
    img_cutflow = CutFlow("ImageCutFlow")

    # Event preparer
    preper = EventPreparer(config=cfg,
                           mode=args.mode,
                           event_cutflow=evt_cutflow,
                           image_cutflow=img_cutflow)

    # Regressor and classifier methods
    regressor_method = cfg["EnergyRegressor"]["method_name"]
    classifier_method = cfg["GammaHadronClassifier"]["method_name"]
    use_proba_for_classifier = cfg["GammaHadronClassifier"]["use_proba"]

    use_regressor = regressor_method not in ["None", "none", None]
    use_classifier = classifier_method not in ["None", "none", None]

    # Classifiers
    if use_classifier:
        classifier_files = (args.classifier_dir +
                            "/classifier_{mode}_{cam_id}_{classifier}.pkl.gz")
        clf_file = classifier_files.format(
            **{
                "mode": force_mode,
                "wave_args": "mixed",
                "classifier": classifier_method,
                "cam_id": "{cam_id}",
            })
        classifier = EventClassifier.load(clf_file, cam_id_list=cameras)

    # Regressors
    if use_regressor:
        regressor_files = (args.regressor_dir +
                           "/regressor_{mode}_{cam_id}_{regressor}.pkl.gz")
        reg_file = regressor_files.format(
            **{
                "mode": force_mode,
                "wave_args": "mixed",
                "regressor": regressor_method,
                "cam_id": "{cam_id}",
            })
        regressor = EnergyRegressor.load(reg_file, cam_id_list=cameras)

    # catch ctrl-c signal to exit current loop and still display results
    signal_handler = SignalHandler()
    signal.signal(signal.SIGINT, signal_handler)

    # Declaration of the column descriptor for the (possible) images file
    class StoredImages(tb.IsDescription):
        event_id = tb.Int32Col(dflt=1, pos=0)
        tel_id = tb.Int16Col(dflt=1, pos=1)
        dl1_phe_image = tb.Float32Col(shape=(1855,), pos=2)
        mc_phe_image = tb.Float32Col(shape=(1855,), pos=3)

    # this class defines the reconstruction parameters to keep track of
    class RecoEvent(tb.IsDescription):
        obs_id = tb.Int16Col(dflt=-1, pos=0)
        event_id = tb.Int32Col(dflt=-1, pos=1)
        NTels_trig = tb.Int16Col(dflt=0, pos=2)
        NTels_reco = tb.Int16Col(dflt=0, pos=3)
        NTels_reco_lst = tb.Int16Col(dflt=0, pos=4)
        NTels_reco_mst = tb.Int16Col(dflt=0, pos=5)
        NTels_reco_sst = tb.Int16Col(dflt=0, pos=6)
        mc_energy = tb.Float32Col(dflt=np.nan, pos=7)
        reco_energy = tb.Float32Col(dflt=np.nan, pos=8)
        reco_alt = tb.Float32Col(dflt=np.nan, pos=9)
        reco_az = tb.Float32Col(dflt=np.nan, pos=10)
        offset = tb.Float32Col(dflt=np.nan, pos=11)
        xi = tb.Float32Col(dflt=np.nan, pos=12)
        ErrEstPos = tb.Float32Col(dflt=np.nan, pos=13)
        ErrEstDir = tb.Float32Col(dflt=np.nan, pos=14)
        gammaness = tb.Float32Col(dflt=np.nan, pos=15)
        success = tb.BoolCol(dflt=False, pos=16)
        score = tb.Float32Col(dflt=np.nan, pos=17)
        h_max = tb.Float32Col(dflt=np.nan, pos=18)
        reco_core_x = tb.Float32Col(dflt=np.nan, pos=19)
        reco_core_y = tb.Float32Col(dflt=np.nan, pos=20)
        mc_core_x = tb.Float32Col(dflt=np.nan, pos=21)
        mc_core_y = tb.Float32Col(dflt=np.nan, pos=22)

    reco_outfile = tb.open_file(
        mode="w",
        # if no outfile name is given (i.e. we don't want to write the event
        # list to disk), we need to specify two "driver" arguments
        **({"filename": args.outfile} if args.outfile else {
            "filename": "no_outfile.h5",
            "driver": "H5FD_CORE",
            "driver_core_backing_store": False,
        }))
    reco_table = reco_outfile.create_table("/", "reco_events", RecoEvent)
    reco_event = reco_table.row

    # Create the images file only if the user wants to store the images
    if args.save_images is True:
        images_outfile = tb.open_file("images.h5", mode="w")
        images_table = {}
        images_phe = {}

    # Telescopes in analysis
    allowed_tels = set(prod3b_tel_ids(array, site=site))
    for i, filename in enumerate(filenamelist):

        source = event_source(input_url=filename,
                              allowed_tels=allowed_tels,
                              max_events=args.max_events)

        # loop that cleans and parametrises the images
        # and performs the reconstruction
        for (
            event,
            dl1_phe_image,
            mc_phe_image,
            n_pixel_dict,
            hillas_dict,
            hillas_dict_reco,
            n_tels,
            tot_signal,
            max_signals,
            n_cluster_dict,
            reco_result,
            impact_dict,
        ) in preper.prepare_event(source):

            # Angular quantities
            run_array_direction = event.mcheader.run_array_direction

            # Angular separation between the true and reconstructed direction
            xi = angular_separation(event.mc.az, event.mc.alt,
                                    reco_result.az, reco_result.alt)

            # Angular separation between the center of the camera
            # and the reconstructed direction
            offset = angular_separation(
                run_array_direction[0],  # az
                run_array_direction[1],  # alt
                reco_result.az,
                reco_result.alt,
            )

            # Height of shower maximum
            h_max = reco_result.h_max

            if hillas_dict is not None:

                # Estimate particle energy
                if use_regressor is True:
                    energy_tel = np.zeros(len(hillas_dict.keys()))
                    weight_tel = np.zeros(len(hillas_dict.keys()))

                    for idx, tel_id in enumerate(hillas_dict.keys()):
                        cam_id = event.inst.subarray.tel[tel_id].camera.cam_id
                        moments = hillas_dict[tel_id]
                        model = regressor.model_dict[cam_id]

                        # Features to be fed to the regressor
                        features_img = np.array([
                            np.log10(moments.intensity),
                            np.log10(impact_dict[tel_id].value),
                            moments.width.value,
                            moments.length.value,
                            h_max.value,
                        ])

                        energy_tel[idx] = model.predict([features_img])
                        weight_tel[idx] = moments.intensity

                    reco_energy = (np.sum(weight_tel * energy_tel) /
                                   sum(weight_tel))
                else:
                    reco_energy = np.nan

                # Estimate particle score/gammaness
                if use_classifier is True:
                    score_tel = np.zeros(len(hillas_dict.keys()))
                    gammaness_tel = np.zeros(len(hillas_dict.keys()))
                    weight_tel = np.zeros(len(hillas_dict.keys()))

                    for idx, tel_id in enumerate(hillas_dict.keys()):
                        cam_id = event.inst.subarray.tel[tel_id].camera.cam_id
                        moments = hillas_dict[tel_id]
                        model = classifier.model_dict[cam_id]

                        # Features to be fed to the classifier
                        features_img = np.array([
                            np.log10(reco_energy),
                            moments.width.value,
                            moments.length.value,
                            moments.skewness,
                            moments.kurtosis,
                            h_max.value,
                        ])

                        # Output of the classifier according to its type
                        if use_proba_for_classifier is False:
                            score_tel[idx] = model.decision_function(
                                [features_img])
                        else:
                            gammaness_tel[idx] = model.predict_proba(
                                [features_img])[:, 1]

                        # Should test other weighting strategies
                        # (e.g. power of charge, impact, etc.)
                        # For now, weighting a la Mars
                        weight_tel[idx] = np.sqrt(moments.intensity)

                    # Weight the final decision/proba
                    if use_proba_for_classifier is True:
                        gammaness = (np.sum(weight_tel * gammaness_tel) /
                                     sum(weight_tel))
                    else:
                        score = (np.sum(weight_tel * score_tel) /
                                 sum(weight_tel))
                else:
                    score = np.nan
                    gammaness = np.nan

                # Regardless of whether energy or gammaness is estimated, if
                # the user wants to save the images of the run we do it here
                # (probably not the most efficient way, but ok for one file)
                if args.save_images is True:
                    for idx, tel_id in enumerate(hillas_dict.keys()):
                        cam_id = event.inst.subarray.tel[tel_id].camera.cam_id
                        if cam_id not in images_phe:
                            images_table[cam_id] = images_outfile.create_table(
                                "/", "_".join(["images", cam_id]),
                                StoredImages)
                            images_phe[cam_id] = images_table[cam_id].row

                shower = event.mc
                mc_core_x = shower.core_x
                mc_core_y = shower.core_y

                reco_core_x = reco_result.core_x
                reco_core_y = reco_result.core_y

                alt, az = reco_result.alt, reco_result.az

                # Fill the table's attributes
                reco_event["NTels_trig"] = len(event.dl0.tels_with_data)
                reco_event["NTels_reco"] = len(hillas_dict)
                reco_event["NTels_reco_lst"] = n_tels["LST_LST_LSTCam"]
                reco_event["NTels_reco_mst"] = n_tels["MST_MST_NectarCam"]
                reco_event["NTels_reco_sst"] = n_tels["SST"]  # will change
                reco_event["reco_energy"] = reco_energy
                reco_event["reco_alt"] = alt.to("deg").value
                reco_event["reco_az"] = az.to("deg").value
                reco_event["offset"] = offset.to("deg").value
                reco_event["xi"] = xi.to("deg").value
                reco_event["h_max"] = h_max.to("m").value
                reco_event["reco_core_x"] = reco_core_x.to("m").value
                reco_event["reco_core_y"] = reco_core_y.to("m").value
                reco_event["mc_core_x"] = mc_core_x.to("m").value
                reco_event["mc_core_y"] = mc_core_y.to("m").value
                if use_proba_for_classifier is True:
                    reco_event["gammaness"] = gammaness
                else:
                    reco_event["score"] = score
                reco_event["success"] = True
                reco_event["ErrEstPos"] = np.nan
                reco_event["ErrEstDir"] = np.nan
            else:
                reco_event["success"] = False

            # save basic event infos
            reco_event["mc_energy"] = event.mc.energy.to("TeV").value
            reco_event["event_id"] = event.r1.event_id
            reco_event["obs_id"] = event.r1.obs_id

            if args.save_images is True:
                images_phe[cam_id]["event_id"] = event.r0.event_id
                images_phe[cam_id]["tel_id"] = tel_id
                images_phe[cam_id]["dl1_phe_image"] = dl1_phe_image
                images_phe[cam_id]["mc_phe_image"] = mc_phe_image
                images_phe[cam_id].append()

            # Fill the table
            reco_event.append()
            reco_table.flush()

            if signal_handler.stop:
                break
        if signal_handler.stop:
            break

    # make sure everything gets written out nicely
    reco_table.flush()

    if args.save_images is True:
        for table in images_table.values():
            table.flush()

    # Add in meta-data's table?
    try:
        print()
        evt_cutflow()
        print()
        img_cutflow()
    except ZeroDivisionError:
        pass

    print("Job done!")
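# Illustration of the per-telescope combination used in main() above: each
# telescope's gammaness is averaged with weights w = sqrt(intensity)
# ("a la Mars"). The toy numbers below are made up for demonstration only.
def combine_gammaness_example():
    intensity = np.array([100.0, 400.0, 2500.0])   # image charges in phe
    gammaness_tel = np.array([0.9, 0.6, 0.8])      # per-telescope gammaness
    weight_tel = np.sqrt(intensity)                # weights: 10, 20, 50
    gammaness = np.sum(weight_tel * gammaness_tel) / np.sum(weight_tel)
    # (10*0.9 + 20*0.6 + 50*0.8) / 80 = 61 / 80 = 0.7625
    return gammaness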
        cl = np.array(cl)

        telescope_weights[cam_id] = np.ones_like(cl, dtype=float)
        if args.unify:
            telescope_weights[cam_id][cl == 'g'] = \
                1 / np.count_nonzero(cl == 'g')
            telescope_weights[cam_id][cl == 'p'] = \
                1 / np.count_nonzero(cl == 'p')

        print("number of g:", np.count_nonzero(cl == 'g'))
        print("number of p:", np.count_nonzero(cl == 'p'))
        print()

    # use default random forest classifier
    clf_kwargs = {'n_estimators': 40,
                  'max_depth': None,
                  'min_samples_split': 2,
                  'random_state': 0,
                  'cam_id_list': cam_id_list}

    classifier = EventClassifier(**clf_kwargs)

    classifier.fit(features, classes, telescope_weights)

    if args.store:
        classifier.save(args.outpath.format(mode=args.mode,
                                            classifier=classifier,
                                            cam_id="{cam_id}"))

    fig = classifier.show_importances(ClassifierFeatures._fields)
    fig.set_size_inches(15, 10)
    for ax in fig.axes:
        plt.sca(ax)
        plt.xticks(rotation=45)
        for label in ax.get_xmajorticklabels():
            label.set_horizontalalignment("right")
    plt.subplots_adjust(top=0.9, bottom=0.135, left=0.034, right=0.98,