Ejemplo n.º 1
0
def main():

    # Argument parser
    parser = make_argparser()
    parser.add_argument("--regressor_dir",
                        default="./",
                        help="regressors directory")
    parser.add_argument("--classifier_dir",
                        default="./",
                        help="regressors directory")
    parser.add_argument(
        "--force_tailcut_for_extended_cleaning",
        type=str2bool,
        default=False,
        help="For tailcut cleaning for energy/score estimation",
    )
    parser.add_argument(
        "--save_images",
        action="store_true",
        help="Save images in images.h5 (one file testing)",
    )
    args = parser.parse_args()

    # Read configuration file
    cfg = load_config(args.config_file)

    # Read site layout
    site = cfg["General"]["site"]
    array = cfg["General"]["array"]
    cameras = cfg["General"]["cam_id_list"]

    # Add force_tailcut_for_extended_cleaning in configuration
    cfg["General"][
        "force_tailcut_for_extended_cleaning"] = args.force_tailcut_for_extended_cleaning
    cfg["General"]["force_mode"] = "tail"
    force_mode = args.mode
    if cfg["General"]["force_tailcut_for_extended_cleaning"] is True:
        force_mode = "tail"
    print("force_mode={}".format(force_mode))
    print("mode={}".format(args.mode))

    if args.infile_list:
        filenamelist = []
        for f in args.infile_list:
            filenamelist += glob("{}/{}".format(args.indir, f))
        filenamelist.sort()

    if not filenamelist:
        print("no files found; check indir: {}".format(args.indir))
        exit(-1)

    # keeping track of events and where they were rejected
    evt_cutflow = CutFlow("EventCutFlow")
    img_cutflow = CutFlow("ImageCutFlow")

    # Event preparer
    preper = EventPreparer(config=cfg,
                           mode=args.mode,
                           event_cutflow=evt_cutflow,
                           image_cutflow=img_cutflow)

    # Regressor and classifier methods
    regressor_method = cfg["EnergyRegressor"]["method_name"]
    classifier_method = cfg["GammaHadronClassifier"]["method_name"]
    use_proba_for_classifier = cfg["GammaHadronClassifier"]["use_proba"]

    if regressor_method in ["None", "none", None]:
        use_regressor = False
    else:
        use_regressor = True

    if classifier_method in ["None", "none", None]:
        use_classifier = False
    else:
        use_classifier = True

    # Classifiers
    if use_classifier:
        classifier_files = (args.classifier_dir +
                            "/classifier_{mode}_{cam_id}_{classifier}.pkl.gz")
        clf_file = classifier_files.format(
            **{
                "mode": force_mode,
                "wave_args": "mixed",
                "classifier": classifier_method,
                "cam_id": "{cam_id}",
            })
        classifier = EventClassifier.load(clf_file, cam_id_list=cameras)

    # Regressors
    if use_regressor:
        regressor_files = (args.regressor_dir +
                           "/regressor_{mode}_{cam_id}_{regressor}.pkl.gz")
        reg_file = regressor_files.format(
            **{
                "mode": force_mode,
                "wave_args": "mixed",
                "regressor": regressor_method,
                "cam_id": "{cam_id}",
            })
        regressor = EnergyRegressor.load(reg_file, cam_id_list=cameras)

    # catch ctr-c signal to exit current loop and still display results
    signal_handler = SignalHandler()
    signal.signal(signal.SIGINT, signal_handler)

    # Declaration of the column descriptor for the (possible) images file
    class StoredImages(tb.IsDescription):
        event_id = tb.Int32Col(dflt=1, pos=0)
        tel_id = tb.Int16Col(dflt=1, pos=1)
        dl1_phe_image = tb.Float32Col(shape=(1855), pos=2)
        mc_phe_image = tb.Float32Col(shape=(1855), pos=3)

    # this class defines the reconstruction parameters to keep track of
    class RecoEvent(tb.IsDescription):
        obs_id = tb.Int16Col(dflt=-1, pos=0)
        event_id = tb.Int32Col(dflt=-1, pos=1)
        NTels_trig = tb.Int16Col(dflt=0, pos=2)
        NTels_reco = tb.Int16Col(dflt=0, pos=3)
        NTels_reco_lst = tb.Int16Col(dflt=0, pos=4)
        NTels_reco_mst = tb.Int16Col(dflt=0, pos=5)
        NTels_reco_sst = tb.Int16Col(dflt=0, pos=6)
        mc_energy = tb.Float32Col(dflt=np.nan, pos=7)
        reco_energy = tb.Float32Col(dflt=np.nan, pos=8)
        reco_alt = tb.Float32Col(dflt=np.nan, pos=9)
        reco_az = tb.Float32Col(dflt=np.nan, pos=10)
        offset = tb.Float32Col(dflt=np.nan, pos=11)
        xi = tb.Float32Col(dflt=np.nan, pos=12)
        ErrEstPos = tb.Float32Col(dflt=np.nan, pos=13)
        ErrEstDir = tb.Float32Col(dflt=np.nan, pos=14)
        gammaness = tb.Float32Col(dflt=np.nan, pos=15)
        success = tb.BoolCol(dflt=False, pos=16)
        score = tb.Float32Col(dflt=np.nan, pos=17)
        h_max = tb.Float32Col(dflt=np.nan, pos=18)
        reco_core_x = tb.Float32Col(dflt=np.nan, pos=19)
        reco_core_y = tb.Float32Col(dflt=np.nan, pos=20)
        mc_core_x = tb.Float32Col(dflt=np.nan, pos=21)
        mc_core_y = tb.Float32Col(dflt=np.nan, pos=22)

    reco_outfile = tb.open_file(
        mode="w",
        # if no outfile name is given (i.e. don't to write the event list to disk),
        # need specify two "driver" arguments
        **({
            "filename": args.outfile
        } if args.outfile else {
            "filename": "no_outfile.h5",
            "driver": "H5FD_CORE",
            "driver_core_backing_store": False,
        }))

    reco_table = reco_outfile.create_table("/", "reco_events", RecoEvent)
    reco_event = reco_table.row

    # Create the images file only if the user want to store the images
    if args.save_images is True:
        images_outfile = tb.open_file("images.h5", mode="w")
        images_table = {}
        images_phe = {}

    # Telescopes in analysis
    allowed_tels = set(prod3b_tel_ids(array, site=site))
    for i, filename in enumerate(filenamelist):

        source = event_source(input_url=filename,
                              allowed_tels=allowed_tels,
                              max_events=args.max_events)
        # loop that cleans and parametrises the images and performs the reconstruction
        for (
                event,
                dl1_phe_image,
                mc_phe_image,
                n_pixel_dict,
                hillas_dict,
                hillas_dict_reco,
                n_tels,
                tot_signal,
                max_signals,
                n_cluster_dict,
                reco_result,
                impact_dict,
        ) in preper.prepare_event(source):

            # Angular quantities
            run_array_direction = event.mcheader.run_array_direction

            # Angular separation between true and reco direction
            xi = angular_separation(event.mc.az, event.mc.alt, reco_result.az,
                                    reco_result.alt)

            # Angular separation bewteen the center of the camera and the reco direction.
            offset = angular_separation(
                run_array_direction[0],  # az
                run_array_direction[1],  # alt
                reco_result.az,
                reco_result.alt,
            )

            # Height of shower maximum
            h_max = reco_result.h_max

            if hillas_dict is not None:

                # Estimate particle energy
                if use_regressor is True:
                    energy_tel = np.zeros(len(hillas_dict.keys()))
                    weight_tel = np.zeros(len(hillas_dict.keys()))

                    for idx, tel_id in enumerate(hillas_dict.keys()):
                        cam_id = event.inst.subarray.tel[tel_id].camera.cam_id
                        moments = hillas_dict[tel_id]
                        model = regressor.model_dict[cam_id]

                        # Features to be fed in the regressor
                        features_img = np.array([
                            np.log10(moments.intensity),
                            np.log10(impact_dict[tel_id].value),
                            moments.width.value,
                            moments.length.value,
                            h_max.value,
                        ])

                        energy_tel[idx] = model.predict([features_img])
                        weight_tel[idx] = moments.intensity

                    reco_energy = np.sum(
                        weight_tel * energy_tel) / sum(weight_tel)
                else:
                    reco_energy = np.nan

                # Estimate particle score/gammaness
                if use_classifier is True:
                    score_tel = np.zeros(len(hillas_dict.keys()))
                    gammaness_tel = np.zeros(len(hillas_dict.keys()))
                    weight_tel = np.zeros(len(hillas_dict.keys()))

                    for idx, tel_id in enumerate(hillas_dict.keys()):
                        cam_id = event.inst.subarray.tel[tel_id].camera.cam_id
                        moments = hillas_dict[tel_id]
                        model = classifier.model_dict[cam_id]
                        # Features to be fed in the classifier
                        features_img = np.array([
                            np.log10(reco_energy),
                            moments.width.value,
                            moments.length.value,
                            moments.skewness,
                            moments.kurtosis,
                            h_max.value,
                        ])
                        # Output of classifier according to type of classifier
                        if use_proba_for_classifier is False:
                            score_tel[idx] = model.decision_function(
                                [features_img])
                        else:
                            gammaness_tel[idx] = model.predict_proba(
                                [features_img])[:, 1]
                        # Should test other weighting strategy (e.g. power of charge, impact, etc.)
                        # For now, weighting a la Mars
                        weight_tel[idx] = np.sqrt(moments.intensity)

                    # Weight the final decision/proba
                    if use_proba_for_classifier is True:
                        gammaness = np.sum(
                            weight_tel * gammaness_tel) / sum(weight_tel)
                    else:
                        score = np.sum(
                            weight_tel * score_tel) / sum(weight_tel)
                else:
                    score = np.nan
                    gammaness = np.nan

                # Regardless if energy or gammaness is estimated, if the user
                # wants to save the images of the run we do it here
                # (Probably not the most efficient way, but for one file is ok)
                if args.save_images is True:
                    for idx, tel_id in enumerate(hillas_dict.keys()):
                        cam_id = event.inst.subarray.tel[tel_id].camera.cam_id
                        if cam_id not in images_phe:
                            images_table[cam_id] = images_outfile.create_table(
                                "/", "_".join(["images", cam_id]),
                                StoredImages)
                            images_phe[cam_id] = images_table[cam_id].row

                shower = event.mc
                mc_core_x = shower.core_x
                mc_core_y = shower.core_y

                reco_core_x = reco_result.core_x
                reco_core_y = reco_result.core_y

                alt, az = reco_result.alt, reco_result.az

                # Fill table's attributes
                reco_event["NTels_trig"] = len(event.dl0.tels_with_data)
                reco_event["NTels_reco"] = len(hillas_dict)
                reco_event["NTels_reco_lst"] = n_tels["LST_LST_LSTCam"]
                reco_event["NTels_reco_mst"] = n_tels["MST_MST_NectarCam"]
                reco_event["NTels_reco_sst"] = n_tels["SST"]  # will change
                reco_event["reco_energy"] = reco_energy
                reco_event["reco_alt"] = alt.to("deg").value
                reco_event["reco_az"] = az.to("deg").value
                reco_event["offset"] = offset.to("deg").value
                reco_event["xi"] = xi.to("deg").value
                reco_event["h_max"] = h_max.to("m").value
                reco_event["reco_core_x"] = reco_core_x.to("m").value
                reco_event["reco_core_y"] = reco_core_y.to("m").value
                reco_event["mc_core_x"] = mc_core_x.to("m").value
                reco_event["mc_core_y"] = mc_core_y.to("m").value
                if use_proba_for_classifier is True:
                    reco_event["gammaness"] = gammaness
                else:
                    reco_event["score"] = score
                reco_event["success"] = True
                reco_event["ErrEstPos"] = np.nan
                reco_event["ErrEstDir"] = np.nan
            else:
                reco_event["success"] = False

            # save basic event infos
            reco_event["mc_energy"] = event.mc.energy.to("TeV").value
            reco_event["event_id"] = event.r1.event_id
            reco_event["obs_id"] = event.r1.obs_id

            if args.save_images is True:
                images_phe[cam_id]["event_id"] = event.r0.event_id
                images_phe[cam_id]["tel_id"] = tel_id
                images_phe[cam_id]["dl1_phe_image"] = dl1_phe_image
                images_phe[cam_id]["mc_phe_image"] = mc_phe_image

                images_phe[cam_id].append()

            # Fill table
            reco_table.flush()
            reco_event.append()

            if signal_handler.stop:
                break
        if signal_handler.stop:
            break

    # make sure everything gets written out nicely
    reco_table.flush()

    if args.save_images is True:
        for table in images_table.values():
            table.flush()

    # Add in meta-data's table?
    try:
        print()
        evt_cutflow()
        print()
        img_cutflow()

    except ZeroDivisionError:
        pass

    print("Job done!")
Ejemplo n.º 2
0
def main():

    # Argument parser
    parser = make_argparser()

    parser.add_argument(
        "--debug",
        action="store_true",
        help="Print debugging information",
    )

    parser.add_argument("--regressor_dir",
                        default="./",
                        help="regressors directory")
    parser.add_argument("--classifier_dir",
                        default="./",
                        help="regressors directory")
    parser.add_argument(
        "--force_tailcut_for_extended_cleaning",
        type=str2bool,
        default=False,
        help="For tailcut cleaning for energy/score estimation",
    )
    parser.add_argument(
        "--save_images",
        action="store_true",
        help="Save images in images.h5 (one file testing)",
    )

    parser.add_argument(
        "--regressor_config",
        type=str,
        default=None,
        help="Configuration file used to produce regressor model")
    parser.add_argument(
        "--classifier_config",
        type=str,
        default=None,
        help="Configuration file used to produce classification model")

    args = parser.parse_args()

    # Read configuration file
    cfg = load_config(args.config_file)

    try:  # If the user didn't specify a site and/or and array...
        site = cfg["General"]["site"]
        array = cfg["General"]["array"]
    except KeyError:  # ...raise an error and exit.
        print(bcolors.FAIL +
              "ERROR: make sure that both 'site' and 'array' are " +
              "specified in the analysis configuration file!" + bcolors.ENDC)
        exit()

    # Add force_tailcut_for_extended_cleaning in configuration
    cfg["General"][
        "force_tailcut_for_extended_cleaning"] = args.force_tailcut_for_extended_cleaning
    cfg["General"]["force_mode"] = "tail"
    force_mode = args.mode
    if cfg["General"]["force_tailcut_for_extended_cleaning"] is True:
        force_mode = "tail"
    print("force_mode={}".format(force_mode))
    print("mode={}".format(args.mode))

    if args.infile_list:
        filenamelist = []
        for f in args.infile_list:
            filenamelist += glob("{}/{}".format(args.indir, f))
        filenamelist.sort()

    if not filenamelist:
        print("no files found; check indir: {}".format(args.indir))
        exit(-1)

    # Get the IDs of the involved telescopes and associated cameras together
    # with the equivalent focal lengths from the first event
    allowed_tels, cams_and_foclens, subarray = prod3b_array(
        filenamelist[0], site, array)

    # keeping track of events and where they were rejected
    evt_cutflow = CutFlow("EventCutFlow")
    img_cutflow = CutFlow("ImageCutFlow")

    # Event preparer
    preper = EventPreparer(
        config=cfg,
        subarray=subarray,
        cams_and_foclens=cams_and_foclens,
        mode=args.mode,
        event_cutflow=evt_cutflow,
        image_cutflow=img_cutflow,
    )

    # Regressor and classifier methods
    regressor_method = cfg["EnergyRegressor"]["method_name"]
    classifier_method = cfg["GammaHadronClassifier"]["method_name"]
    use_proba_for_classifier = cfg["GammaHadronClassifier"]["use_proba"]

    if regressor_method in ["None", "none", None]:
        print(bcolors.OKBLUE +
              "The energy of the event will NOT be estimated." + bcolors.ENDC)
        use_regressor = False
    else:
        use_regressor = True

    if classifier_method in ["None", "none", None]:
        if args.debug:
            print(bcolors.OKBLUE +
                  "The particle type of the event will NOT be estimated." +
                  bcolors.ENDC)
        use_classifier = False
    else:
        use_classifier = True

    # Classifiers
    if use_classifier:

        # Read configuration file
        classifier_config = load_config(args.classifier_config)

        classifier_files = (args.classifier_dir +
                            "/classifier_{cam_id}_{classifier}.pkl.gz")
        clf_file = classifier_files.format(
            **{
                "mode": force_mode,
                "wave_args": "mixed",
                "classifier": classifier_method,
                "cam_id": "{cam_id}",
            })
        classifiers = load_models(clf_file,
                                  cam_id_list=cams_and_foclens.keys())
        if args.debug:
            print(bcolors.OKBLUE +
                  "The particle type of the event will be estimated" +
                  " using the models stored in" + f" {args.classifier_dir}\n" +
                  bcolors.ENDC)

    # Regressors
    if use_regressor:

        # Read configuration file
        regressor_config = load_config(args.regressor_config)

        regressor_files = (args.regressor_dir +
                           "/regressor_{cam_id}_{regressor}.pkl.gz")
        reg_file = regressor_files.format(
            **{
                "mode": force_mode,
                "wave_args": "mixed",
                "regressor": regressor_method,
                "cam_id": "{cam_id}",
            })
        regressors = load_models(reg_file, cam_id_list=cams_and_foclens.keys())
        if args.debug:
            print(bcolors.OKBLUE +
                  "The energy of the event will be estimated" +
                  " using the models stored in" + f" {args.regressor_dir}\n" +
                  bcolors.ENDC)

    # catch ctr-c signal to exit current loop and still display results
    signal_handler = SignalHandler()
    signal.signal(signal.SIGINT, signal_handler)

    # Declaration of the column descriptor for the (possible) images file
    StoredImages = dict(
        event_id=tb.Int32Col(dflt=1, pos=0),
        tel_id=tb.Int16Col(dflt=1, pos=1)
        # reco_image, true_image and cleaning_mask_reco
        # are defined later sicne they depend on the number of pixels
    )

    # this class defines the reconstruction parameters to keep track of
    class RecoEvent(tb.IsDescription):
        obs_id = tb.Int16Col(dflt=-1, pos=0)
        event_id = tb.Int32Col(dflt=-1, pos=1)
        NTels_trig = tb.Int16Col(dflt=0, pos=2)
        NTels_reco = tb.Int16Col(dflt=0, pos=3)
        NTels_reco_lst = tb.Int16Col(dflt=0, pos=4)
        NTels_reco_mst = tb.Int16Col(dflt=0, pos=5)
        NTels_reco_sst = tb.Int16Col(dflt=0, pos=6)
        pointing_az = tb.Float32Col(dflt=np.nan, pos=7)
        pointing_alt = tb.Float32Col(dflt=np.nan, pos=8)
        true_az = tb.Float32Col(dflt=np.nan, pos=9)
        true_alt = tb.Float32Col(dflt=np.nan, pos=10)
        true_energy = tb.Float32Col(dflt=np.nan, pos=11)
        reco_energy = tb.Float32Col(dflt=np.nan, pos=12)
        reco_alt = tb.Float32Col(dflt=np.nan, pos=13)
        reco_az = tb.Float32Col(dflt=np.nan, pos=14)
        offset = tb.Float32Col(dflt=np.nan, pos=15)
        xi = tb.Float32Col(dflt=np.nan, pos=16)
        ErrEstPos = tb.Float32Col(dflt=np.nan, pos=17)
        ErrEstDir = tb.Float32Col(dflt=np.nan, pos=18)
        gammaness = tb.Float32Col(dflt=np.nan, pos=19)
        success = tb.BoolCol(dflt=False, pos=20)
        score = tb.Float32Col(dflt=np.nan, pos=21)
        h_max = tb.Float32Col(dflt=np.nan, pos=22)
        reco_core_x = tb.Float32Col(dflt=np.nan, pos=23)
        reco_core_y = tb.Float32Col(dflt=np.nan, pos=24)
        true_core_x = tb.Float32Col(dflt=np.nan, pos=25)
        true_core_y = tb.Float32Col(dflt=np.nan, pos=26)
        is_valid = tb.BoolCol(dflt=False, pos=27)

    reco_outfile = tb.open_file(
        mode="w",
        # if no outfile name is given (i.e. don't to write the event list to disk),
        # need specify two "driver" arguments
        **({
            "filename": args.outfile
        } if args.outfile else {
            "filename": "no_outfile.h5",
            "driver": "H5FD_CORE",
            "driver_core_backing_store": False,
        }))

    reco_table = reco_outfile.create_table("/", "reco_events", RecoEvent)
    reco_event = reco_table.row

    # Create the images file only if the user want to store the images
    if args.save_images is True:
        images_outfile = tb.open_file("images.h5", mode="w")
        images_table = {}
        images_phe = {}

    for i, filename in enumerate(filenamelist):

        source = EventSource(input_url=filename,
                             allowed_tels=allowed_tels,
                             max_events=args.max_events)
        # loop that cleans and parametrises the images and performs the reconstruction
        for (
                event,
                reco_image,
                cleaning_mask_reco,
                cleaning_mask_clusters,
                true_image,
                n_pixel_dict,
                hillas_dict,
                hillas_dict_reco,
                leakage_dict,
                n_tels,
                max_signals,
                n_cluster_dict,
                reco_result,
                impact_dict,
                good_event,
                good_for_reco,
        ) in preper.prepare_event(source,
                                  save_images=args.save_images,
                                  debug=args.debug):

            # True direction
            true_az = event.simulation.shower.az
            true_alt = event.simulation.shower.alt

            # Array pointing in AltAz frame
            pointing_az = event.pointing.array_azimuth
            pointing_alt = event.pointing.array_altitude

            if good_event:  # aka it has been successfully reconstructed

                # Angular separation between
                # - true direction
                # - reconstruted direction
                xi = angular_separation(event.simulation.shower.az,
                                        event.simulation.shower.alt,
                                        reco_result.az, reco_result.alt)

                # Angular separation between
                # - center of the array's FoV
                # - reconstructed direction
                offset = angular_separation(
                    pointing_az,
                    pointing_alt,
                    reco_result.az,
                    reco_result.alt,
                )

                # Reconstructed height of shower maximum
                h_max = reco_result.h_max

                # Reconstructed position of the shower's core on the ground
                reco_core_x = reco_result.core_x
                reco_core_y = reco_result.core_y

                # Reconstructed direction of the shower's in the sky
                alt, az = reco_result.alt, reco_result.az

                # Successfully reconstructed shower
                is_valid = True

            else:  # no successful reconstruction assign dummy values

                xi = np.nan * u.deg
                offset = np.nan * u.deg
                reco_core_x = np.nan * u.m
                reco_core_y = np.nan * u.m
                h_max = np.nan * u.m
                alt = np.nan * u.deg
                az = np.nan * u.deg
                is_valid = False
                reco_energy = np.nan
                score = np.nan
                gammaness = np.nan
                reco_event["success"] = False

            # Estimate particle energy
            if use_regressor and is_valid:
                energy_tel = np.zeros(len(hillas_dict.keys()))
                energy_tel_classifier = {}
                weight_tel = np.zeros(len(hillas_dict.keys()))

                for idx, tel_id in enumerate(hillas_dict.keys()):

                    cam_id = source.subarray.tel[tel_id].camera.camera_name
                    moments = hillas_dict[tel_id]

                    model = regressors[cam_id]

                    ############################################################
                    #                  GET FEATURES
                    ############################################################

                    # Read feature list from model configutation file
                    features_basic = regressor_config["FeatureList"]["Basic"]
                    features_derived = regressor_config["FeatureList"][
                        "Derived"]
                    features = features_basic + list(features_derived)

                    # Create a pandas Dataframe with basic quantities
                    # This is needed in order to connect the I/O system of the
                    # model inputs to the in-memory computation of this script
                    data = pd.DataFrame({
                        "hillas_intensity": [moments.intensity],
                        "hillas_width": [moments.width.to("deg").value],
                        "hillas_length": [moments.length.to("deg").value],
                        "hillas_x": [moments.x.to("deg").value],
                        "hillas_y": [moments.y.to("deg").value],
                        "hillas_phi": [moments.phi.to("deg").value],
                        "hillas_r": [moments.r.to("deg").value],
                        "leakage_intensity_width_1_reco":
                        [leakage_dict[tel_id]['leak1_reco']],
                        "leakage_intensity_width_2_reco":
                        [leakage_dict[tel_id]['leak2_reco']],
                        "leakage_intensity_width_1":
                        [leakage_dict[tel_id]['leak1']],
                        "leakage_intensity_width_2":
                        [leakage_dict[tel_id]['leak2']],
                        "az": [reco_result.az.to("deg").value],
                        "alt": [reco_result.alt.to("deg").value],
                        "h_max": [h_max.value],
                        "impact_dist": [impact_dict[tel_id].to("m").value],
                    })

                    # Compute derived features and add them to the dataframe
                    for key, expression in features_derived.items():
                        if key not in data:
                            data.eval(f'{key} = {expression}', inplace=True)

                    # sort features_to_use alphabetically to ensure order
                    # preservation with model.fit in protopipe.mva
                    features = sorted(features)

                    # Select the values for the full set of features
                    features_values = data[features].to_numpy()

                    ############################################################

                    if good_for_reco[tel_id] == 1:
                        energy_tel[idx] = model.predict(features_values)
                    else:
                        energy_tel[idx] = np.nan

                    weight_tel[idx] = moments.intensity

                    # Record the values regardless of the validity
                    # We don't use this now, but it should be recorded
                    energy_tel_classifier[tel_id] = energy_tel[idx]

                # Use only images with valid estimated energies to calculate
                # the average
                energy_tel_selected = energy_tel[~np.isnan(energy_tel)]
                weight_tel_selected = weight_tel[~np.isnan(energy_tel)]

                # Try getting the average weighted energy of the shower
                # If no image had a valid estimated energy record it as nan
                if len(energy_tel_selected) == 0:
                    reco_energy = np.nan
                    energy_estimated = False
                else:
                    reco_energy = np.sum(
                        weight_tel_selected *
                        energy_tel_selected) / sum(weight_tel_selected)
                    energy_estimated = True
            else:
                reco_energy = np.nan
                energy_estimated = False

            # Estimate particle score/gammaness
            if use_classifier and is_valid:
                score_tel = np.zeros(len(hillas_dict.keys()))
                gammaness_tel = np.zeros(len(hillas_dict.keys()))
                weight_tel = np.zeros(len(hillas_dict.keys()))

                for idx, tel_id in enumerate(hillas_dict.keys()):

                    cam_id = source.subarray.tel[tel_id].camera.camera_name
                    moments = hillas_dict[tel_id]

                    model = classifiers[cam_id]

                    ############################################################
                    #                  GET FEATURES
                    ############################################################

                    # Read feature list from model configutation file
                    features_basic = classifier_config["FeatureList"]["Basic"]
                    features_derived = classifier_config["FeatureList"][
                        "Derived"]
                    features = features_basic + list(features_derived)

                    # Create a pandas Dataframe with basic quantities
                    # This is needed in order to connect the I/O system of the
                    # model inputs to the in-memory computation of this script
                    data = pd.DataFrame({
                        "hillas_intensity": [moments.intensity],
                        "hillas_width": [moments.width.to("deg").value],
                        "hillas_length": [moments.length.to("deg").value],
                        "hillas_x": [moments.x.to("deg").value],
                        "hillas_y": [moments.y.to("deg").value],
                        "hillas_phi": [moments.phi.to("deg").value],
                        "hillas_r": [moments.r.to("deg").value],
                        "leakage_intensity_width_1_reco":
                        [leakage_dict[tel_id]['leak1_reco']],
                        "leakage_intensity_width_2_reco":
                        [leakage_dict[tel_id]['leak2_reco']],
                        "leakage_intensity_width_1":
                        [leakage_dict[tel_id]['leak1']],
                        "leakage_intensity_width_2":
                        [leakage_dict[tel_id]['leak2']],
                        "az": [reco_result.az.to("deg").value],
                        "alt": [reco_result.alt.to("deg").value],
                        "h_max": [h_max.value],
                        "impact_dist": [impact_dict[tel_id].to("m").value],
                        "reco_energy":
                        reco_energy,
                        "reco_energy_tel":
                        energy_tel_classifier[tel_id],
                    })

                    # Compute derived features and add them to the dataframe
                    for key, expression in features_derived.items():
                        if key not in data:
                            data.eval(f'{key} = {expression}', inplace=True)

                    # sort features_to_use alphabetically to ensure order
                    # preservation with model.fit in protopipe.mva
                    features = sorted(features)

                    # Select the values for the full set of features
                    features_values = data[features].to_numpy()

                    ############################################################

                    # Here we check for valid telescope-wise energies
                    # Because it means that it's a good image
                    # WARNING: currently we should REQUIRE to estimate both
                    # energy AND particle type
                    if not np.isnan(energy_tel_classifier[tel_id]):
                        # Output of classifier according to type of classifier
                        if use_proba_for_classifier is False:
                            score_tel[idx] = model.decision_function(
                                features_values)
                        else:
                            gammaness_tel[idx] = model.predict_proba(
                                features_values)[:, 1]
                        weight_tel[idx] = np.sqrt(moments.intensity)
                    else:
                        # WARNING:
                        # this is true only because we use telescope-wise
                        # energies as a feature of the model!!!
                        score_tel[idx] = np.nan
                        gammaness_tel[idx] = np.nan

                # Use only images with valid estimated energies to calculate
                # the average
                if use_proba_for_classifier is False:
                    score_tel_selected = score_tel[~np.isnan(score_tel)]
                    weight_tel_selected = weight_tel[~np.isnan(score_tel)]
                else:
                    gammaness_tel_selected = gammaness_tel[
                        ~np.isnan(gammaness_tel)]
                    weight_tel_selected = weight_tel[~np.isnan(gammaness_tel)]

                # Try getting the average weighted score or gammaness
                # If no image had a valid estimated energy record it as nan
                if len(weight_tel_selected) > 0:

                    # Weight the final decision/proba
                    if use_proba_for_classifier is True:
                        gammaness = np.sum(
                            weight_tel_selected *
                            gammaness_tel_selected) / sum(weight_tel_selected)
                    else:
                        score = np.sum(
                            weight_tel_selected *
                            score_tel_selected) / sum(weight_tel_selected)

                    particle_type_estimated = True

                else:

                    score = np.nan
                    gammaness = np.nan
                    particle_type_estimated = False

            else:
                score = np.nan
                gammaness = np.nan
                particle_type_estimated = False

            if energy_estimated and particle_type_estimated:
                reco_event["success"] = True
            else:
                if args.debug:
                    print(
                        bcolors.WARNING +
                        f"energy_estimated = {energy_estimated}\n" +
                        f"particle_type_estimated = {particle_type_estimated}\n"
                        + bcolors.ENDC)
                reco_event["success"] = False

            # If the user wants to save the images of the run
            if args.save_images is True:
                for idx, tel_id in enumerate(hillas_dict.keys()):
                    cam_id = source.subarray.tel[tel_id].camera.camera_name
                    if cam_id not in images_phe:

                        n_pixels = source.subarray.tel[
                            tel_id].camera.geometry.n_pixels
                        StoredImages["true_image"] = tb.Float32Col(
                            shape=(n_pixels), pos=2)
                        StoredImages["reco_image"] = tb.Float32Col(
                            shape=(n_pixels), pos=3)
                        StoredImages["cleaning_mask_reco"] = tb.BoolCol(
                            shape=(n_pixels), pos=4)  # not in ctapipe
                        StoredImages["cleaning_mask_clusters"] = tb.BoolCol(
                            shape=(n_pixels), pos=5)  # not in ctapipe

                        images_table[cam_id] = images_outfile.create_table(
                            "/", "_".join(["images", cam_id]), StoredImages)
                    images_phe[cam_id] = images_table[cam_id].row

                    images_phe[cam_id]["event_id"] = event.index.event_id
                    images_phe[cam_id]["tel_id"] = tel_id
                    images_phe[cam_id]["reco_image"] = reco_image[tel_id]
                    images_phe[cam_id]["true_image"] = true_image[tel_id]
                    images_phe[cam_id][
                        "cleaning_mask_reco"] = cleaning_mask_reco[tel_id]
                    images_phe[cam_id][
                        "cleaning_mask_clusters"] = cleaning_mask_clusters[
                            tel_id]

                    images_phe[cam_id].append()

            # Now we start recording the data to file
            reco_event["event_id"] = event.index.event_id
            reco_event["obs_id"] = event.index.obs_id
            reco_event["NTels_trig"] = len(event.r1.tel.keys())
            reco_event["NTels_reco"] = len(hillas_dict)
            reco_event["NTels_reco_lst"] = n_tels["LST_LST_LSTCam"]
            reco_event["NTels_reco_mst"] = (n_tels["MST_MST_NectarCam"] +
                                            n_tels["MST_MST_FlashCam"] +
                                            n_tels["MST_SCT_SCTCam"])
            reco_event["NTels_reco_sst"] = (n_tels["SST_1M_DigiCam"] +
                                            n_tels["SST_ASTRI_ASTRICam"] +
                                            n_tels["SST_GCT_CHEC"])
            reco_event["pointing_az"] = pointing_az.to("deg").value
            reco_event["pointing_alt"] = pointing_alt.to("deg").value
            reco_event["reco_energy"] = reco_energy
            reco_event["reco_alt"] = alt.to("deg").value
            reco_event["reco_az"] = az.to("deg").value
            reco_event["offset"] = offset.to("deg").value
            reco_event["xi"] = xi.to("deg").value
            reco_event["h_max"] = h_max.to("m").value
            reco_event["reco_core_x"] = reco_core_x.to("m").value
            reco_event["reco_core_y"] = reco_core_y.to("m").value
            reco_event["is_valid"] = is_valid

            if use_proba_for_classifier is True:
                reco_event["gammaness"] = gammaness
            else:
                reco_event["score"] = score
            reco_event["ErrEstPos"] = np.nan
            reco_event["ErrEstDir"] = np.nan

            # Simulated information
            shower = event.simulation.shower
            mc_core_x = shower.core_x
            mc_core_y = shower.core_y
            reco_event["true_energy"] = shower.energy.to("TeV").value
            reco_event["true_az"] = true_az.to("deg").value
            reco_event["true_alt"] = true_alt.to("deg").value
            reco_event["true_core_x"] = mc_core_x.to("m").value
            reco_event["true_core_y"] = mc_core_y.to("m").value

            # Fill table
            reco_table.flush()
            reco_event.append()

            if signal_handler.stop:
                break
        if signal_handler.stop:
            break

    # make sure everything gets written out nicely
    reco_table.flush()

    if args.save_images is True:
        for table in images_table.values():
            table.flush()

    try:
        print()
        evt_cutflow()
        print()
        img_cutflow()

    except ZeroDivisionError:
        pass

    print("Job done!")
Ejemplo n.º 3
0
def main():

    # Argument parser
    parser = make_argparser()

    parser.add_argument(
        "--debug",
        action="store_true",
        help="Print debugging information",
    )

    parser.add_argument(
        "--save_images",
        action="store_true",
        help="Save also all images",
    )

    parser.add_argument(
        "--estimate_energy",
        type=str2bool,
        default=False,
        help="Estimate the events' energy with a regressor from\
         protopipe.scripts.build_model",
    )
    parser.add_argument("--regressor_dir",
                        type=str,
                        default="./",
                        help="regressors directory")
    args = parser.parse_args()

    # Read configuration file
    cfg = load_config(args.config_file)

    try:  # If the user didn't specify a site and/or and array...
        site = cfg["General"]["site"]
        array = cfg["General"]["array"]
    except KeyError:  # ...raise an error and exit.
        print("\033[91m ERROR: make sure that both 'site' and 'array' are "
              "specified in the analysis configuration file! \033[0m")
        exit()

    if args.infile_list:
        filenamelist = []
        for f in args.infile_list:
            filenamelist += glob("{}/{}".format(args.indir, f))
        filenamelist.sort()
    else:
        raise ValueError("don't know which input to use...")

    if not filenamelist:
        print("no files found; check indir: {}".format(args.indir))
        exit(-1)
    else:
        print("found {} files".format(len(filenamelist)))

    # Get the IDs of the involved telescopes and associated cameras together
    # with the equivalent focal lengths from the first event
    allowed_tels, cams_and_foclens, subarray = prod3b_array(
        filenamelist[0], site, array)

    # keeping track of events and where they were rejected
    evt_cutflow = CutFlow("EventCutFlow")
    img_cutflow = CutFlow("ImageCutFlow")

    preper = EventPreparer(
        config=cfg,
        subarray=subarray,
        cams_and_foclens=cams_and_foclens,
        mode=args.mode,
        event_cutflow=evt_cutflow,
        image_cutflow=img_cutflow,
    )

    # catch ctr-c signal to exit current loop and still display results
    signal_handler = SignalHandler()
    signal.signal(signal.SIGINT, signal_handler)

    # Regressor method
    regressor_method = cfg["EnergyRegressor"]["method_name"]

    # wrapper for the scikit-learn regressor
    if args.estimate_energy is True:
        regressor_files = (args.regressor_dir +
                           "/regressor_{mode}_{cam_id}_{regressor}.pkl.gz")
        reg_file = regressor_files.format(
            **{
                "mode": args.mode,
                "wave_args": "mixed",  # ToDo, control
                "regressor": regressor_method,
                "cam_id": "{cam_id}",
            })

        regressor = EnergyRegressor.load(reg_file,
                                         cam_id_list=cams_and_foclens.keys())

    # COLUMN DESCRIPTOR AS DICTIONARY
    # Column descriptor for the file containing output training data."""
    DataTrainingOutput = dict(
        # ======================================================================
        # ARRAY
        obs_id=tb.Int16Col(dflt=1, pos=0),
        event_id=tb.Int32Col(dflt=1, pos=1),
        tel_id=tb.Int16Col(dflt=1, pos=2),
        N_LST=tb.Int16Col(dflt=1, pos=3),
        N_MST=tb.Int16Col(dflt=1, pos=4),
        N_SST=tb.Int16Col(dflt=1, pos=5),
        n_tel_reco=tb.FloatCol(dflt=1, pos=6),
        n_tel_discri=tb.FloatCol(dflt=1, pos=7),
        # ======================================================================
        # DL1
        hillas_intensity_reco=tb.Float32Col(dflt=1, pos=8),
        hillas_intensity=tb.Float32Col(dflt=1, pos=9),
        hillas_x_reco=tb.Float32Col(dflt=1, pos=10),
        hillas_y_reco=tb.Float32Col(dflt=1, pos=11),
        hillas_x=tb.Float32Col(dflt=1, pos=12),
        hillas_y=tb.Float32Col(dflt=1, pos=13),
        hillas_r_reco=tb.Float32Col(dflt=1, pos=14),
        hillas_r=tb.Float32Col(dflt=1, pos=15),
        hillas_phi_reco=tb.Float32Col(dflt=1, pos=16),
        hillas_phi=tb.Float32Col(dflt=1, pos=17),
        hillas_length_reco=tb.Float32Col(dflt=1, pos=18),
        hillas_length=tb.Float32Col(dflt=1, pos=19),
        hillas_width_reco=tb.Float32Col(dflt=1, pos=20),
        hillas_width=tb.Float32Col(dflt=1, pos=21),
        hillas_psi_reco=tb.Float32Col(dflt=1, pos=22),
        hillas_psi=tb.Float32Col(dflt=1, pos=23),
        hillas_skewness_reco=tb.Float32Col(dflt=1, pos=24),
        hillas_skewness=tb.Float32Col(dflt=1, pos=25),
        hillas_kurtosis=tb.Float32Col(dflt=1, pos=26),
        hillas_kurtosis_reco=tb.Float32Col(dflt=1, pos=27),
        leakage_intensity_width_1_reco=tb.Float32Col(dflt=np.nan, pos=28),
        leakage_intensity_width_2_reco=tb.Float32Col(dflt=np.nan, pos=29),
        leakage_intensity_width_1=tb.Float32Col(dflt=np.nan, pos=30),
        leakage_intensity_width_2=tb.Float32Col(dflt=np.nan, pos=31),
        # The following are missing from current ctapipe DL1 output
        # Not sure if it's worth to add them
        hillas_ellipticity_reco=tb.FloatCol(dflt=1, pos=32),
        hillas_ellipticity=tb.FloatCol(dflt=1, pos=33),
        max_signal_cam=tb.Float32Col(dflt=1, pos=34),
        pixels=tb.Int16Col(dflt=1, pos=35),
        clusters=tb.Int16Col(dflt=-1, pos=36),
        # ======================================================================
        # DL2 - DIRECTION RECONSTRUCTION
        impact_dist=tb.Float32Col(dflt=1, pos=37),
        h_max=tb.Float32Col(dflt=1, pos=38),
        alt=tb.Float32Col(dflt=np.nan, pos=39),
        az=tb.Float32Col(dflt=np.nan, pos=40),
        err_est_pos=tb.Float32Col(dflt=1, pos=41),
        err_est_dir=tb.Float32Col(dflt=1, pos=42),
        xi=tb.Float32Col(dflt=np.nan, pos=43),
        offset=tb.Float32Col(dflt=np.nan, pos=44),
        mc_core_x=tb.FloatCol(dflt=1, pos=45),
        mc_core_y=tb.FloatCol(dflt=1, pos=46),
        reco_core_x=tb.FloatCol(dflt=1, pos=47),
        reco_core_y=tb.FloatCol(dflt=1, pos=48),
        mc_h_first_int=tb.FloatCol(dflt=1, pos=49),
        mc_x_max=tb.Float32Col(dflt=np.nan, pos=50),
        is_valid=tb.BoolCol(dflt=False, pos=51),
        good_image=tb.Int16Col(dflt=1, pos=52),
        # ======================================================================
        # DL2 - ENERGY ESTIMATION
        true_energy=tb.FloatCol(dflt=1, pos=53),
        reco_energy=tb.FloatCol(dflt=np.nan, pos=54),
        reco_energy_tel=tb.Float32Col(dflt=np.nan, pos=55),
        # ======================================================================
        # DL1 IMAGES
        # this is optional data saved by the user
        # since these data declarations require to know how many pixels
        # each saved image will have,
        # we add them later on, right before creating the table
        # We list them here for reference
        # true_image=tb.Float32Col(shape=(1855), pos=56),
        # reco_image=tb.Float32Col(shape=(1855), pos=57),
        # cleaning_mask_reco=tb.BoolCol(shape=(1855), pos=58),  # not in ctapipe
    )

    outfile = tb.open_file(args.outfile, mode="w")
    outTable = {}
    outData = {}

    for i, filename in enumerate(filenamelist):

        print("file: {} filename = {}".format(i, filename))

        source = event_source(input_url=filename,
                              allowed_tels=allowed_tels,
                              max_events=args.max_events)

        # loop that cleans and parametrises the images and performs the
        # reconstruction for each event
        for (
                event,
                reco_image,
                cleaning_mask_reco,
                cleaning_mask_clusters,
                true_image,
                n_pixel_dict,
                hillas_dict,
                hillas_dict_reco,
                leakage_dict,
                n_tels,
                max_signals,
                n_cluster_dict,
                reco_result,
                impact_dict,
                good_event,
                good_for_reco,
        ) in preper.prepare_event(source,
                                  save_images=args.save_images,
                                  debug=args.debug):

            # Angular quantities
            run_array_direction = event.mcheader.run_array_direction

            if good_event:

                xi = angular_separation(event.mc.az, event.mc.alt,
                                        reco_result.az, reco_result.alt)

                offset = angular_separation(
                    run_array_direction[0],  # az
                    run_array_direction[1],  # alt
                    reco_result.az,
                    reco_result.alt,
                )

                # Impact parameter
                reco_core_x = reco_result.core_x
                reco_core_y = reco_result.core_y

                # Height of shower maximum
                h_max = reco_result.h_max
                # Todo add conversion in number of radiation length,
                # need an atmosphere profile

                is_valid = True

            else:  # something went wrong and the shower's reconstruction failed

                xi = np.nan * u.deg
                offset = np.nan * u.deg
                reco_core_x = np.nan * u.m
                reco_core_y = np.nan * u.m
                h_max = np.nan * u.m
                reco_result.alt = np.nan * u.deg
                reco_result.az = np.nan * u.deg
                is_valid = False

            reco_energy = np.nan
            reco_energy_tel = dict()

            # Not optimal at all, two loop on tel!!!
            # For energy estimation
            # Estimate energy only if the shower was reconstructed
            if (args.estimate_energy is True) and is_valid:
                weight_tel = np.zeros(len(hillas_dict.keys()))
                energy_tel = np.zeros(len(hillas_dict.keys()))

                for idx, tel_id in enumerate(hillas_dict.keys()):

                    # use only images that survived cleaning and
                    # parametrization
                    if not good_for_reco[tel_id]:
                        # bad images will get an undetermined energy
                        # this is a per-telescope energy
                        # NOT the estimated energy for the shower
                        reco_energy_tel[tel_id] = np.nan
                        continue

                    cam_id = source.subarray.tel[tel_id].camera.camera_name
                    moments = hillas_dict[tel_id]
                    model = regressor.model_dict[cam_id]

                    features_img = np.array([
                        np.log10(moments.intensity),
                        np.log10(impact_dict[tel_id].value),
                        moments.width.value,
                        moments.length.value,
                        h_max.value,
                    ])

                    energy_tel[idx] = model.predict([features_img])
                    weight_tel[idx] = moments.intensity
                    reco_energy_tel[tel_id] = energy_tel[idx]

                reco_energy = np.sum(weight_tel * energy_tel) / sum(weight_tel)
            else:
                for idx, tel_id in enumerate(hillas_dict.keys()):
                    reco_energy_tel[tel_id] = np.nan

            for idx, tel_id in enumerate(hillas_dict.keys()):
                cam_id = source.subarray.tel[tel_id].camera.camera_name

                if cam_id not in outData:

                    if args.save_images is True:
                        # we define and save images content here, to make it
                        # adaptive to different cameras

                        n_pixels = source.subarray.tel[
                            tel_id].camera.geometry.n_pixels
                        DataTrainingOutput["true_image"] = tb.Float32Col(
                            shape=(n_pixels), pos=56)
                        DataTrainingOutput["reco_image"] = tb.Float32Col(
                            shape=(n_pixels), pos=57)
                        DataTrainingOutput["cleaning_mask_reco"] = tb.BoolCol(
                            shape=(n_pixels), pos=58)  # not in ctapipe
                        DataTrainingOutput[
                            "cleaning_mask_clusters"] = tb.BoolCol(
                                shape=(n_pixels), pos=58)  # not in ctapipe

                    outTable[cam_id] = outfile.create_table(
                        "/",
                        cam_id,
                        DataTrainingOutput,
                    )
                    outData[cam_id] = outTable[cam_id].row

                moments = hillas_dict[tel_id]
                ellipticity = moments.width / moments.length

                # Write to file also the Hillas parameters that have been used
                # to calculate reco_results

                moments_reco = hillas_dict_reco[tel_id]
                ellipticity_reco = moments_reco.width / moments_reco.length

                outData[cam_id]["good_image"] = good_for_reco[tel_id]
                outData[cam_id]["is_valid"] = is_valid
                outData[cam_id]["impact_dist"] = impact_dict[tel_id].to(
                    "m").value
                outData[cam_id]["max_signal_cam"] = max_signals[tel_id]
                outData[cam_id]["hillas_intensity"] = moments.intensity
                outData[cam_id]["N_LST"] = n_tels["LST_LST_LSTCam"]
                outData[cam_id]["N_MST"] = (n_tels["MST_MST_NectarCam"] +
                                            n_tels["MST_MST_FlashCam"] +
                                            n_tels["MST_SCT_SCTCam"])
                outData[cam_id]["N_SST"] = (n_tels["SST_1M_DigiCam"] +
                                            n_tels["SST_ASTRI_ASTRICam"] +
                                            n_tels["SST_GCT_CHEC"])
                outData[cam_id]["hillas_width"] = moments.width.to("deg").value
                outData[cam_id]["hillas_length"] = moments.length.to(
                    "deg").value
                outData[cam_id]["hillas_psi"] = moments.psi.to("deg").value
                outData[cam_id]["hillas_skewness"] = moments.skewness
                outData[cam_id]["hillas_kurtosis"] = moments.kurtosis
                outData[cam_id]["h_max"] = h_max.to("m").value
                outData[cam_id]["err_est_pos"] = np.nan
                outData[cam_id]["err_est_dir"] = np.nan
                outData[cam_id]["true_energy"] = event.mc.energy.to(
                    "TeV").value
                outData[cam_id]["hillas_x"] = moments.x.to("deg").value
                outData[cam_id]["hillas_y"] = moments.y.to("deg").value
                outData[cam_id]["hillas_phi"] = moments.phi.to("deg").value
                outData[cam_id]["hillas_r"] = moments.r.to("deg").value

                outData[cam_id]["pixels"] = n_pixel_dict[tel_id]
                outData[cam_id]["obs_id"] = event.index.obs_id
                outData[cam_id]["event_id"] = event.index.event_id
                outData[cam_id]["tel_id"] = tel_id
                outData[cam_id]["xi"] = xi.to("deg").value
                outData[cam_id]["reco_energy"] = reco_energy
                outData[cam_id]["hillas_ellipticity"] = ellipticity.value
                outData[cam_id]["clusters"] = n_cluster_dict[tel_id]
                outData[cam_id]["n_tel_discri"] = n_tels["GOOD images"]
                outData[cam_id]["mc_core_x"] = event.mc.core_x.to("m").value
                outData[cam_id]["mc_core_y"] = event.mc.core_y.to("m").value
                outData[cam_id]["reco_core_x"] = reco_core_x.to("m").value
                outData[cam_id]["reco_core_y"] = reco_core_y.to("m").value
                outData[cam_id]["mc_h_first_int"] = event.mc.h_first_int.to(
                    "m").value
                outData[cam_id]["offset"] = offset.to("deg").value
                outData[cam_id]["mc_x_max"] = event.mc.x_max.value  # g / cm2
                outData[cam_id]["alt"] = reco_result.alt.to("deg").value
                outData[cam_id]["az"] = reco_result.az.to("deg").value
                outData[cam_id]["reco_energy_tel"] = reco_energy_tel[tel_id]
                # Variables from hillas_dist_reco
                outData[cam_id]["n_tel_reco"] = n_tels["GOOD images"]
                outData[cam_id]["hillas_x_reco"] = moments_reco.x.to(
                    "deg").value
                outData[cam_id]["hillas_y_reco"] = moments_reco.y.to(
                    "deg").value
                outData[cam_id]["hillas_phi_reco"] = moments_reco.phi.to(
                    "deg").value
                outData[cam_id][
                    "hillas_ellipticity_reco"] = ellipticity_reco.value
                outData[cam_id]["hillas_r_reco"] = moments_reco.r.to(
                    "deg").value
                outData[cam_id]["hillas_skewness_reco"] = moments_reco.skewness
                outData[cam_id]["hillas_kurtosis_reco"] = moments_reco.kurtosis
                outData[cam_id]["hillas_width_reco"] = moments_reco.width.to(
                    "deg").value
                outData[cam_id]["hillas_length_reco"] = moments_reco.length.to(
                    "deg").value
                outData[cam_id]["hillas_psi_reco"] = moments_reco.psi.to(
                    "deg").value
                outData[cam_id][
                    "hillas_intensity_reco"] = moments_reco.intensity
                outData[cam_id][
                    "leakage_intensity_width_1_reco"] = leakage_dict[tel_id][
                        "leak1_reco"]
                outData[cam_id][
                    "leakage_intensity_width_2_reco"] = leakage_dict[tel_id][
                        "leak2_reco"]
                outData[cam_id]["leakage_intensity_width_1"] = leakage_dict[
                    tel_id]["leak1"]
                outData[cam_id]["leakage_intensity_width_2"] = leakage_dict[
                    tel_id]["leak2"]

                # =======================
                # IMAGES INFORMATION
                # =======================

                if args.save_images is True:
                    # we define and save images content here, to make it
                    # adaptive to different cameras

                    outData[cam_id]["true_image"] = true_image[tel_id]
                    outData[cam_id]["reco_image"] = reco_image[tel_id]
                    outData[cam_id]["cleaning_mask_reco"] = cleaning_mask_reco[
                        tel_id]
                    outData[cam_id][
                        "cleaning_mask_clusters"] = cleaning_mask_clusters[
                            tel_id]
                # =======================

                outData[cam_id].append()

            if signal_handler.stop:
                break
        if signal_handler.stop:
            break
    # make sure that all the events are properly stored
    for table in outTable.values():
        table.flush()

    print(bcolors.BOLD +
          "\n\n==================================================\n" +
          "Statistical summary of processed events and images\n" +
          "==================================================\n"
          # + bcolors.ENDC
          )

    evt_cutflow()

    # Catch specific cases
    triggered_events = evt_cutflow.cuts["min2Tels trig"][1]
    reconstructed_events = evt_cutflow.cuts["min2Tels reco"][1]

    if triggered_events == 0:
        print("\033[93mWARNING: No events have been triggered"
              " by the selected telescopes! \033[0m")
    else:
        print("\n")
        img_cutflow()
        if reconstructed_events == 0:
            print("\033[93m WARNING: None of the triggered events have been "
                  "properly reconstructed by the selected telescopes!\n"
                  "DL1 file will be empty! \033[0m")
        print(bcolors.ENDC)
Ejemplo n.º 4
0
def main():

    # Argument parser
    parser = make_argparser()
    parser.add_argument(
        "--estimate_energy",
        type=str2bool,
        default=False,
        help="Make estimation of energy",
    )
    parser.add_argument("--regressor_dir",
                        type=str,
                        default="./",
                        help="regressors directory")
    args = parser.parse_args()

    # Read configuration file
    cfg = load_config(args.config_file)

    # Read site layout
    site = cfg["General"]["site"]
    array = cfg["General"]["array"]

    if args.infile_list:
        filenamelist = []
        for f in args.infile_list:
            filenamelist += glob("{}/{}".format(args.indir, f))
        filenamelist.sort()
    else:
        raise ValueError("don't know which input to use...")

    if not filenamelist:
        print("no files found; check indir: {}".format(args.indir))
        exit(-1)
    else:
        print("found {} files".format(len(filenamelist)))

    # keeping track of events and where they were rejected
    evt_cutflow = CutFlow("EventCutFlow")
    img_cutflow = CutFlow("ImageCutFlow")

    preper = EventPreparer(config=cfg,
                           mode=args.mode,
                           event_cutflow=evt_cutflow,
                           image_cutflow=img_cutflow)

    # catch ctr-c signal to exit current loop and still display results
    signal_handler = SignalHandler()
    signal.signal(signal.SIGINT, signal_handler)

    # Regressor method
    regressor_method = cfg["EnergyRegressor"]["method_name"]

    # wrapper for the scikit-learn regressor
    if args.estimate_energy is True:
        # regressor_files = args.regressor_dir + "/regressor_{mode}_{cam_id}_{regressor}.pkl.gz"
        regressor_files = (args.regressor_dir +
                           "/regressor_{mode}_{cam_id}_{regressor}.pkl.gz")
        reg_file = regressor_files.format(
            **{
                "mode": args.mode,
                "wave_args": "mixed",  # ToDo, control
                "regressor": regressor_method,
                "cam_id": "{cam_id}",
            })

        # from IPython import embed
        # embed()

        regressor = EnergyRegressor.load(reg_file, cam_id_list=args.cam_ids)

    class EventFeatures(tb.IsDescription):
        impact_dist = tb.Float32Col(dflt=1, pos=0)
        sum_signal_evt = tb.Float32Col(dflt=1, pos=1)
        max_signal_cam = tb.Float32Col(dflt=1, pos=2)
        sum_signal_cam = tb.Float32Col(dflt=1, pos=3)
        N_LST = tb.Int16Col(dflt=1, pos=4)
        N_MST = tb.Int16Col(dflt=1, pos=5)
        N_SST = tb.Int16Col(dflt=1, pos=6)
        width = tb.Float32Col(dflt=1, pos=7)
        length = tb.Float32Col(dflt=1, pos=8)
        skewness = tb.Float32Col(dflt=1, pos=9)
        kurtosis = tb.Float32Col(dflt=1, pos=10)
        h_max = tb.Float32Col(dflt=1, pos=11)
        err_est_pos = tb.Float32Col(dflt=1, pos=12)
        err_est_dir = tb.Float32Col(dflt=1, pos=13)
        mc_energy = tb.FloatCol(dflt=1, pos=14)
        local_distance = tb.Float32Col(dflt=1, pos=15)
        n_pixel = tb.Int16Col(dflt=1, pos=16)
        n_cluster = tb.Int16Col(dflt=-1, pos=17)
        obs_id = tb.Int16Col(dflt=1, pos=18)
        event_id = tb.Int32Col(dflt=1, pos=19)
        tel_id = tb.Int16Col(dflt=1, pos=20)
        xi = tb.Float32Col(dflt=np.nan, pos=21)
        reco_energy = tb.FloatCol(dflt=np.nan, pos=22)
        ellipticity = tb.FloatCol(dflt=1, pos=23)
        n_tel_reco = tb.FloatCol(dflt=1, pos=24)
        n_tel_discri = tb.FloatCol(dflt=1, pos=25)
        mc_core_x = tb.FloatCol(dflt=1, pos=26)
        mc_core_y = tb.FloatCol(dflt=1, pos=27)
        reco_core_x = tb.FloatCol(dflt=1, pos=28)
        reco_core_y = tb.FloatCol(dflt=1, pos=29)
        mc_h_first_int = tb.FloatCol(dflt=1, pos=30)
        offset = tb.Float32Col(dflt=np.nan, pos=31)
        mc_x_max = tb.Float32Col(dflt=np.nan, pos=31)
        alt = tb.Float32Col(dflt=np.nan, pos=33)
        az = tb.Float32Col(dflt=np.nan, pos=34)
        reco_energy_tel = tb.Float32Col(dflt=np.nan, pos=35)
        # from hillas_reco
        ellipticity_reco = tb.FloatCol(dflt=1, pos=36)
        local_distance_reco = tb.Float32Col(dflt=1, pos=37)
        skewness_reco = tb.Float32Col(dflt=1, pos=38)
        kurtosis_reco = tb.Float32Col(dflt=1, pos=39)
        width_reco = tb.Float32Col(dflt=1, pos=40)
        length_reco = tb.Float32Col(dflt=1, pos=41)
        psi = tb.Float32Col(dflt=1, pos=42)
        psi_reco = tb.Float32Col(dflt=1, pos=43)
        sum_signal_cam_reco = tb.Float32Col(dflt=1, pos=44)

    feature_outfile = tb.open_file(args.outfile, mode="w")
    feature_table = {}
    feature_events = {}

    # Telescopes in analysis
    allowed_tels = set(prod3b_tel_ids(array, site=site))

    for i, filename in enumerate(filenamelist):

        print("file: {} filename = {}".format(i, filename))

        source = event_source(input_url=filename,
                              allowed_tels=allowed_tels,
                              max_events=args.max_events)

        # loop that cleans and parametrises the images and performs the reconstruction
        # for each event
        for (
                event,
                n_pixel_dict,
                hillas_dict,
                hillas_dict_reco,
                n_tels,
                tot_signal,
                max_signals,
                n_cluster_dict,
                reco_result,
                impact_dict,
        ) in preper.prepare_event(source):

            # Angular quantities
            run_array_direction = event.mcheader.run_array_direction

            xi = angular_separation(event.mc.az, event.mc.alt, reco_result.az,
                                    reco_result.alt)

            offset = angular_separation(
                run_array_direction[0],  # az
                run_array_direction[1],  # alt
                reco_result.az,
                reco_result.alt,
            )

            # Impact parameter
            reco_core_x = reco_result.core_x
            reco_core_y = reco_result.core_y

            # Height of shower maximum
            h_max = reco_result.h_max
            # Todo add conversion in number of radiation length, need an atmosphere profile

            reco_energy = np.nan
            reco_energy_tel = dict()

            # Not optimal at all, two loop on tel!!!
            # For energy estimation
            if args.estimate_energy is True:
                weight_tel = np.zeros(len(hillas_dict.keys()))
                energy_tel = np.zeros(len(hillas_dict.keys()))

                for idx, tel_id in enumerate(hillas_dict.keys()):
                    cam_id = event.inst.subarray.tel[tel_id].camera.cam_id
                    moments = hillas_dict[tel_id]
                    model = regressor.model_dict[cam_id]

                    features_img = np.array([
                        np.log10(moments.intensity),
                        np.log10(impact_dict[tel_id].value),
                        moments.width.value,
                        moments.length.value,
                        h_max.value,
                    ])

                    energy_tel[idx] = model.predict([features_img])
                    weight_tel[idx] = moments.intensity
                    reco_energy_tel[tel_id] = energy_tel[idx]

                reco_energy = np.sum(weight_tel * energy_tel) / sum(weight_tel)
            else:
                for idx, tel_id in enumerate(hillas_dict.keys()):
                    reco_energy_tel[tel_id] = np.nan

            for idx, tel_id in enumerate(hillas_dict.keys()):
                cam_id = event.inst.subarray.tel[tel_id].camera.cam_id

                if cam_id not in feature_events:
                    feature_table[cam_id] = feature_outfile.create_table(
                        "/", "_".join(["feature_events", cam_id]),
                        EventFeatures)
                    feature_events[cam_id] = feature_table[cam_id].row

                moments = hillas_dict[tel_id]
                ellipticity = moments.width / moments.length

                # Write to file also the Hillas parameters that have been used
                # to calculate reco_results

                moments_reco = hillas_dict_reco[tel_id]
                ellipticity_reco = moments_reco.width / moments_reco.length

                feature_events[cam_id]["impact_dist"] = (
                    impact_dict[tel_id].to("m").value)
                feature_events[cam_id]["sum_signal_evt"] = tot_signal
                feature_events[cam_id]["max_signal_cam"] = max_signals[tel_id]
                feature_events[cam_id]["sum_signal_cam"] = moments.intensity
                feature_events[cam_id]["N_LST"] = n_tels["LST"]
                feature_events[cam_id]["N_MST"] = n_tels["MST"]
                feature_events[cam_id]["N_SST"] = n_tels["SST"]
                feature_events[cam_id]["width"] = moments.width.to("m").value
                feature_events[cam_id]["length"] = moments.length.to("m").value
                feature_events[cam_id]["psi"] = moments.psi.to("deg").value
                feature_events[cam_id]["skewness"] = moments.skewness
                feature_events[cam_id]["kurtosis"] = moments.kurtosis
                feature_events[cam_id]["h_max"] = h_max.to("m").value
                feature_events[cam_id]["err_est_pos"] = np.nan
                feature_events[cam_id]["err_est_dir"] = np.nan
                feature_events[cam_id]["mc_energy"] = event.mc.energy.to(
                    "TeV").value
                feature_events[cam_id]["local_distance"] = moments.r.to(
                    "m").value
                feature_events[cam_id]["n_pixel"] = n_pixel_dict[tel_id]
                feature_events[cam_id]["obs_id"] = event.r0.obs_id
                feature_events[cam_id]["event_id"] = event.r0.event_id
                feature_events[cam_id]["tel_id"] = tel_id
                feature_events[cam_id]["xi"] = xi.to("deg").value
                feature_events[cam_id]["reco_energy"] = reco_energy
                feature_events[cam_id]["ellipticity"] = ellipticity.value
                feature_events[cam_id]["n_cluster"] = n_cluster_dict[tel_id]
                feature_events[cam_id]["n_tel_reco"] = n_tels["reco"]
                feature_events[cam_id]["n_tel_discri"] = n_tels["discri"]
                feature_events[cam_id]["mc_core_x"] = event.mc.core_x.to(
                    "m").value
                feature_events[cam_id]["mc_core_y"] = event.mc.core_y.to(
                    "m").value
                feature_events[cam_id]["reco_core_x"] = reco_core_x.to(
                    "m").value
                feature_events[cam_id]["reco_core_y"] = reco_core_y.to(
                    "m").value
                feature_events[cam_id][
                    "mc_h_first_int"] = event.mc.h_first_int.to("m").value
                feature_events[cam_id]["offset"] = offset.to("deg").value
                feature_events[cam_id][
                    "mc_x_max"] = event.mc.x_max.value  # g / cm2
                feature_events[cam_id]["alt"] = reco_result.alt.to("deg").value
                feature_events[cam_id]["az"] = reco_result.az.to("deg").value
                feature_events[cam_id]["reco_energy_tel"] = reco_energy_tel[
                    tel_id]
                # Variables from hillas_dist_reco
                feature_events[cam_id][
                    "ellipticity_reco"] = ellipticity_reco.value
                feature_events[cam_id][
                    "local_distance_reco"] = moments_reco.r.to("m").value
                feature_events[cam_id]["skewness_reco"] = moments_reco.skewness
                feature_events[cam_id]["kurtosis_reco"] = moments_reco.kurtosis
                feature_events[cam_id]["width_reco"] = moments_reco.width.to(
                    "m").value
                feature_events[cam_id]["length_reco"] = moments_reco.length.to(
                    "m").value
                feature_events[cam_id]["psi_reco"] = moments_reco.psi.to(
                    "deg").value
                feature_events[cam_id][
                    "sum_signal_cam_reco"] = moments_reco.intensity

                feature_events[cam_id].append()

            if signal_handler.stop:
                break
        if signal_handler.stop:
            break
    # make sure that all the events are properly stored
    for table in feature_table.values():
        table.flush()

    img_cutflow()
    evt_cutflow()