Example #1
def test_hdf(example_subarray):
    import tables

    with tempfile.NamedTemporaryFile(suffix=".hdf5") as f:

        example_subarray.to_hdf(f.name)
        read = SubarrayDescription.from_hdf(f.name)

        assert example_subarray == read

        # test that subarrays without name (v0.8.0) work:
        with tables.open_file(f.name, "r+") as hdf:
            del hdf.root.configuration.instrument.subarray._v_attrs.name

        no_name = SubarrayDescription.from_hdf(f.name)
        assert no_name.name == "Unknown"

    # test with a subarray that has two different telescopes with the same
    # camera
    tel = {
        1: TelescopeDescription.from_name(optics_name="SST-ASTRI", camera_name="CHEC"),
        2: TelescopeDescription.from_name(optics_name="SST-GCT", camera_name="CHEC"),
    }
    pos = {1: [0, 0, 0] * u.m, 2: [50, 0, 0] * u.m}

    array = SubarrayDescription("test array", tel_positions=pos, tel_descriptions=tel)

    with tempfile.NamedTemporaryFile(suffix=".hdf5") as f:

        array.to_hdf(f.name)
        read = SubarrayDescription.from_hdf(f.name)

        assert array == read
Example #2
def test_hdf(example_subarray, tmp_path):
    import tables

    path = tmp_path / "subarray.h5"

    example_subarray.to_hdf(path)
    read = SubarrayDescription.from_hdf(path)

    assert example_subarray == read

    # test we can write the read subarray
    read.to_hdf(path, overwrite=True)

    # test we have a frame attached to the geometry with correction information
    for tel_id, tel in read.tel.items():
        assert (tel.camera.geometry.frame.focal_length ==
                tel.optics.equivalent_focal_length)
        # test if transforming works
        tel.camera.geometry.transform_to(TelescopeFrame())

    # test that subarrays without name (v0.8.0) work:
    with tables.open_file(path, "r+") as hdf:
        del hdf.root.configuration.instrument.subarray._v_attrs.name

    no_name = SubarrayDescription.from_hdf(path)
    assert no_name.name == "Unknown"

    # Test we can also write and read to an already opened h5file
    with tables.open_file(path, "w") as h5file:
        example_subarray.to_hdf(h5file)

    with tables.open_file(path, "r") as h5file:
        assert SubarrayDescription.from_hdf(h5file) == example_subarray
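The tests above exercise the HDF5 round trip of a SubarrayDescription; a minimal sketch of that pattern, assuming `subarray` is an existing ctapipe SubarrayDescription and `path` a writable file path:

from ctapipe.instrument import SubarrayDescription

def roundtrip_subarray(subarray, path):
    # sketch only: write the instrument description and read it back
    subarray.to_hdf(path)
    return SubarrayDescription.from_hdf(path)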
Example #3
def merging_check(file_list):
    """
    Check that the hdf5 files in a list are compatible for merging with regard to:
     - array info
     - metadata
     - MC simu info (only for simulations)
     - MC histograms (only for simulations)

    Parameters
    ----------
    file_list: list of paths to hdf5 files

    Returns
    -------
    list: list of paths of files that can be merged
    """
    if len(file_list) < 2:
        raise ValueError("Need at least two files for merging")

    mergeable_list = file_list.copy()

    first_file = mergeable_list[0]
    subarray_info0 = SubarrayDescription.from_hdf(first_file)
    metadata0 = read_metadata(first_file)

    if subarray_info0.name == "MonteCarloArray":
        mcheader0 = read_simu_info_hdf5(first_file)
        thrown_events_hist0 = read_simtel_energy_histogram(first_file)

    for filename in mergeable_list[1:]:
        try:
            metadata = read_metadata(filename)
            check_metadata(metadata0, metadata)
            subarray_info = SubarrayDescription.from_hdf(filename)

            if subarray_info0.name == "MonteCarloArray":
                mcheader = read_simu_info_hdf5(filename)
                thrown_events_hist = read_simtel_energy_histogram(filename)
                check_mcheader(mcheader0, mcheader)
                check_thrown_events_histogram(thrown_events_hist0,
                                              thrown_events_hist)

            if subarray_info != subarray_info0:
                raise ValueError('Subarrays do not match')

        except ValueError as e:
            log.error(rf"{filename} cannot be merged '¯\_(ツ)_/¯: {e}'")
            mergeable_list.remove(filename)

    return mergeable_list
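A possible call site for merging_check, sketched with hypothetical file names (merging_check itself is the function defined above):

# illustrative only: the file names below are hypothetical DL1 files
dl1_files = ["dl1_run1.h5", "dl1_run2.h5", "dl1_run3.h5"]
mergeable = merging_check(dl1_files)
for rejected in set(dl1_files) - set(mergeable):
    print(f"not mergeable: {rejected}")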
Example #4
def test_hdf(example_subarray):
    import tables

    with tempfile.NamedTemporaryFile(suffix=".hdf5") as f:

        example_subarray.to_hdf(f.name)
        read = SubarrayDescription.from_hdf(f.name)

        assert example_subarray == read

        # test we can write the read subarray
        read.to_hdf(f.name, overwrite=True)

        for tel_id, tel in read.tel.items():
            assert (tel.camera.geometry.frame.focal_length ==
                    tel.optics.equivalent_focal_length)

            # test if transforming works
            tel.camera.geometry.transform_to(TelescopeFrame())

        # test that subarrays without name (v0.8.0) work:
        with tables.open_file(f.name, "r+") as hdf:
            del hdf.root.configuration.instrument.subarray._v_attrs.name

        no_name = SubarrayDescription.from_hdf(f.name)
        assert no_name.name == "Unknown"

    # test with a subarray that has two different telescopes with the same
    # camera
    tel = {
        1:
        TelescopeDescription.from_name(optics_name="SST-ASTRI",
                                       camera_name="CHEC"),
        2:
        TelescopeDescription.from_name(optics_name="SST-GCT",
                                       camera_name="CHEC"),
    }
    pos = {1: [0, 0, 0] * u.m, 2: [50, 0, 0] * u.m}

    array = SubarrayDescription("test array",
                                tel_positions=pos,
                                tel_descriptions=tel)

    with tempfile.NamedTemporaryFile(suffix=".hdf5") as f:

        array.to_hdf(f.name)
        read = SubarrayDescription.from_hdf(f.name)

        assert array == read
Example #5
def merged_h5file(tmp_path, simulated_dl1_file):
    """Produce a merged h5 file from simulated dl1 files."""
    from lstchain.io.io import auto_merge_h5files

    subarray_before = SubarrayDescription.from_hdf(simulated_dl1_file)

    merged_dl1_file = tmp_path / "dl1_merged.h5"
    auto_merge_h5files([simulated_dl1_file, simulated_dl1_file],
                       output_filename=merged_dl1_file)

    subarray_merged = SubarrayDescription.from_hdf(merged_dl1_file)

    # check that subarray name is correctly retained
    assert subarray_before.name == subarray_merged.name
    return merged_dl1_file
Example #6
    def __init__(self, input_url=None, config=None, parent=None, **kwargs):
        """
        EventSource for dl1 files in the standard DL1 data format

        Parameters
        ----------
        input_url : str
            Path of the file to load
        config : traitlets.loader.Config
            Configuration specified by config file or cmdline arguments.
            Used to set traitlet values.
            Set to None if no configuration to pass.
        parent:
            Parent from which the config is used. Mutually exclusive with config
        kwargs
        """
        super().__init__(input_url=input_url,
                         config=config,
                         parent=parent,
                         **kwargs)

        self.file_ = tables.open_file(self.input_url)
        self._subarray_info = SubarrayDescription.from_hdf(self.input_url)
        self._simulation_configs = self._parse_simulation_configs()
        self.datamodel_version = self.file_.root._v_attrs[
            "CTA PRODUCT DATA MODEL VERSION"]
        params = "parameters" in self.file_.root.dl1.event.telescope
        images = "images" in self.file_.root.dl1.event.telescope
        if params and images:
            self._datalevels = (DataLevel.DL1_IMAGES, DataLevel.DL1_PARAMETERS)
        elif params:
            self._datalevels = (DataLevel.DL1_PARAMETERS, )
        elif images:
            self._datalevels = (DataLevel.DL1_IMAGES, )
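The constructor above belongs to a DL1 event source; a hedged usage sketch, assuming the enclosing class is ctapipe's DL1EventSource (name and import path are not shown in the snippet) and that the input file exists:

# class name and import path are assumptions based on the snippet above
from ctapipe.io import DL1EventSource

source = DL1EventSource(input_url="events.dl1.h5")  # hypothetical file
print(source.subarray.num_tels, source.datalevels)
for event in source:
    pass  # process DL1 parameters and/or images here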
Example #7
def test_observed_dl1_validity(observed_dl1_files):
    dl1_df = pd.read_hdf(observed_dl1_files["dl1_file1"],
                         key=dl1_params_lstcam_key)
    # The first valid timestamp in the test run corresponds
    # to its third event (see night summary)
    first_timestamp_nightsummary = 1582059789516351903  # ns
    first_event_timestamp = dl1_df["dragon_time"].iloc[2]  # third event

    dl1_tables = get_dataset_keys(observed_dl1_files["dl1_file1"])

    assert dl1_params_lstcam_key in dl1_tables
    assert dl1_images_lstcam_key in dl1_tables
    assert dl1_params_tel_mon_cal_key in dl1_tables
    assert dl1_params_tel_mon_ped_key in dl1_tables
    assert dl1_params_tel_mon_flat_key in dl1_tables

    subarray = SubarrayDescription.from_hdf(observed_dl1_files['dl1_file1'])
    assert 1 in subarray.tel
    assert subarray.tel[1].name == "LST"

    assert "alt_tel" in dl1_df.columns
    assert "az_tel" in dl1_df.columns
    assert "trigger_type" in dl1_df.columns
    assert "ucts_trigger_type" in dl1_df.columns
    assert "trigger_time" in dl1_df.columns
    assert "dragon_time" in dl1_df.columns
    assert "tib_time" in dl1_df.columns
    assert "ucts_time" in dl1_df.columns
    assert np.isclose(
        (Time(first_event_timestamp, format="unix") -
         Time(first_timestamp_nightsummary / 1e9, format="unix_tai")).to_value(
             u.s),
        0,
    )
    np.testing.assert_allclose(dl1_df["dragon_time"], dl1_df["trigger_time"])
Example #8
def test_hdf_duplicate_string_repr(tmp_path):
    """Test writing and reading of a subarray with two telescopes that
    are different but have the same name.
    """
    # test with a subarray that has two different telescopes with the same
    # camera
    tel1 = TelescopeDescription.from_name(optics_name="LST", camera_name="LSTCam")

    # second telescope is almost the same and has the same str repr
    tel2 = deepcopy(tel1)
    # e.g. one mirror fell off
    tel2.optics.num_mirror_tiles = tel1.optics.num_mirror_tiles - 1

    array = SubarrayDescription(
        "test array",
        tel_positions={1: [0, 0, 0] * u.m, 2: [50, 0, 0] * u.m},
        tel_descriptions={1: tel1, 2: tel2},
    )

    # defensive checks to make sure we are actually testing this
    assert len(array.telescope_types) == 2
    assert str(tel1) == str(tel2)
    assert tel1 != tel2

    path = tmp_path / "subarray.h5"
    array.to_hdf(path)
    read = SubarrayDescription.from_hdf(path)
    assert array == read
    assert (
        read.tel[1].optics.num_mirror_tiles == read.tel[2].optics.num_mirror_tiles + 1
    )
Example #9
def main():

    args = parser.parse_args()

    dl1_filename = os.path.abspath(args.input_file)

    config = get_standard_config()
    if args.config_file is not None:
        try:
            config = read_configuration_file(os.path.abspath(args.config_file))
        except Exception:
            # "Custom configuration could not be loaded !!!": fall back to the standard config
            pass

    dl1_params = pd.read_hdf(dl1_filename, key=dl1_params_lstcam_key)
    subarray_info = SubarrayDescription.from_hdf(dl1_filename)
    tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
    focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length

    src_dep_df = pd.concat(get_source_dependent_parameters(
        dl1_params, config, focal_length=focal_length),
                           axis=1)

    metadata = global_metadata()
    write_dataframe(src_dep_df,
                    dl1_filename,
                    dl1_params_src_dep_lstcam_key,
                    config=config,
                    meta=metadata)
Example #10
    def __init__(self, input_url, config=None, parent=None, **kwargs):
        """
        EventSource for dl1 files in the standard DL1 data format

        Parameters
        ----------
        input_url : str
            Path of the file to load
        config : traitlets.loader.Config
            Configuration specified by config file or cmdline arguments.
            Used to set traitlet values.
            Set to None if no configuration to pass.
        parent:
            Parent from which the config is used. Mutually exclusive with config
        kwargs
        """
        super().__init__(input_url=input_url, config=config, parent=parent, **kwargs)

        self.file_ = tables.open_file(input_url)
        self.input_url = input_url
        self._subarray_info = SubarrayDescription.from_hdf(self.input_url)
        self._mc_headers = self._parse_mc_headers()
        self.datamodel_version = self.file_.root._v_attrs[
            "CTA PRODUCT DATA MODEL VERSION"
        ]
Example #11
def test_allowed_tels(tmp_path, dl1_file, dl1_proton_file):
    from ctapipe.tools.merge import MergeTool
    from ctapipe.instrument import SubarrayDescription

    # create file to test 'allowed-tels' option
    output = tmp_path / "merged_allowed_tels.dl1.h5"

    allowed_tels = {25, 125}

    argv = [
        str(dl1_file),
        str(dl1_proton_file), f"--output={output}", "--overwrite"
    ]
    for tel_id in allowed_tels:
        argv.append(f"--allowed-tels={tel_id}")

    ret = run_tool(MergeTool(), argv=argv, cwd=tmp_path)
    assert ret == 0

    s = SubarrayDescription.from_hdf(output)
    assert s.tel.keys() == allowed_tels

    tel_keys = {f"tel_{tel_id:03d}" for tel_id in allowed_tels}
    with tables.open_file(output) as f:
        assert set(f.root.dl1.event.telescope.parameters._v_children).issubset(
            tel_keys)
        assert set(
            f.root.dl1.event.telescope.images._v_children).issubset(tel_keys)
        assert set(
            f.root.dl1.monitoring.telescope.pointing._v_children).issubset(
                tel_keys)
Example #12
def merging_check(file_list):
    """
    Check that the hdf5 files in a list are compatible for merging with regard to:
     - array info
     - metadata
     - MC simu info (only for simulations)
     - MC histograms (only for simulations)

    Parameters
    ----------
    file_list: list of paths to hdf5 files

    Returns
    -------
    list: list of paths of files that can be merged
    """
    assert len(file_list) > 1, "The list of files is too short"
    mergeable_list = file_list.copy()

    first_file = mergeable_list[0]
    subarray_info0 = SubarrayDescription.from_hdf(first_file)
    metadata0 = read_metadata(first_file)

    if subarray_info0.name == "MonteCarloArray":
        mcheader0 = read_simu_info_hdf5(first_file)
        thrown_events_hist0 = read_simtel_energy_histogram(first_file)

    for filename in mergeable_list[1:]:
        try:
            metadata = read_metadata(filename)
            check_metadata(metadata0, metadata)
            subarray_info = SubarrayDescription.from_hdf(filename)

            if subarray_info0.name == "MonteCarloArray":
                mcheader = read_simu_info_hdf5(filename)
                thrown_events_hist = read_simtel_energy_histogram(filename)
                check_mcheader(mcheader0, mcheader)
                check_thrown_events_histogram(thrown_events_hist0,
                                              thrown_events_hist)

            assert subarray_info == subarray_info0

        except AssertionError:
            log.exception(rf"{filename} cannot be smart merged ¯\_(ツ)_/¯")
            mergeable_list.remove(filename)

    return mergeable_list
Example #13
def test_dl1writer(tmpdir: Path):
    """
    Check that we can write DL1 files

    Parameters
    ----------
    tmpdir :
        temp directory fixture
    """

    output_path = Path(tmpdir / "events.dl1.h5")
    source = EventSource(
        get_dataset_path(
            "gamma_LaPalma_baseline_20Zd_180Az_prod3b_test.simtel.gz"),
        max_events=20,
        allowed_tels=[1, 2, 3, 4],
    )
    calibrate = CameraCalibrator(subarray=source.subarray)

    with DL1Writer(
            event_source=source,
            output_path=output_path,
            write_parameters=False,
            write_images=True,
    ) as write_dl1:
        write_dl1.log.level = logging.DEBUG
        for event in source:
            calibrate(event)
            write_dl1(event)
        write_dl1.write_simulation_histograms(source)

    assert output_path.exists()

    # check we can get the subarray description:
    sub = SubarrayDescription.from_hdf(output_path)
    assert sub.num_tels > 0

    # check a few things in the output just to make sure there is output. For a
    # full test of the data model, a verify tool should be created.
    with tables.open_file(output_path) as h5file:
        images = h5file.get_node("/dl1/event/telescope/images/tel_001")
        assert images.col("image").max() > 0.0
        assert (h5file.root._v_attrs["CTA PRODUCT DATA MODEL VERSION"]  # pylint: disable=protected-access
                == DL1_DATA_MODEL_VERSION)
        shower = h5file.get_node("/simulation/event/subarray/shower")
        assert len(shower) > 0
        assert shower.col("true_alt").mean() > 0.0
        assert (shower._v_attrs["true_alt_UNIT"] == "deg")  # pylint: disable=protected-access
Example #14
def test_hdf_same_camera(tmp_path):
    """Test writing / reading subarray to hdf5 with a subarray that has two
    different telescopes with the same camera
    """
    tel = {
        1: TelescopeDescription.from_name(optics_name="SST-ASTRI", camera_name="CHEC"),
        2: TelescopeDescription.from_name(optics_name="SST-GCT", camera_name="CHEC"),
    }
    pos = {1: [0, 0, 0] * u.m, 2: [50, 0, 0] * u.m}

    array = SubarrayDescription("test array", tel_positions=pos, tel_descriptions=tel)

    path = tmp_path / "subarray.h5"
    array.to_hdf(path)
    read = SubarrayDescription.from_hdf(path)
    assert array == read
Example #15
def test_allowed_tels(tmp_path, dl1_file, dl1_proton_file):
    from ctapipe.tools.dl1_merge import MergeTool
    from ctapipe.instrument import SubarrayDescription

    # create file to test 'allowed-tels' option
    output = tmp_path / "merged_allowed_tels.dl1.h5"
    ret = run_tool(
        MergeTool(),
        argv=[
            str(dl1_file),
            str(dl1_proton_file),
            f"--output={output}",
            "--allowed-tels=[1,2]",
            "--overwrite",
        ],
        cwd=tmp_path,
    )
    assert ret == 0

    s = SubarrayDescription.from_hdf(output)
    assert s.tel.keys() == {1, 2}
Example #16
def test_dl1writer_no_events(tmpdir: Path):
    """
    Check that we can write DL1 files even when no events are given

    Parameters
    ----------
    tmpdir :
        temp directory fixture
    """

    output_path = Path(tmpdir / "no_events.dl1.h5")
    dataset = "lst_prod3_calibration_and_mcphotons.simtel.zst"
    with EventSource(get_dataset_path(dataset),
                     focal_length_choice='nominal') as source:
        # exhaust source
        for _ in source:
            pass

    assert source.file_.histograms is not None

    with DataWriter(
            event_source=source,
            output_path=output_path,
            write_parameters=True,
            write_images=True,
    ) as writer:
        writer.log.level = logging.DEBUG
        writer.write_simulation_histograms(source)

    assert output_path.exists()

    # check we can get the subarray description:
    sub = SubarrayDescription.from_hdf(output_path)
    assert sub == source.subarray

    with tables.open_file(output_path) as h5file:
        assert h5file.get_node("/configuration/simulation/run") is not None
        assert h5file.get_node(
            "/simulation/service/shower_distribution") is not None
Example #17
def build_models(filegammas, fileprotons,
                 save_models=True, path_models="./",
                 energy_min=-np.inf,
                 custom_config=None,
                 ):
    """
    Uses MC data to train Random Forests for Energy and DISP
    reconstruction and G/H separation and returns the trained RFs.
    The passed config supersedes the standard configuration.
    Here is the complete workflow with the number of events selected from the config:

    .. mermaid::

        graph LR
            GAMMA[gammas] -->|#`gamma_regressors`| REG(regressors) --> DISK
            GAMMA --> S(split)
            S --> |#`gamma_tmp_regressors`| g_train
            S --> |#`gamma_classifier`| g_test
            g_train --> tmp_reg(tmp regressors)
            tmp_reg --- A[ ]:::empty
            g_test --- A
            A --> g_test_dl2
            g_test_dl2 --- D[ ]:::empty
            protons -------- |#`proton_classifier`| D
            D --> cls(classifier)
            cls--> DISK
            classDef empty width:0px,height:0px;


    Parameters
    ----------
    filegammas: string
        path to the file with MC gamma events
    fileprotons: string
        path to the file with MC proton events
    save_models: bool
        True to save the trained models on disk
    path_models: string
        path of the directory where to save the models.
        If it does not exist, the directory is created.
    energy_min: float
        Cut in log10 of the reconstructed energy applied to the events used to train the classifier
    custom_config: dictionary
        Modified configuration to update the standard one
    test_size: float or int
        If float, should be between 0.0 and 1.0 and represent the proportion of the dataset to include in the test split.
        If int, represents the absolute number of test samples.
        If None, it will be set to 0.25.

    Returns
    -------
    if config['disp_method'] == 'disp_vector':
        return reg_energy, reg_disp_vector, cls_gh
    elif config['disp_method'] == 'disp_norm_sign':
        return reg_energy, reg_disp_norm, cls_disp_sign, cls_gh

    Raises
    ------
    ValueError
        If the requested number of gamma events in the config for the training of the classifier is not valid.
        See config["n_training_events"]
    """

    custom_config = {} if custom_config is None else custom_config
    config = replace_config(standard_config, custom_config)
    events_filters = config["events_filters"]

    # Adding a filter on mc_type just for training
    events_filters['mc_type'] = [-9000, np.inf]

    df_gamma = pd.read_hdf(filegammas, key=dl1_params_lstcam_key)
    df_proton = pd.read_hdf(fileprotons, key=dl1_params_lstcam_key)

    if config['source_dependent']:
        # if source-dependent parameters are already in dl1 data, just read those data
        # if not, source-dependent parameters are added here
        if dl1_params_src_dep_lstcam_key in get_dataset_keys(filegammas):
            src_dep_df_gamma = get_srcdep_params(filegammas)

        else:
            subarray_info = SubarrayDescription.from_hdf(filegammas)
            tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
            focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length
            src_dep_df_gamma = get_source_dependent_parameters(df_gamma, config, focal_length=focal_length)

        df_gamma = pd.concat([df_gamma, src_dep_df_gamma['on']], axis=1)

        # if source-dependent parameters are already in dl1 data, just read those data
        # if not, source-dependent parameters are added here
        if dl1_params_src_dep_lstcam_key in get_dataset_keys(fileprotons):
            src_dep_df_proton = get_srcdep_params(fileprotons)

        else:
            subarray_info = SubarrayDescription.from_hdf(fileprotons)
            tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
            focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length
            src_dep_df_proton = get_source_dependent_parameters(df_proton, config, focal_length=focal_length)

        df_proton = pd.concat([df_proton, src_dep_df_proton['on']], axis=1)

    df_gamma = utils.filter_events(df_gamma,
                                   filters=events_filters,
                                   finite_params=config['energy_regression_features']
                                                 + config['disp_regression_features']
                                                 + config['particle_classification_features']
                                                 + config['disp_classification_features'],
                                   )

    df_proton = utils.filter_events(df_proton,
                                    filters=events_filters,
                                    finite_params=config['energy_regression_features']
                                                  + config['disp_regression_features']
                                                  + config['particle_classification_features']
                                                  + config['disp_classification_features'],
                                    )

    # Training MC gammas in reduced viewcone
    src_r_m = np.sqrt(df_gamma['src_x'] ** 2 + df_gamma['src_y'] ** 2)
    foclen = OPTICS.equivalent_focal_length.value
    src_r_deg = np.rad2deg(np.arctan(src_r_m / foclen))
    df_gamma = df_gamma[(src_r_deg >= config['train_gamma_src_r_deg'][0]) & (
            src_r_deg <= config['train_gamma_src_r_deg'][1])]

    # Train regressors for energy and disp_norm reconstruction, only with gammas
    n_gamma_regressors = config["n_training_events"]["gamma_regressors"]
    if n_gamma_regressors not in [1.0, None]:
        try:
            df_gamma_reg, _ = train_test_split(df_gamma, train_size=n_gamma_regressors)
        except ValueError as e:
            raise ValueError(f"The requested number of gammas {n_gamma_regressors} "
                             f"for the regressors training is not valid.") from e
    else:
        df_gamma_reg = df_gamma

    reg_energy = train_energy(df_gamma_reg, custom_config=config)

    if config['disp_method'] == 'disp_vector':
        reg_disp_vector = train_disp_vector(df_gamma, custom_config=config)
    elif config['disp_method'] == 'disp_norm_sign':
        reg_disp_norm = train_disp_norm(df_gamma, custom_config=config)
        cls_disp_sign = train_disp_sign(df_gamma, custom_config=config)

    # Train classifier for gamma/hadron separation.
    test_size = config['n_training_events']['gamma_classifier']
    train_size = config['n_training_events']['gamma_tmp_regressors']
    try:
        train, testg = train_test_split(df_gamma, test_size=test_size, train_size=train_size)
    except ValueError as e:
        raise ValueError(
            "The requested number of gammas for the classifier training is not valid."
        ) from e

    n_proton_classifier = config["n_training_events"]["proton_classifier"]
    if n_proton_classifier not in [1.0, None]:
        try:
            df_proton, _ = train_test_split(df_proton, train_size=config['n_training_events']['proton_classifier'])
        except ValueError as e:
            raise ValueError(
                "The requested number of protons for the classifier training is not valid."
            ) from e

    test = pd.concat([testg, df_proton], ignore_index=True)

    temp_reg_energy = train_energy(train, custom_config=config)

    if config['disp_method'] == 'disp_vector':
        temp_reg_disp_vector = train_disp_vector(train, custom_config=config)
    elif config['disp_method'] == 'disp_norm_sign':
        tmp_reg_disp_norm = train_disp_norm(train, custom_config=config)
        tmp_cls_disp_sign = train_disp_sign(train, custom_config=config)

    # Apply the regressors to the test set

    test['log_reco_energy'] = temp_reg_energy.predict(test[config['energy_regression_features']])

    if config['disp_method'] == 'disp_vector':
        disp_vector = temp_reg_disp_vector.predict(test[config['disp_regression_features']])
    elif config['disp_method'] == 'disp_norm_sign':
        disp_norm = tmp_reg_disp_norm.predict(test[config['disp_regression_features']])
        disp_sign = tmp_cls_disp_sign.predict(test[config['disp_classification_features']])
        test['reco_disp_norm'] = disp_norm
        test['reco_disp_sign'] = disp_sign

        disp_angle = test['psi']  # the source here is supposed to be in the direction given by Hillas
        disp_vector = disp.disp_vector(disp_norm, disp_angle, disp_sign)

    test['reco_disp_dx'] = disp_vector[:, 0]
    test['reco_disp_dy'] = disp_vector[:, 1]

    test['reco_src_x'], test['reco_src_y'] = disp.disp_to_pos(test['reco_disp_dx'],
                                                              test['reco_disp_dy'],
                                                              test['x'], test['y'])

    # give skewness and time gradient a meaningful sign, i.e. referred to the reconstructed source position:
    longi, _ = camera_to_shower_coordinates(test['reco_src_x'], test['reco_src_y'],
                                            test['x'], test['y'], test['psi'])
    test['signed_skewness'] = -1 * np.sign(longi) * test['skewness']
    test['signed_time_gradient'] = -1 * np.sign(longi) * test['time_gradient']

    # Apply cut in reconstructed energy. New train set is the previous
    # test with energy and disp_norm reconstructed.

    train = test[test['log_reco_energy'] > energy_min]

    del temp_reg_energy

    if config['disp_method'] == 'disp_vector':
        del temp_reg_disp_vector
    elif config['disp_method'] == 'disp_norm_sign':
        del tmp_reg_disp_norm, tmp_cls_disp_sign

    # Train the Classifier

    cls_gh = train_sep(train, custom_config=config)

    if save_models:
        os.makedirs(path_models, exist_ok=True)

        file_reg_energy = path_models + "/reg_energy.sav"
        joblib.dump(reg_energy, file_reg_energy, compress=3)

        if config['disp_method'] == 'disp_vector':
            file_reg_disp_vector = path_models + "/reg_disp_vector.sav"
            joblib.dump(reg_disp_vector, file_reg_disp_vector, compress=3)

        elif config['disp_method'] == 'disp_norm_sign':
            file_reg_disp_norm = os.path.join(path_models, 'reg_disp_norm.sav')
            file_cls_disp_sign = os.path.join(path_models, 'cls_disp_sign.sav')
            joblib.dump(reg_disp_norm, file_reg_disp_norm, compress=3)
            joblib.dump(cls_disp_sign, file_cls_disp_sign, compress=3)

        file_cls_gh = path_models + "/cls_gh.sav"
        joblib.dump(cls_gh, file_cls_gh, compress=3)

    if config['disp_method'] == 'disp_vector':
        return reg_energy, reg_disp_vector, cls_gh
    elif config['disp_method'] == 'disp_norm_sign':
        return reg_energy, reg_disp_norm, cls_disp_sign, cls_gh
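A sketch of one way build_models could be invoked, assuming the 'disp_vector' method and placeholder file names:

# hypothetical input files and output directory, 'disp_vector' method assumed
reg_energy, reg_disp_vector, cls_gh = build_models(
    "gamma_dl1.h5",
    "proton_dl1.h5",
    save_models=True,
    path_models="./trained_models",
    custom_config={"disp_method": "disp_vector"},
)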
Example #18
def plot(filename='longterm_dl1_check.h5', tel_id=1):

    # First read in the camera geometry:
    subarray_info = SubarrayDescription.from_hdf(filename)
    camgeom = subarray_info.tel[tel_id].camera.geometry
    engineering_geom = camgeom.transform_to(EngineeringCameraFrame())

    file = tables.open_file(filename)

    bokeh_output_file(Path(filename).with_suffix('.html'),
                      title='LST1 long-term DL1 data check')

    run_titles = []
    for i, run in enumerate(file.root.pixwise_runsummary.col('runnumber')):
        date = pd.to_datetime(file.root.pixwise_runsummary.col('time')[i],
                              origin='unix',
                              unit='s')
        run_titles.append('Run {0:05d}, {date}'.format(
            run, date=date.strftime("%b %d %Y %H:%M:%S")))

    runsummary = pd.read_hdf(filename, 'runsummary')
    page0 = Panel()
    fig_ped_rates = show_graph(
        x=pd.to_datetime(runsummary['time'], origin='unix', unit='s'),
        y=runsummary['num_pedestals'] / runsummary['elapsed_time'],
        xlabel='date',
        ylabel='Interleaved pedestals rate',
        ey=np.sqrt(runsummary['num_pedestals']) / runsummary['elapsed_time'],
        xtype='datetime',
        ytype='linear',
        point_labels=run_titles)
    fig_ff_rates = show_graph(
        x=pd.to_datetime(runsummary['time'], origin='unix', unit='s'),
        y=runsummary['num_flatfield'] / runsummary['elapsed_time'],
        xlabel='date',
        ylabel='Interleaved flat field rate',
        ey=np.sqrt(runsummary['num_flatfield']) / runsummary['elapsed_time'],
        xtype='datetime',
        ytype='linear',
        point_labels=run_titles)
    fig_cosmic_rates = show_graph(
        x=pd.to_datetime(runsummary['time'], origin='unix', unit='s'),
        y=runsummary['num_cosmics'] / runsummary['elapsed_time'],
        xlabel='date',
        ylabel='Cosmics rate',
        ey=np.sqrt(runsummary['num_cosmics']) / runsummary['elapsed_time'],
        xtype='datetime',
        ytype='linear',
        point_labels=run_titles)
    fig_muring_rates = show_graph(
        x=pd.to_datetime(runsummary['time'], origin='unix', unit='s'),
        y=runsummary['num_contained_mu_rings'] / runsummary['elapsed_time'],
        xlabel='date',
        ylabel='Contained mu-rings rate',
        ey=np.sqrt(runsummary['num_contained_mu_rings']) /
        runsummary['elapsed_time'],
        xtype='datetime',
        ytype='linear',
        point_labels=run_titles)

    pad_width = 550
    pad_height = 350
    row1 = [fig_ped_rates, fig_ff_rates]
    row2 = [fig_cosmic_rates, fig_muring_rates]
    grid0 = gridplot([row1, row2],
                     sizing_mode=None,
                     plot_width=pad_width,
                     plot_height=pad_height)
    page0.child = grid0
    page0.title = 'Event rates'

    page0b = Panel()
    altmin = np.rad2deg(runsummary['min_altitude'])
    altmean = np.rad2deg(runsummary['mean_altitude'])
    altmax = np.rad2deg(runsummary['max_altitude'])
    fig_altitude = show_graph(x=pd.to_datetime(runsummary['time'],
                                               origin='unix',
                                               unit='s'),
                              y=altmean,
                              xlabel='date',
                              ylabel='Telescope altitude (mean, min, max)',
                              eylow=altmean - altmin,
                              eyhigh=altmax - altmean,
                              xtype='datetime',
                              ytype='linear',
                              point_labels=run_titles)
    fig_altitude.y_range = Range1d(altmin.min() * 0.95, altmax.max() * 1.05)
    row1 = [fig_altitude]
    grid0b = gridplot([row1],
                      sizing_mode=None,
                      plot_width=pad_width,
                      plot_height=pad_height)
    page0b.child = grid0b
    page0b.title = 'Pointing'

    page1 = Panel()
    pad_width = 350
    pad_height = 370
    mean = []
    stddev = []
    for item in file.root.pixwise_runsummary.col('ped_pix_charge_mean'):
        mean.append(item)
    for item in file.root.pixwise_runsummary.col('ped_pix_charge_stddev'):
        stddev.append(item)
    row1 = show_camera(np.array(mean), engineering_geom, pad_width, pad_height,
                       'Pedestals mean charge', run_titles)
    row2 = show_camera(np.array(stddev), engineering_geom, pad_width,
                       pad_height, 'Pedestals charge std dev', run_titles)
    grid1 = gridplot([row1, row2],
                     sizing_mode=None,
                     plot_width=pad_width,
                     plot_height=pad_height)
    page1.child = grid1
    page1.title = 'Interleaved pedestals'

    page2 = Panel()
    mean = []
    stddev = []
    for item in file.root.pixwise_runsummary.col('ff_pix_charge_mean'):
        mean.append(item)
    for item in file.root.pixwise_runsummary.col('ff_pix_charge_stddev'):
        stddev.append(item)
    row1 = show_camera(np.array(mean), engineering_geom, pad_width, pad_height,
                       'Flat-Field mean charge (pe)', run_titles)
    row2 = show_camera(np.array(stddev), engineering_geom, pad_width,
                       pad_height, 'Flat-Field charge std dev (pe)',
                       run_titles)
    grid2 = gridplot([row1, row2],
                     sizing_mode=None,
                     plot_width=pad_width,
                     plot_height=pad_height)
    page2.child = grid2
    page2.title = 'Interleaved flat field, charge'

    page3 = Panel()
    mean = []
    stddev = []
    for item in file.root.pixwise_runsummary.col('ff_pix_rel_time_mean'):
        mean.append(item)
    for item in file.root.pixwise_runsummary.col('ff_pix_rel_time_stddev'):
        stddev.append(item)
    row1 = show_camera(np.array(mean),
                       engineering_geom,
                       pad_width,
                       pad_height,
                       'Flat-Field mean relative time (ns)',
                       run_titles,
                       showlog=False)
    row2 = show_camera(np.array(stddev),
                       engineering_geom,
                       pad_width,
                       pad_height,
                       'Flat-Field rel. time std dev (ns)',
                       run_titles,
                       showlog=False)
    grid3 = gridplot([row1, row2],
                     sizing_mode=None,
                     plot_width=pad_width,
                     plot_height=pad_height)
    page3.child = grid3
    page3.title = 'Interleaved flat field, time'

    page4 = Panel()
    pulse_fraction_above_10 = []
    pulse_fraction_above_30 = []
    for item in file.root.pixwise_runsummary.col(
            'cosmics_pix_fraction_pulses_above10'):
        pulse_fraction_above_10.append(item)
    for item in file.root.pixwise_runsummary.col(
            'cosmics_pix_fraction_pulses_above30'):
        pulse_fraction_above_30.append(item)

    row1 = show_camera(np.array(pulse_fraction_above_10), engineering_geom,
                       pad_width, pad_height,
                       'Cosmics, fraction of >10pe pulses', run_titles)
    row2 = show_camera(np.array(pulse_fraction_above_30), engineering_geom,
                       pad_width, pad_height,
                       'Cosmics, fraction of >30pe pulses', run_titles)

    grid4 = gridplot([row1, row2],
                     sizing_mode=None,
                     plot_width=pad_width,
                     plot_height=pad_height)
    page4.child = grid4
    page4.title = 'Cosmics'

    file.close()

    page5 = Panel()
    pad_width = 550
    pad_height = 350
    fig_mu_effi = show_graph(x=pd.to_datetime(runsummary['time'],
                                              origin='unix',
                                              unit='s'),
                             y=runsummary['mu_effi_mean'],
                             xlabel='date',
                             ylabel='telescope efficiency from mu-rings',
                             ey=runsummary['mu_effi_stddev'] /
                             np.sqrt(runsummary['num_contained_mu_rings']),
                             xtype='datetime',
                             ytype='linear',
                             point_labels=run_titles)
    fig_mu_effi.y_range = Range1d(0., 1.1 * np.max(runsummary['mu_effi_mean']))

    fig_mu_width = show_graph(x=pd.to_datetime(runsummary['time'],
                                               origin='unix',
                                               unit='s'),
                              y=runsummary['mu_width_mean'],
                              xlabel='date',
                              ylabel='muon ring width (deg)',
                              ey=runsummary['mu_width_stddev'] /
                              np.sqrt(runsummary['num_contained_mu_rings']),
                              xtype='datetime',
                              ytype='linear',
                              point_labels=run_titles)
    fig_mu_width.y_range = Range1d(0.,
                                   1.1 * np.max(runsummary['mu_width_mean']))

    fig_mu_intensity = show_graph(x=pd.to_datetime(runsummary['time'],
                                                   origin='unix',
                                                   unit='s'),
                                  y=runsummary['mu_intensity_mean'],
                                  xlabel='date',
                                  ylabel='mean muon ring intensity (p.e.)',
                                  xtype='datetime',
                                  ytype='linear',
                                  point_labels=run_titles)
    fig_mu_intensity.y_range = \
        Range1d(0., 1.1 * np.max(runsummary['mu_intensity_mean']))

    fig_mu_hg_peak = show_graph(x=pd.to_datetime(runsummary['time'],
                                                 origin='unix',
                                                 unit='s'),
                                y=runsummary['mu_hg_peak_sample_mean'],
                                xlabel='date',
                                ey=runsummary['mu_hg_peak_sample_stddev'],
                                ylabel='HG global peak sample id (mean&RMS)',
                                xtype='datetime',
                                ytype='linear',
                                point_labels=run_titles)
    fig_mu_hg_peak.y_range = Range1d(0., 38.)
    row1 = [fig_mu_effi, fig_mu_width]
    row2 = [fig_mu_intensity, fig_mu_hg_peak]

    grid5 = gridplot([row1, row2],
                     sizing_mode=None,
                     plot_width=pad_width,
                     plot_height=pad_height)
    page5.child = grid5
    page5.title = "Muons"

    page6 = Panel()
    pad_width = 550
    pad_height = 350
    fig_ped = show_graph(x=pd.to_datetime(runsummary['time'],
                                          origin='unix',
                                          unit='s'),
                         y=runsummary['ped_charge_mean'],
                         xlabel='date',
                         ylabel='Camera-averaged pedestal charge (pe/pixel)',
                         ey=runsummary['ped_charge_mean_err'],
                         xtype='datetime',
                         ytype='linear',
                         point_labels=run_titles)
    fig_ped.y_range = Range1d(0., 1.1 * np.max(runsummary['ped_charge_mean']))

    fig_ped_stddev = show_graph(x=pd.to_datetime(runsummary['time'],
                                                 origin='unix',
                                                 unit='s'),
                                y=runsummary['ped_charge_stddev'],
                                xlabel='date',
                                ylabel='Camera-averaged pedestal charge std '
                                'dev (pe/pixel)',
                                xtype='datetime',
                                ytype='linear',
                                point_labels=run_titles)
    fig_ped_stddev.y_range = \
        Range1d(0.,1.1*np.max(runsummary['ped_charge_stddev']))

    frac = runsummary['num_pedestals_after_cleaning'] / \
           runsummary['num_pedestals']
    err = np.sqrt(frac * (1 - frac) / runsummary['num_pedestals'])
    fig_ped_clean_fraction = show_graph(
        x=pd.to_datetime(runsummary['time'], origin='unix', unit='s'),
        y=frac,
        xlabel='date',
        ylabel='Fraction of pedestals surviving cleaning',
        ey=err,
        xtype='datetime',
        ytype='linear',
        point_labels=run_titles)

    row1 = [fig_ped, fig_ped_stddev]
    row2 = [fig_ped_clean_fraction]

    grid6 = gridplot([row1, row2],
                     sizing_mode=None,
                     plot_width=pad_width,
                     plot_height=pad_height)
    page6.child = grid6
    page6.title = "Interleaved pedestals, averages"

    page7 = Panel()
    pad_width = 550
    pad_height = 280
    fig_flatfield = show_graph(x=pd.to_datetime(runsummary['time'],
                                                origin='unix',
                                                unit='s'),
                               y=runsummary['ff_charge_mean'],
                               xlabel='date',
                               ylabel='Cam-averaged FF Q (pe/pixel)',
                               ey=runsummary['ff_charge_mean_err'],
                               xtype='datetime',
                               ytype='linear',
                               point_labels=run_titles)
    fig_flatfield.y_range = Range1d(0.,
                                    1.1 * np.max(runsummary['ff_charge_mean']))

    fig_ff_stddev = show_graph(x=pd.to_datetime(runsummary['time'],
                                                origin='unix',
                                                unit='s'),
                               y=runsummary['ff_charge_stddev'],
                               xlabel='date',
                               ylabel='Cam-averaged FF Q std '
                               'dev (pe/pixel)',
                               xtype='datetime',
                               ytype='linear',
                               point_labels=run_titles)
    fig_ff_stddev.y_range = \
        Range1d(0.,1.1*np.max(runsummary['ff_charge_stddev']))

    fig_ff_time = show_graph(x=pd.to_datetime(runsummary['time'],
                                              origin='unix',
                                              unit='s'),
                             y=runsummary['ff_time_mean'],
                             xlabel='date',
                             ylabel='Cam-averaged FF time (ns)',
                             ey=runsummary['ff_time_mean_err'],
                             xtype='datetime',
                             ytype='linear',
                             point_labels=run_titles)

    fig_ff_time_std = show_graph(x=pd.to_datetime(runsummary['time'],
                                                  origin='unix',
                                                  unit='s'),
                                 y=runsummary['ff_time_stddev'],
                                 xlabel='date',
                                 ylabel='Cam-averaged FF t std '
                                 'dev (ns)',
                                 xtype='datetime',
                                 ytype='linear',
                                 point_labels=run_titles)
    fig_ff_rel_time_std = show_graph(x=pd.to_datetime(runsummary['time'],
                                                      origin='unix',
                                                      unit='s'),
                                     y=runsummary['ff_rel_time_stddev'],
                                     xlabel='date',
                                     ylabel='Cam-averaged FF '
                                     'rel. pix t std dev (ns)',
                                     xtype='datetime',
                                     ytype='linear',
                                     point_labels=run_titles)
    fig_ff_rel_time_std.y_range = \
        Range1d(0., np.max([1., runsummary['ff_rel_time_stddev'].max()]))

    row1 = [fig_flatfield, fig_ff_stddev]
    row2 = [fig_ff_time, fig_ff_time_std]
    row3 = [fig_ff_rel_time_std]

    grid7 = gridplot([row1, row2, row3],
                     sizing_mode=None,
                     plot_width=pad_width,
                     plot_height=pad_height)
    page7.child = grid7
    page7.title = "Interleaved FF, averages"

    tabs = Tabs(
        tabs=[page0, page0b, page1, page2, page3, page4, page5, page6, page7])
    show(column(Div(text='<h1> Long-term DL1 data check </h1>'), tabs))
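The plot function above writes a bokeh HTML report next to the input file; a minimal, hypothetical invocation:

# hypothetical invocation: render the HTML data-check report for telescope 1
plot(filename="longterm_dl1_check.h5", tel_id=1)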
Example #19
def main():
    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(
                os.path.abspath(args.config_file))
        except Exception:
            # "Custom configuration could not be loaded !!!": fall back to the standard config
            pass

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.input_file, key=dl1_params_lstcam_key)

    # if real data, add deltat t to dataframe keys
    data = add_delta_t_key(data)

    # Dealing with pointing missing values. This happened when `ucts_time` was invalid.
    if 'alt_tel' in data.columns and 'az_tel' in data.columns \
            and (np.isnan(data.alt_tel).any() or np.isnan(data.az_tel).any()):
        # make sure there is a least one good pointing value to interp from.
        if np.isfinite(data.alt_tel).any() and np.isfinite(data.az_tel).any():
            data = impute_pointing(data)
        else:
            data.alt_tel = -np.pi / 2.
            data.az_tel = -np.pi / 2.

    # Load the trained RF for reconstruction:
    fileE = args.path_models + "/reg_energy.sav"
    fileD = args.path_models + "/reg_disp_vector.sav"
    fileH = args.path_models + "/cls_gh.sav"

    reg_energy = joblib.load(fileE)
    reg_disp_vector = joblib.load(fileD)
    cls_gh = joblib.load(fileH)

    subarray_info = SubarrayDescription.from_hdf(args.input_file)
    tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
    focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length

    # Apply the models to the data

    # Source-independent analysis
    if not config['source_dependent']:
        data = filter_events(
            data,
            filters=config["events_filters"],
            finite_params=config['regression_features'] +
            config['classification_features'],
        )

        dl2 = dl1_to_dl2.apply_models(data,
                                      cls_gh,
                                      reg_energy,
                                      reg_disp_vector,
                                      focal_length=focal_length,
                                      custom_config=config)

    # Source-dependent analysis
    if config['source_dependent']:
        data_srcdep = pd.read_hdf(args.input_file,
                                  key=dl1_params_src_dep_lstcam_key)
        data_srcdep.columns = pd.MultiIndex.from_tuples([
            tuple(col[1:-1].replace('\'', '').replace(' ', '').split(","))
            for col in data_srcdep.columns
        ])

        dl2_srcdep_dict = {}

        for i, k in enumerate(data_srcdep.columns.levels[0]):
            data_with_srcdep_param = pd.concat([data, data_srcdep[k]], axis=1)
            data_with_srcdep_param = filter_events(
                data_with_srcdep_param,
                filters=config["events_filters"],
                finite_params=config['regression_features'] +
                config['classification_features'],
            )
            dl2_df = dl1_to_dl2.apply_models(data_with_srcdep_param,
                                             cls_gh,
                                             reg_energy,
                                             reg_disp_vector,
                                             focal_length=focal_length,
                                             custom_config=config)

            dl2_srcdep = dl2_df.drop(data.keys(), axis=1)
            dl2_srcdep_dict[k] = dl2_srcdep

            if i == 0:
                dl2_srcindep = dl2_df.drop(data_srcdep[k].keys(), axis=1)

    os.makedirs(args.output_dir, exist_ok=True)
    output_file = os.path.join(
        args.output_dir,
        os.path.basename(args.input_file).replace('dl1', 'dl2'))

    if os.path.exists(output_file):
        raise IOError(output_file + ' exists, exiting.')

    dl1_keys = get_dataset_keys(args.input_file)

    if dl1_images_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_images_lstcam_key)

    if dl1_params_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_params_lstcam_key)

    if dl1_params_src_dep_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_params_src_dep_lstcam_key)

    with open_file(args.input_file, 'r') as h5in:
        with open_file(output_file, 'a') as h5out:

            # Write the selected DL1 info
            for k in dl1_keys:
                if not k.startswith('/'):
                    k = '/' + k

                path, name = k.rsplit('/', 1)
                if path not in h5out:
                    grouppath, groupname = path.rsplit('/', 1)
                    g = h5out.create_group(grouppath,
                                           groupname,
                                           createparents=True)
                else:
                    g = h5out.get_node(path)

                h5in.copy_node(k, g, overwrite=True)

    if not config['source_dependent']:
        write_dl2_dataframe(dl2, output_file)

    else:
        write_dl2_dataframe(dl2_srcindep, output_file)
        write_dataframe(pd.concat(dl2_srcdep_dict, axis=1), output_file,
                        dl2_params_src_dep_lstcam_key)
Example #20
def test_roundtrip(tmpdir: Path):
    """
    Check that we can write DL1+DL2 info to files and read them back

    Parameters
    ----------
    tmpdir :
        temp directory fixture
    """

    output_path = Path(tmpdir / "events.DL1DL2.h5")
    source = EventSource(
        get_dataset_path(
            "gamma_LaPalma_baseline_20Zd_180Az_prod3b_test.simtel.gz"),
        max_events=20,
        allowed_tels=[1, 2, 3, 4],
    )
    calibrate = CameraCalibrator(subarray=source.subarray)

    events = []

    with DataWriter(
            event_source=source,
            output_path=output_path,
            write_parameters=False,
            write_images=True,
            transform_image=True,
            image_dtype="int32",
            image_scale=10,
            transform_peak_time=True,
            peak_time_dtype="int16",
            peak_time_scale=100,
            write_stereo_shower=True,
            write_mono_shower=True,
    ) as write:
        write.log.level = logging.DEBUG
        for event in source:
            calibrate(event)
            write(event)
            generate_dummy_dl2(event)
            events.append(deepcopy(event))
        write.write_simulation_histograms(source)
        assert DataLevel.DL1_IMAGES in write.datalevels
        assert DataLevel.DL1_PARAMETERS not in write.datalevels
        assert DataLevel.DL2 in write.datalevels

    assert output_path.exists()

    # check we can get the subarray description:
    sub = SubarrayDescription.from_hdf(output_path)
    assert sub.num_tels > 0

    # check a few things in the output just to make sure there is output. For a
    # full test of the data model, a verify tool should be created.
    with tables.open_file(output_path) as h5file:
        images = h5file.get_node("/dl1/event/telescope/images/tel_001")

        assert len(images) > 0
        assert images.col("image").dtype == np.int32
        assert images.col("peak_time").dtype == np.int16
        assert images.col("image").max() > 0.0

        # check that DL2 info is there
        dl2_energy = h5file.get_node(
            "/dl2/event/subarray/energy/ImPACTReconstructor")
        assert np.allclose(dl2_energy.col("energy"), 10)
        assert np.count_nonzero(dl2_energy.col("tel_ids")[0]) == 3

        dl2_tel_energy = h5file.get_node(
            "/dl2/event/telescope/energy/HillasReconstructor/tel_001")
        assert np.allclose(dl2_tel_energy.col("energy"), 10)
        assert "tel_ids" not in dl2_tel_energy

    # make sure it is readable by the event source and matches the images

    for event in EventSource(output_path):

        for tel_id, dl1 in event.dl1.tel.items():
            original_image = events[event.count].dl1.tel[tel_id].image
            read_image = dl1.image
            assert np.allclose(original_image, read_image, atol=0.1)

            original_peaktime = events[event.count].dl1.tel[tel_id].peak_time
            read_peaktime = dl1.peak_time
            assert np.allclose(original_peaktime, read_peaktime, atol=0.01)
Example #21
def test_write(tmpdir: Path):
    """
    Check that we can write and read data from R0-DL2 to files

    Parameters
    ----------
    tmpdir :
        temp directory fixture
    """

    output_path = Path(tmpdir / "events.dl1.h5")
    source = EventSource(
        get_dataset_path(
            "gamma_LaPalma_baseline_20Zd_180Az_prod3b_test.simtel.gz"),
        max_events=20,
        allowed_tels=[1, 2, 3, 4],
        focal_length_choice='nominal',
    )
    calibrate = CameraCalibrator(subarray=source.subarray)

    with DataWriter(
            event_source=source,
            output_path=output_path,
            write_parameters=False,
            write_images=True,
            write_showers=True,
            write_raw_waveforms=True,
            write_waveforms=True,
    ) as writer:
        writer.log.level = logging.DEBUG
        for event in source:
            calibrate(event)
            generate_dummy_dl2(event)
            writer(event)
        writer.write_simulation_histograms(source)

    assert output_path.exists()

    # check we can get the subarray description:
    sub = SubarrayDescription.from_hdf(output_path)
    assert sub.num_tels > 0

    # check a few things in the output just to make sure there is output. For a
    # full test of the data model, a verify tool should be created.
    with tables.open_file(output_path) as h5file:
        # check R0:
        r0tel = h5file.get_node("/r0/event/telescope/tel_001")
        assert r0tel.col("waveform").max() > 0

        # check R1:
        r1tel = h5file.get_node("/r1/event/telescope/tel_001")
        assert r1tel.col("waveform").max() > 0

        # check DL1:
        images = h5file.get_node("/dl1/event/telescope/images/tel_001")
        assert images.col("image").max() > 0.0
        assert (h5file.root._v_attrs["CTA PRODUCT DATA MODEL VERSION"]  # pylint: disable=protected-access
                == DATA_MODEL_VERSION)
        shower = h5file.get_node("/simulation/event/subarray/shower")
        assert len(shower) > 0
        assert shower.col("true_alt").mean() > 0.0
        assert (shower._v_attrs["true_alt_UNIT"] == "deg")  # pylint: disable=protected-access

        # check DL2:
        dl2_energy = h5file.get_node(
            "/dl2/event/subarray/energy/ImPACTReconstructor")
        assert np.allclose(dl2_energy.col("energy"), 10)
        assert np.count_nonzero(dl2_energy.col("tel_ids")[0]) == 3

        dl2_tel_energy = h5file.get_node(
            "/dl2/event/telescope/energy/HillasReconstructor/tel_002")
        assert np.allclose(dl2_tel_energy.col("energy"), 10)
        assert "tel_ids" not in dl2_tel_energy
def main():
    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(args.config_file)
        except ("Custom configuration could not be loaded !!!"):
            pass

    config = replace_config(standard_config, custom_config)

    subarray_info = SubarrayDescription.from_hdf(args.gammatest)
    tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
    focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length

    reg_energy, reg_disp_vector, cls_gh = dl1_to_dl2.build_models(
        args.gammafile,
        args.protonfile,
        save_models=args.storerf,
        path_models=args.path_models,
        custom_config=config,
    )

    gammas = filter_events(
        pd.read_hdf(args.gammatest, key=dl1_params_lstcam_key),
        config["events_filters"],
    )
    proton = filter_events(
        pd.read_hdf(args.protontest, key=dl1_params_lstcam_key),
        config["events_filters"],
    )

    data = pd.concat([gammas, proton], ignore_index=True)

    dl2 = dl1_to_dl2.apply_models(data,
                                  cls_gh,
                                  reg_energy,
                                  reg_disp_vector,
                                  focal_length=focal_length,
                                  custom_config=config)

    ####PLOT SOME RESULTS#####

    selected_gammas = dl2.query('reco_type==0 & mc_type==0')

    if (len(selected_gammas) == 0):
        log.warning('No gammas selected, I will not plot any output')
        sys.exit()

    plot_dl2.plot_features(dl2)
    if not args.batch:
        plt.show()

    plot_dl2.energy_results(selected_gammas)
    if not args.batch:
        plt.show()

    plot_dl2.direction_results(selected_gammas)
    if not args.batch:
        plt.show()

    plot_dl2.plot_disp_vector(selected_gammas)
    if not args.batch:
        plt.show()

    plot_dl2.plot_pos(dl2)
    if not args.batch:
        plt.show()

    plot_dl2.plot_roc_gamma(dl2)
    if not args.batch:
        plt.show()

    plot_dl2.plot_models_features_importances(args.path_models,
                                              args.config_file)
    if not args.batch:
        plt.show()

    plt.hist(dl2[dl2['mc_type'] == 101]['gammaness'], bins=100)
    plt.hist(dl2[dl2['mc_type'] == 0]['gammaness'], bins=100)
    if not args.batch:
        plt.show()
Example #23
0
def build_models(
    filegammas,
    fileprotons,
    save_models=True,
    path_models="./",
    energy_min=-np.inf,
    custom_config={},
    test_size=0.2,
):
    """Uses MC data to train Random Forests for Energy and disp_norm
    reconstruction and G/H separation. Returns 3 trained RF.
    The config in config_file superseeds the one passed in argument.

    Parameters
    ----------
    filegammas: string
        Name of the file with MC gamma events
    fileprotons: string
        Name of the file with MC proton events
    energy_min: float
        Cut in energy for gamma/hadron separation
    intensity_min: float
        Cut in intensity of the showers for training RF. Default is 60 phe
    r_min: float
        Cut in distance from c.o.g of hillas ellipse to camera center, to avoid images truncated
        in the border. Default is 80% of camera radius.
    save_models: boolean
        Save the trained RF in a file to use them anytime.
    path_models: string
        path to store the trained RF
    regression_args: dictionnary
    classification_args: dictionnary
    config_file: str
        Path to a configuration file. If given, overwrite `regression_args`.

    Returns
    -------
    (regressor_energy, regressor_disp, classifier_gh)
    regressor_energy: `RandomForestRegressor`
    regressor_disp: `RandomForestRegressor`
    classifier_gh: `RandomForestClassifier`
    """

    config = replace_config(standard_config, custom_config)
    events_filters = config["events_filters"]

    # Adding a filter on mc_type just for training
    events_filters['mc_type'] = [-9000, np.inf]

    df_gamma = pd.read_hdf(filegammas, key=dl1_params_lstcam_key)
    df_proton = pd.read_hdf(fileprotons, key=dl1_params_lstcam_key)

    if config['source_dependent']:
        # if source-dependent parameters are already in dl1 data, just read those data
        # if not, source-dependent parameters are added here
        if dl1_params_src_dep_lstcam_key in get_dataset_keys(filegammas):
            src_dep_df_gamma = get_srcdep_params(filegammas)

        else:
            subarray_info = SubarrayDescription.from_hdf(filegammas)
            tel_id = config["allowed_tels"][
                0] if "allowed_tels" in config else 1
            focal_length = subarray_info.tel[
                tel_id].optics.equivalent_focal_length
            src_dep_df_gamma = get_source_dependent_parameters(
                df_gamma, config, focal_length=focal_length)

        df_gamma = pd.concat([df_gamma, src_dep_df_gamma['on']], axis=1)

        # if source-dependent parameters are already in dl1 data, just read those data
        # if not, source-dependent parameters are added here
        if dl1_params_src_dep_lstcam_key in get_dataset_keys(fileprotons):
            src_dep_df_proton = get_srcdep_params(fileprotons)

        else:
            subarray_info = SubarrayDescription.from_hdf(fileprotons)
            tel_id = config["allowed_tels"][
                0] if "allowed_tels" in config else 1
            focal_length = subarray_info.tel[
                tel_id].optics.equivalent_focal_length
            src_dep_df_proton = get_source_dependent_parameters(
                df_proton, config, focal_length=focal_length)

        df_proton = pd.concat([df_proton, src_dep_df_proton['on']], axis=1)

    df_gamma = utils.filter_events(
        df_gamma,
        filters=events_filters,
        finite_params=config['energy_regression_features'] +
        config['disp_regression_features'] +
        config['particle_classification_features'] +
        config['disp_classification_features'],
    )

    df_proton = utils.filter_events(
        df_proton,
        filters=events_filters,
        finite_params=config['energy_regression_features'] +
        config['disp_regression_features'] +
        config['particle_classification_features'] +
        config['disp_classification_features'],
    )

    # Select MC gammas within a reduced viewcone for training
    src_r_m = np.sqrt(df_gamma['src_x']**2 + df_gamma['src_y']**2)
    foclen = OPTICS.equivalent_focal_length.value
    src_r_deg = np.rad2deg(np.arctan(src_r_m / foclen))
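    # (the source offset in the camera plane, in metres, is converted to an
    # angular offset in degrees on the sky via the focal length)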
    df_gamma = df_gamma[(src_r_deg >= config['train_gamma_src_r_deg'][0])
                        & (src_r_deg <= config['train_gamma_src_r_deg'][1])]

    # Train regressors for energy and disp_norm reconstruction, only with gammas

    reg_energy = train_energy(df_gamma, custom_config=config)

    if config['disp_method'] == 'disp_vector':
        reg_disp_vector = train_disp_vector(df_gamma, custom_config=config)
    elif config['disp_method'] == 'disp_norm_sign':
        reg_disp_norm = train_disp_norm(df_gamma, custom_config=config)
        cls_disp_sign = train_disp_sign(df_gamma, custom_config=config)

    # Train classifier for gamma/hadron separation.

    train, testg = train_test_split(df_gamma, test_size=test_size)
    test = pd.concat([testg, df_proton], ignore_index=True)

    temp_reg_energy = train_energy(train, custom_config=config)

    if config['disp_method'] == 'disp_vector':
        temp_reg_disp_vector = train_disp_vector(train, custom_config=config)
    elif config['disp_method'] == 'disp_norm_sign':
        tmp_reg_disp_norm = train_disp_norm(train, custom_config=config)
        tmp_cls_disp_sign = train_disp_sign(train, custom_config=config)

    # Apply the regressors to the test set

    test['log_reco_energy'] = temp_reg_energy.predict(
        test[config['energy_regression_features']])

    if config['disp_method'] == 'disp_vector':
        disp_vector = temp_reg_disp_vector.predict(
            test[config['disp_regression_features']])
    elif config['disp_method'] == 'disp_norm_sign':
        disp_norm = tmp_reg_disp_norm.predict(
            test[config['disp_regression_features']])
        disp_sign = tmp_cls_disp_sign.predict(
            test[config['disp_classification_features']])
        test['reco_disp_norm'] = disp_norm
        test['reco_disp_sign'] = disp_sign

        # the source is assumed to lie along the shower major axis given by the Hillas psi angle
        disp_angle = test['psi']
        disp_vector = disp.disp_vector(disp_norm, disp_angle, disp_sign)

    test['reco_disp_dx'] = disp_vector[:, 0]
    test['reco_disp_dy'] = disp_vector[:, 1]

    test['reco_src_x'], test['reco_src_y'] = disp.disp_to_pos(
        test['reco_disp_dx'], test['reco_disp_dy'], test['x'], test['y'])
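    # i.e. reconstructed source position = image c.o.g. (x, y) plus the reconstructed disp vector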

    # give skewness and time gradient a meaningful sign, i.e. referred to the reconstructed source position:
    longi, _ = camera_to_shower_coordinates(test['reco_src_x'],
                                            test['reco_src_y'], test['x'],
                                            test['y'], test['psi'])
    test['signed_skewness'] = -1 * np.sign(longi) * test['skewness']
    test['signed_time_gradient'] = -1 * np.sign(longi) * test['time_gradient']

    # Apply cut in reconstructed energy. New train set is the previous
    # test with energy and disp_norm reconstructed.

    train = test[test['log_reco_energy'] > energy_min]

    del temp_reg_energy

    if config['disp_method'] == 'disp_vector':
        del temp_reg_disp_vector
    elif config['disp_method'] == 'disp_norm_sign':
        del tmp_reg_disp_norm, tmp_cls_disp_sign

    # Train the Classifier

    cls_gh = train_sep(train, custom_config=config)

    if save_models:
        os.makedirs(path_models, exist_ok=True)

        file_reg_energy = path_models + "/reg_energy.sav"
        joblib.dump(reg_energy, file_reg_energy)

        if config['disp_method'] == 'disp_vector':
            file_reg_disp_vector = path_models + "/reg_disp_vector.sav"
            joblib.dump(reg_disp_vector, file_reg_disp_vector)
        elif config['disp_method'] == 'disp_norm_sign':
            file_reg_disp_norm = os.path.join(path_models, 'reg_disp_norm.sav')
            file_cls_disp_sign = os.path.join(path_models, 'cls_disp_sign.sav')
            joblib.dump(reg_disp_norm, file_reg_disp_norm)
            joblib.dump(cls_disp_sign, file_cls_disp_sign)

        file_cls_gh = path_models + "/cls_gh.sav"
        joblib.dump(cls_gh, file_cls_gh)

    if config['disp_method'] == 'disp_vector':
        return reg_energy, reg_disp_vector, cls_gh
    elif config['disp_method'] == 'disp_norm_sign':
        return reg_energy, reg_disp_norm, cls_disp_sign, cls_gh
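
A minimal usage sketch for build_models above; the input files, the model
directory and the extra config entry are hypothetical placeholders, and the
number of returned objects depends on disp_method as described in the docstring:

models = build_models(
    'gamma_train.dl1.h5',              # hypothetical MC gamma DL1 file
    'proton_train.dl1.h5',             # hypothetical MC proton DL1 file
    save_models=True,
    path_models='./trained_models',
    custom_config={'disp_method': 'disp_norm_sign'},
)
# with disp_method == 'disp_norm_sign', four objects are returned:
reg_energy, reg_disp_norm, cls_disp_sign, cls_gh = models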
Example #24
0
def main():
    args = parser.parse_args()

    log.setLevel(logging.INFO)
    handler = logging.StreamHandler()
    logging.getLogger().addHandler(handler)

    if Path(args.output_file).exists():
        log.critical(f'Output file {args.output_file} already exists')
        sys.exit(1)

    std_config = get_standard_config()
    if args.config_file is not None:
        config = replace_config(std_config,
                                read_configuration_file(args.config_file))
    else:
        config = std_config

    with tables.open_file(args.input_file, 'r') as f:
        is_simulation = 'simulation' in f.root

    increase_nsb = False
    increase_psf = False
    if "image_modifier" in config:
        imconfig = config["image_modifier"]
        increase_nsb = imconfig["increase_nsb"]
        increase_psf = imconfig["increase_psf"]
        if increase_nsb or increase_psf:
            log.info(f"image_modifier configuration: {imconfig}")
        extra_noise_in_dim_pixels = imconfig["extra_noise_in_dim_pixels"]
        extra_bias_in_dim_pixels = imconfig["extra_bias_in_dim_pixels"]
        transition_charge = imconfig["transition_charge"]
        extra_noise_in_bright_pixels = imconfig["extra_noise_in_bright_pixels"]
        smeared_light_fraction = imconfig["smeared_light_fraction"]
        if (increase_nsb or increase_psf):
            log.info(
                "NOTE: Using the image_modifier options means images will "
                "not be saved.")
            args.no_image = True

    if is_simulation:
        args.pedestal_cleaning = False

    if args.pedestal_cleaning:
        log.info("Pedestal cleaning")
        clean_method_name = 'tailcuts_clean_with_pedestal_threshold'
        sigma = config[clean_method_name]['sigma']
        pedestal_thresh = get_threshold_from_dl1_file(args.input_file, sigma)
        cleaning_params = get_cleaning_parameters(config, clean_method_name)
        pic_th, boundary_th, isolated_pixels, min_n_neighbors = cleaning_params
        log.info(
            "Fraction of pixel cleaning thresholds above picture thr.: "
            f"{np.sum(pedestal_thresh > pic_th) / len(pedestal_thresh):.3f}")
        picture_th = np.clip(pedestal_thresh, pic_th, None)
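        # per pixel, use the larger of the config picture threshold and the
        # pedestal-based threshold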
        log.info(f"Tailcut clean with pedestal threshold config used:"
                 f"{config['tailcuts_clean_with_pedestal_threshold']}")
    else:
        clean_method_name = 'tailcut'
        cleaning_params = get_cleaning_parameters(config, clean_method_name)
        picture_th, boundary_th, isolated_pixels, min_n_neighbors = cleaning_params
        log.info(f"Tailcut config used: {config['tailcut']}")

    use_dynamic_cleaning = False
    if 'apply' in config['dynamic_cleaning']:
        use_dynamic_cleaning = config['dynamic_cleaning']['apply']

    if use_dynamic_cleaning:
        THRESHOLD_DYNAMIC_CLEANING = config['dynamic_cleaning']['threshold']
        FRACTION_CLEANING_SIZE = config['dynamic_cleaning'][
            'fraction_cleaning_intensity']
        log.info(
            "Using dynamic cleaning for events with average size of the "
            f"3 brightest pixels > {config['dynamic_cleaning']['threshold']} p.e."
        )
        log.info(
            "Removing from the image pixels which have charge below "
            f"{config['dynamic_cleaning']['fraction_cleaning_intensity']} * average size"
        )
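        # Illustration only (not the lstchain implementation): conceptually,
        # with `image` the pixel charges and `signal_pixels` the current mask,
        #     mean_top3 = np.mean(np.sort(image)[-3:])
        #     if mean_top3 > THRESHOLD_DYNAMIC_CLEANING:
        #         signal_pixels &= image >= FRACTION_CLEANING_SIZE * mean_top3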

    use_only_main_island = True
    if "use_only_main_island" in config[clean_method_name]:
        use_only_main_island = config[clean_method_name][
            "use_only_main_island"]

    delta_time = None
    if "delta_time" in config[clean_method_name]:
        delta_time = config[clean_method_name]["delta_time"]

    subarray_info = SubarrayDescription.from_hdf(args.input_file)
    tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
    optics = subarray_info.tel[tel_id].optics
    camera_geom = subarray_info.tel[tel_id].camera.geometry

    dl1_container = DL1ParametersContainer()
    parameters_to_update = [
        'intensity', 'x', 'y', 'r', 'phi', 'length', 'width', 'psi',
        'skewness', 'kurtosis', 'concentration_cog', 'concentration_core',
        'concentration_pixel', 'leakage_intensity_width_1',
        'leakage_intensity_width_2', 'leakage_pixels_width_1',
        'leakage_pixels_width_2', 'n_islands', 'intercept', 'time_gradient',
        'n_pixels', 'wl', 'log_intensity'
    ]

    nodes_keys = get_dataset_keys(args.input_file)
    if args.no_image:
        nodes_keys.remove(dl1_images_lstcam_key)

    metadata = global_metadata()

    with tables.open_file(args.input_file, mode='r') as infile:
        image_table = infile.root[dl1_images_lstcam_key]
        dl1_params_input = infile.root[dl1_params_lstcam_key].colnames
        disp_params = {
            'disp_dx', 'disp_dy', 'disp_norm', 'disp_angle', 'disp_sign'
        }
        if set(dl1_params_input).intersection(disp_params):
            parameters_to_update.extend(disp_params)
        uncertainty_params = {'width_uncertainty', 'length_uncertainty'}
        if set(dl1_params_input).intersection(uncertainty_params):
            parameters_to_update.extend(uncertainty_params)

        if increase_nsb:
            rng = np.random.default_rng(
                infile.root.dl1.event.subarray.trigger.col('obs_id')[0])

        if increase_psf:
            set_numba_seed(
                infile.root.dl1.event.subarray.trigger.col('obs_id')[0])

        image_mask_save = not args.no_image and 'image_mask' in infile.root[
            dl1_images_lstcam_key].colnames

        with tables.open_file(args.output_file,
                              mode='a',
                              filters=HDF5_ZSTD_FILTERS) as outfile:
            copy_h5_nodes(infile, outfile, nodes=nodes_keys)
            add_source_filenames(outfile, [args.input_file])

            params = outfile.root[dl1_params_lstcam_key].read()
            if image_mask_save:
                image_mask = outfile.root[dl1_images_lstcam_key].col(
                    'image_mask')

            # need container to use lstchain.io.add_global_metadata and lstchain.io.add_config_metadata
            for k, item in metadata.as_dict().items():
                outfile.root[dl1_params_lstcam_key].attrs[k] = item
            outfile.root[dl1_params_lstcam_key].attrs["config"] = str(config)

            for ii, row in enumerate(image_table):

                dl1_container.reset()

                image = row['image']
                peak_time = row['peak_time']

                if increase_nsb:
                    # Add noise in pixels, to adjust MC to data noise levels.
                    # TO BE DONE: in case of "pedestal cleaning" (not used now
                    # in MC) we should recalculate picture_th above!
                    image = add_noise_in_pixels(rng, image,
                                                extra_noise_in_dim_pixels,
                                                extra_bias_in_dim_pixels,
                                                transition_charge,
                                                extra_noise_in_bright_pixels)
                if increase_psf:
                    image = random_psf_smearer(
                        image, smeared_light_fraction,
                        camera_geom.neighbor_matrix_sparse.indices,
                        camera_geom.neighbor_matrix_sparse.indptr)

                signal_pixels = tailcuts_clean(camera_geom, image, picture_th,
                                               boundary_th, isolated_pixels,
                                               min_n_neighbors)

                n_pixels = np.count_nonzero(signal_pixels)

                if n_pixels > 0:

                    # if delta_time has been set, we require at least one
                    # neighbor within delta_time to accept a pixel in the image:
                    if delta_time is not None:
                        cleaned_pixel_times = peak_time
                        # makes sure only signal pixels are used in the time
                        # check:
                        cleaned_pixel_times[~signal_pixels] = np.nan
                        new_mask = apply_time_delta_cleaning(
                            camera_geom, signal_pixels, cleaned_pixel_times, 1,
                            delta_time)
                        signal_pixels = new_mask

                    if use_dynamic_cleaning:
                        new_mask = apply_dynamic_cleaning(
                            image, signal_pixels, THRESHOLD_DYNAMIC_CLEANING,
                            FRACTION_CLEANING_SIZE)
                        signal_pixels = new_mask

                    # count the number of islands after all of the image cleaning steps
                    num_islands, island_labels = number_of_islands(
                        camera_geom, signal_pixels)
                    dl1_container.n_islands = num_islands

                    n_pixels_on_island = np.bincount(
                        island_labels.astype(np.int64))
                    n_pixels_on_island[
                        0] = 0  # first island is no-island and should not be considered
                    max_island_label = np.argmax(n_pixels_on_island)

                    if use_only_main_island:
                        signal_pixels[
                            island_labels != max_island_label] = False

                    # count the surviving pixels
                    n_pixels = np.count_nonzero(signal_pixels)
                    dl1_container.n_pixels = n_pixels

                    if n_pixels > 0:
                        parametrize_image(
                            image=image,
                            peak_time=peak_time,
                            signal_pixels=signal_pixels,
                            camera_geometry=camera_geom,
                            focal_length=optics.equivalent_focal_length,
                            dl1_container=dl1_container,
                        )

                if set(dl1_params_input).intersection(disp_params):
                    disp_dx, disp_dy, disp_norm, disp_angle, disp_sign = disp(
                        dl1_container['x'].to_value(u.m),
                        dl1_container['y'].to_value(u.m), params['src_x'][ii],
                        params['src_y'][ii])

                    dl1_container['disp_dx'] = disp_dx
                    dl1_container['disp_dy'] = disp_dy
                    dl1_container['disp_norm'] = disp_norm
                    dl1_container['disp_angle'] = disp_angle
                    dl1_container['disp_sign'] = disp_sign

                for p in parameters_to_update:
                    params[ii][p] = u.Quantity(dl1_container[p]).value

                if image_mask_save:
                    image_mask[ii] = signal_pixels

            outfile.root[dl1_params_lstcam_key][:] = params
            if image_mask_save:
                outfile.root[dl1_images_lstcam_key].modify_column(
                    colname='image_mask', column=image_mask)

        write_metadata(metadata, args.output_file)
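
A short sketch of inspecting the re-computed parameters written by the script
above; the output file name is a hypothetical placeholder and the import
location of the table key is assumed:

import pandas as pd
from lstchain.io.io import dl1_params_lstcam_key  # assumed location of the key used above

params = pd.read_hdf('dl1ab_output.h5', key=dl1_params_lstcam_key)  # hypothetical output file
print(params[['intensity', 'length', 'width', 'n_islands', 'n_pixels']].describe())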
Example #25
0
def main():

    output_file_name = 'longterm_dl1_check.h5'
    files = glob.glob('datacheck_dl1_LST-1.Run?????.h5')
    files.sort()

    # hardcoded for now, to be eventually read from data:
    numpixels = 1855

    # subrun-wise tables: cosmics, pedestals, flatfield. One dictionary for
    # each. Note that the cosmics table also contains the muon ring information!

    cosmics = {
        'runnumber': [],
        'subrun': [],
        'time': [],
        'elapsed_time': [],
        'events': [],
        'azimuth': [],
        'altitude': []
    }

    pedestals = copy.deepcopy(cosmics)
    flatfield = copy.deepcopy(cosmics)

    # add table-specific fields:

    cosmics['num_contained_mu_rings'] = []
    cosmics['mu_effi_mean'] = []
    cosmics['mu_effi_stddev'] = []
    cosmics['mu_width_mean'] = []
    cosmics['mu_width_stddev'] = []
    cosmics['mu_radius_mean'] = []
    cosmics['mu_radius_stddev'] = []
    cosmics['mu_intensity_mean'] = []
    cosmics['mu_hg_peak_sample'] = []
    cosmics['mu_hg_peak_sample_stddev'] = []
    cosmics['fraction_pulses_above10'] = []  # fraction of >10 pe pulses
    cosmics['fraction_pulses_above30'] = []  # fraction of >30 pe pulses

    pedestals['fraction_pulses_above10'] = []  # fraction of >10 pe pulses
    pedestals['fraction_pulses_above30'] = []  # fraction of >30 pe pulses
    pedestals['charge_mean'] = []
    pedestals['charge_stddev'] = []

    flatfield['charge_mean'] = []
    flatfield['charge_stddev'] = []
    flatfield['rel_time_mean'] = []
    flatfield['rel_time_stddev'] = []

    # now another dictionary for a run-wise table, with no pixel-wise info:

    runsummary = {
        'runnumber': [],
        'time': [],
        'elapsed_time': [],
        'min_altitude': [],
        'mean_altitude': [],
        'max_altitude': [],
        # currently (as of lstchain 0.5.3) event numbers are post-cleaning!:
        'num_cosmics': [],
        'num_pedestals': [],
        'num_flatfield': [],
        'num_pedestals_after_cleaning': [],
        'num_contained_mu_rings': [],
        'ff_charge_mean': [],  # camera average of mean pix FF charge
        'ff_charge_mean_err': [],  # uncertainty of the above
        'ff_charge_stddev': [],  # camera average
        'ff_time_mean': [],  # camera average of mean FF time
        'ff_time_mean_err': [],  # uncertainty of the above
        'ff_time_stddev': [],  # camera average
        'ff_rel_time_stddev': [],  # camera-averaged std dev of pixel t
        # w.r.t. average of rest of pixels in camera (~ t-resolution)
        'ped_charge_mean': [],  # camera average of mean pix ped charge
        'ped_charge_mean_err': [],  # uncertainty of the above
        'ped_charge_stddev': [],  # camera average
        'ped_fraction_pulses_above10': [],  # in whole camera
        'ped_fraction_pulses_above30': [],  # in whole camera
        'cosmics_fraction_pulses_above10': [],  # in whole camera
        'cosmics_fraction_pulses_above30': [],  # in whole camera
        'mu_effi_mean': [],
        'mu_effi_stddev': [],
        'mu_width_mean': [],
        'mu_width_stddev': [],
        'mu_hg_peak_sample_mean': [],
        'mu_hg_peak_sample_stddev': [],
        'mu_intensity_mean': []
    }

    # and another one for pixel-wise run averages:
    pixwise_runsummary = {
        'ff_pix_charge_mean': [],
        'ff_pix_charge_stddev': [],
        'ff_pix_rel_time_mean': [],
        'ff_pix_rel_time_stddev': [],
        'ped_pix_charge_mean': [],
        'ped_pix_charge_stddev': [],
        'ped_pix_fraction_pulses_above10': [],
        'ped_pix_fraction_pulses_above30': [],
        'cosmics_pix_fraction_pulses_above10': [],
        'cosmics_pix_fraction_pulses_above30': []
    }

    # Table description needed to write the pixel-wise run summary to the hdf5
    # file. Because of the vector columns we cannot write it out using pandas:
    class pixwise_info(tables.IsDescription):
        runnumber = tables.Int32Col()
        time = tables.Float64Col()
        ff_pix_charge_mean = tables.Float32Col(shape=(numpixels))
        ff_pix_charge_stddev = tables.Float32Col(shape=(numpixels))
        ff_pix_rel_time_mean = tables.Float32Col(shape=(numpixels))
        ff_pix_rel_time_stddev = tables.Float32Col(shape=(numpixels))
        ped_pix_charge_mean = tables.Float32Col(shape=(numpixels))
        ped_pix_charge_stddev = tables.Float32Col(shape=(numpixels))
        ped_pix_fraction_pulses_above10 = tables.Float32Col(shape=(numpixels))
        ped_pix_fraction_pulses_above30 = tables.Float32Col(shape=(numpixels))
        cosmics_pix_fraction_pulses_above10 = tables.Float32Col(
            shape=(numpixels))
        cosmics_pix_fraction_pulses_above30 = tables.Float32Col(
            shape=(numpixels))

    dicts = [cosmics, pedestals, flatfield]

    for file in files:

        try:
            a = tables.open_file(file)
        except FileNotFoundError:
            print('Could not read file', file, '- skipping...')
            continue

        print(file)
        runnumber = int(file[file.find('.Run') + 4:file.find('.Run') + 9])

        datatables = []
        for name in [
                '/dl1datacheck/cosmics', '/dl1datacheck/pedestals',
                '/dl1datacheck/flatfield'
        ]:
            try:
                node = a.get_node(name)
            except Exception:
                print('   Table', name, 'is missing!')
                datatables.append(None)
                continue

            datatables.append(node)

        subruns = None

        # fill data which are common to all tables:
        for table, d in zip(datatables, dicts):
            if table is None:
                continue
            d['runnumber'].extend(len(table) * [runnumber])
            d['subrun'].extend(table.col('subrun_index'))
            d['elapsed_time'].extend(table.col('elapsed_time'))
            d['events'].extend(table.col('num_events'))
            d['time'].extend(table.col('dragon_time').mean(axis=1))
            d['azimuth'].extend(table.col('mean_az_tel'))
            d['altitude'].extend(table.col('mean_alt_tel'))

        # now fill table-specific quantities. In some cases they are
        # pixel-averaged values:

        if datatables[0] is not None:
            table = a.root.dl1datacheck.cosmics
            cosmics['fraction_pulses_above10'].extend(
                table.col('num_pulses_above_0010_pe').mean(axis=1) /
                table.col('num_events'))
            cosmics['fraction_pulses_above30'].extend(
                table.col('num_pulses_above_0030_pe').mean(axis=1) /
                table.col('num_events'))

        if datatables[1] is not None:
            table = a.root.dl1datacheck.pedestals
            pedestals['fraction_pulses_above10'].extend(
                table.col('num_pulses_above_0010_pe').mean(axis=1) /
                table.col('num_events'))
            pedestals['fraction_pulses_above30'].extend(
                table.col('num_pulses_above_0030_pe').mean(axis=1) /
                table.col('num_events'))
            pedestals['charge_mean'].extend(
                table.col('charge_mean').mean(axis=1))
            pedestals['charge_stddev'].extend(
                table.col('charge_stddev').mean(axis=1))

        if datatables[2] is not None:
            table = a.root.dl1datacheck.flatfield
            flatfield['charge_mean'].extend(
                table.col('charge_mean').mean(axis=1))
            flatfield['charge_stddev'].extend(
                table.col('charge_stddev').mean(axis=1))
            flatfield['rel_time_mean'].extend(
                table.col('relative_time_mean').mean(axis=1))
            flatfield['rel_time_stddev'].extend(
                table.col('relative_time_stddev').mean(axis=1))

        table = a.root.dl1datacheck.cosmics

        # needed later for the muons:
        subruns = table.col('subrun_index')

        # now fill the run-wise table:
        runsummary['runnumber'].extend([runnumber])
        runsummary['time'].extend([table.col('dragon_time').mean()])
        runsummary['elapsed_time'].extend([table.col('elapsed_time').sum()])
        runsummary['min_altitude'].extend([table.col('mean_alt_tel').min()])
        runsummary['mean_altitude'].extend([table.col('mean_alt_tel').mean()])
        runsummary['max_altitude'].extend([table.col('mean_alt_tel').max()])
        runsummary['num_cosmics'].extend([table.col('num_events').sum()])
        runsummary['cosmics_fraction_pulses_above10'].extend([
            (table.col('num_pulses_above_0010_pe').mean(axis=1)).sum() /
            runsummary['num_cosmics'][-1]
        ])
        runsummary['cosmics_fraction_pulses_above30'].extend([
            (table.col('num_pulses_above_0030_pe').mean(axis=1)).sum() /
            runsummary['num_cosmics'][-1]
        ])
        pixwise_runsummary['cosmics_pix_fraction_pulses_above10'].extend([
            table.col('num_pulses_above_0010_pe').sum(axis=0) /
            runsummary['num_cosmics'][-1]
        ])
        pixwise_runsummary['cosmics_pix_fraction_pulses_above30'].extend([
            table.col('num_pulses_above_0030_pe').sum(axis=0) /
            runsummary['num_cosmics'][-1]
        ])

        if datatables[1] is not None:
            table = a.root.dl1datacheck.pedestals
            nevents = table.col('num_events')  # events per subrun
            events_in_run = nevents.sum()

            runsummary['num_pedestals'].extend([table.col('num_events').sum()])
            runsummary['num_pedestals_after_cleaning'].extend(
                [table.col('num_cleaned_events').sum()])

            runsummary['ped_fraction_pulses_above10'].extend([
                (table.col('num_pulses_above_0010_pe').mean(axis=1)).sum() /
                runsummary['num_pedestals'][-1]
            ])
            runsummary['ped_fraction_pulses_above30'].extend([
                (table.col('num_pulses_above_0030_pe').mean(axis=1)).sum() /
                runsummary['num_pedestals'][-1]
            ])

            # Mean pedestal charge through a run, for each pixel:
            charge_mean = np.sum(table.col('charge_mean') * nevents[:, None],
                                 axis=0) / events_in_run
            # Now store the pixel-averaged mean pedestal charge:
            runsummary['ped_charge_mean'].extend([np.nanmean(charge_mean)])
            npixels = len(charge_mean)
            runsummary['ped_charge_mean_err'].extend(
                [np.nanstd(charge_mean) / np.sqrt(npixels)])
            # Pedestal charge std dev through a run, for each pixel:
            charge_stddev =\
                np.sqrt(np.sum((table.col('charge_stddev')**2)*nevents[:, None],
                               axis=0) / events_in_run)
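            # i.e. the event-weighted quadratic mean of the subrun std devs:
            #     sigma_run = sqrt( sum_i(n_i * sigma_i**2) / sum_i(n_i) )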
            # Store the pixel-averaged pedestal charge std dev:
            runsummary['ped_charge_stddev'].extend([np.nanmean(charge_stddev)])

            pixwise_runsummary['ped_pix_fraction_pulses_above10'].extend([
                table.col('num_pulses_above_0010_pe').sum(axis=0) /
                runsummary['num_pedestals'][-1]
            ])
            pixwise_runsummary['ped_pix_fraction_pulses_above30'].extend([
                table.col('num_pulses_above_0030_pe').sum(axis=0) /
                runsummary['num_pedestals'][-1]
            ])
            pixwise_runsummary['ped_pix_charge_mean'].extend(
                [table.col('charge_mean').mean(axis=0)])
            pixwise_runsummary['ped_pix_charge_stddev'].extend(
                [table.col('charge_stddev').mean(axis=0)])

        else:
            runsummary['num_pedestals'].extend([np.nan])
            runsummary['num_pedestals_after_cleaning'].extend([np.nan])
            runsummary['ped_fraction_pulses_above10'].extend([np.nan])
            runsummary['ped_fraction_pulses_above30'].extend([np.nan])
            runsummary['ped_charge_mean'].extend([np.nan])
            runsummary['ped_charge_mean_err'].extend([np.nan])
            runsummary['ped_charge_stddev'].extend([np.nan])
            pixwise_runsummary['ped_pix_fraction_pulses_above10'].extend(
                [numpixels * [np.nan]])
            pixwise_runsummary['ped_pix_fraction_pulses_above30'].extend(
                [numpixels * [np.nan]])
            pixwise_runsummary['ped_pix_charge_mean'].extend(
                [numpixels * [np.nan]])
            pixwise_runsummary['ped_pix_charge_stddev'].extend(
                [numpixels * [np.nan]])

        if datatables[2] is not None:
            table = a.root.dl1datacheck.flatfield
            nevents = table.col('num_events')  # events per subrun
            events_in_run = nevents.sum()
            runsummary['num_flatfield'].extend([events_in_run])

            # Mean flat field charge through a run, for each pixel:
            charge_mean = np.sum(table.col('charge_mean') * nevents[:, None],
                                 axis=0) / events_in_run
            # Mean flat field time through a run, for each pixel:
            time_mean = np.sum(table.col('time_mean') * nevents[:, None],
                               axis=0) / events_in_run

            # Now store the pixel-averaged mean charge:
            runsummary['ff_charge_mean'].extend([np.nanmean(charge_mean)])
            npixels = len(charge_mean)
            runsummary['ff_charge_mean_err'].extend(
                [np.nanstd(charge_mean) / np.sqrt(npixels)])
            # FF charge std dev through a run, for each pixel:
            charge_stddev =\
                np.sqrt(np.sum((table.col('charge_stddev')**2)*nevents[:, None],
                               axis=0) / events_in_run)
            # Store the pixel-averaged FF charge std dev:
            runsummary['ff_charge_stddev'].extend([np.nanmean(charge_stddev)])

            # Pixel-averaged mean time:
            runsummary['ff_time_mean'].extend([np.nanmean(time_mean)])
            runsummary['ff_time_mean_err'].extend(
                [np.nanstd(time_mean) / np.sqrt(npixels)])
            # FF time std dev through a run, for each pixel:
            time_stddev =\
                np.sqrt(np.sum((table.col('time_stddev')**2)*nevents[:, None],
                               axis=0) / events_in_run)
            # Store the pixel-averaged FF time std dev:
            runsummary['ff_time_stddev'].extend([np.nanmean(time_stddev)])

            rel_time_stddev =\
                np.sqrt(np.sum((table.col('relative_time_stddev')**2) *
                               nevents[:, None], axis=0) / events_in_run)
            runsummary['ff_rel_time_stddev'].\
                extend([np.nanmean(rel_time_stddev)])

            pixwise_runsummary['ff_pix_charge_mean'].extend(
                [table.col('charge_mean').mean(axis=0)])
            pixwise_runsummary['ff_pix_charge_stddev'].extend(
                [table.col('charge_stddev').mean(axis=0)])
            pixwise_runsummary['ff_pix_rel_time_mean'].extend(
                [table.col('relative_time_mean').mean(axis=0)])
            pixwise_runsummary['ff_pix_rel_time_stddev'].extend(
                [table.col('relative_time_stddev').mean(axis=0)])
        else:
            runsummary['num_flatfield'].extend([np.nan])
            runsummary['ff_charge_mean'].extend([np.nan])
            runsummary['ff_charge_mean_err'].extend([np.nan])
            runsummary['ff_charge_stddev'].extend([np.nan])
            runsummary['ff_time_mean'].extend([np.nan])
            runsummary['ff_time_mean_err'].extend([np.nan])
            runsummary['ff_time_stddev'].extend([np.nan])
            runsummary['ff_rel_time_stddev'].extend([np.nan])
            pixwise_runsummary['ff_pix_charge_mean'].extend(
                [numpixels * [np.nan]])
            pixwise_runsummary['ff_pix_charge_stddev'].extend(
                [numpixels * [np.nan]])
            pixwise_runsummary['ff_pix_rel_time_mean'].extend(
                [numpixels * [np.nan]])
            pixwise_runsummary['ff_pix_rel_time_stddev'].extend(
                [numpixels * [np.nan]])

        a.close()

        # Now process the muon files (one per subrun, containing one entry per ring):

        empty_files = 0

        contained_mu_wholerun = None
        num_contained_mu_rings_in_run = 0

        for subrun in subruns:
            mufile = 'muons_LST-1.Run{0:05d}.{1:04d}.fits'.format(
                runnumber, subrun)

            dat = None
            try:
                dat = Table.read(mufile, format='fits')
            except Exception:
                print('   File', mufile, 'not found - going on')
            if dat is None or len(dat) == 0:
                empty_files += 1
                cosmics['num_contained_mu_rings'].extend([0])
                cosmics['mu_effi_mean'].extend([np.nan])
                cosmics['mu_effi_stddev'].extend([np.nan])
                cosmics['mu_width_mean'].extend([np.nan])
                cosmics['mu_width_stddev'].extend([np.nan])
                cosmics['mu_radius_mean'].extend([np.nan])
                cosmics['mu_radius_stddev'].extend([np.nan])
                cosmics['mu_intensity_mean'].extend([np.nan])
                cosmics['mu_hg_peak_sample'].extend([np.nan])
                cosmics['mu_hg_peak_sample_stddev'].extend([np.nan])
                continue

            df_muons = dat.to_pandas()

            # contained and clean muon rings:
            contained_mu = df_muons[(df_muons['ring_containment'] > 0.99)
                                    & (df_muons['size_outside'] < 1.)]

            num_contained_mu_rings_in_run += len(contained_mu)

            cosmics['num_contained_mu_rings'].extend([len(contained_mu)])
            cosmics['mu_effi_mean'].extend(
                [contained_mu['muon_efficiency'].mean()])
            cosmics['mu_effi_stddev'].extend(
                [contained_mu['muon_efficiency'].std()])
            cosmics['mu_width_mean'].extend(
                [contained_mu['ring_width'].mean()])
            cosmics['mu_width_stddev'].extend(
                [contained_mu['ring_width'].std()])
            cosmics['mu_radius_mean'].extend(
                [contained_mu['ring_radius'].mean()])
            cosmics['mu_radius_stddev'].extend(
                [contained_mu['ring_radius'].std()])
            cosmics['mu_intensity_mean'].extend(
                [contained_mu['ring_size'].mean()])
            cosmics['mu_hg_peak_sample'].extend(
                [contained_mu['hg_peak_sample'].mean()])
            cosmics['mu_hg_peak_sample_stddev'].extend(
                [contained_mu['hg_peak_sample'].std()])

            if contained_mu_wholerun is None:
                contained_mu_wholerun = df_muons
            else:
                contained_mu_wholerun = pd.concat(
                    [contained_mu_wholerun, df_muons], ignore_index=True)

        if empty_files > 0:
            print('   Run {0:d} had {1:d} subruns with no valid muon rings!'.
                  format(runnumber, empty_files))

        # fill the runsummary muons part:
        if contained_mu_wholerun is not None:
            runsummary['num_contained_mu_rings'].extend(
                [num_contained_mu_rings_in_run])
            # The values below are mean and std dev for all contained muon
            # rings in a run:
            runsummary['mu_effi_mean'].extend(
                [contained_mu_wholerun['muon_efficiency'].mean()])
            runsummary['mu_effi_stddev'].extend(
                [contained_mu_wholerun['muon_efficiency'].std()])
            runsummary['mu_width_mean'].extend(
                [contained_mu_wholerun['ring_width'].mean()])
            runsummary['mu_width_stddev'].extend(
                [contained_mu_wholerun['ring_width'].std()])
            runsummary['mu_intensity_mean'].extend(
                [contained_mu_wholerun['ring_size'].mean()])
            runsummary['mu_hg_peak_sample_mean'].\
                extend([contained_mu_wholerun['hg_peak_sample'].mean()])
            runsummary['mu_hg_peak_sample_stddev'].\
                extend([contained_mu_wholerun['hg_peak_sample'].std()])
        else:
            runsummary['num_contained_mu_rings'].extend([np.nan])
            runsummary['mu_effi_mean'].extend([np.nan])
            runsummary['mu_effi_stddev'].extend([np.nan])
            runsummary['mu_width_mean'].extend([np.nan])
            runsummary['mu_width_stddev'].extend([np.nan])
            runsummary['mu_intensity_mean'].extend([np.nan])
            runsummary['mu_hg_peak_sample_mean'].extend([np.nan])
            runsummary['mu_hg_peak_sample_stddev'].extend([np.nan])

    pd.DataFrame(runsummary).to_hdf(output_file_name,
                                    key='runsummary',
                                    mode='w')

    # Now write the pixel-wise run summary info:
    h5file = tables.open_file(output_file_name, mode="a")
    table = h5file.create_table('/', 'pixwise_runsummary', pixwise_info)
    row = table.row
    for i in range(len(pixwise_runsummary['ff_pix_charge_mean'])):
        # we add run number and time info also to this pixwise table:
        row['runnumber'] = runsummary['runnumber'][i]
        row['time'] = runsummary['time'][i]
        for key in pixwise_runsummary:
            row[key] = pixwise_runsummary[key][i]
        row.append()
    table.flush()
    h5file.close()

    # Finally the tables with info by event type:
    for d, name in zip(dicts, ['cosmics', 'pedestals', 'flatfield']):
        pd.DataFrame(d).to_hdf(output_file_name, key=name, mode='a')

    # We write out the subarray description (telescope positions, optics and
    # camera geometry), assuming it is the same for all files (hence we take
    # it from the first one):
    subarray_info = SubarrayDescription.from_hdf(files[0])
    subarray_info.to_hdf(output_file_name)

    plot(output_file_name)
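
A minimal sketch of reading back the file written above (longterm_dl1_check.h5)
with the same keys; the pixel-wise table has vector columns, so it is read with
pytables rather than pandas:

import pandas as pd
import tables

runsummary = pd.read_hdf('longterm_dl1_check.h5', key='runsummary')
cosmics = pd.read_hdf('longterm_dl1_check.h5', key='cosmics')

with tables.open_file('longterm_dl1_check.h5') as f:
    pixwise = f.root.pixwise_runsummary
    ff_pix_charge_mean = pixwise.col('ff_pix_charge_mean')  # shape: (n_runs, n_pixels)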
Example #26
0
def test_dl1writer_int(tmpdir: Path):
    """
    Check that we can write DL1 files

    Parameters
    ----------
    tmpdir :
        temp directory fixture
    """

    output_path = Path(tmpdir / "events.dl1.h5")
    source = EventSource(
        get_dataset_path(
            "gamma_LaPalma_baseline_20Zd_180Az_prod3b_test.simtel.gz"),
        max_events=20,
        allowed_tels=[1, 2, 3, 4],
    )
    calibrate = CameraCalibrator(subarray=source.subarray)

    events = []

    with DL1Writer(
            event_source=source,
            output_path=output_path,
            write_parameters=False,
            write_images=True,
            transform_image=True,
            image_dtype="int32",
            image_scale=10,
            transform_peak_time=True,
            peak_time_dtype="int16",
            peak_time_scale=100,
    ) as write_dl1:
        write_dl1.log.level = logging.DEBUG
        for event in source:
            calibrate(event)
            write_dl1(event)
            events.append(deepcopy(event))
        write_dl1.write_simulation_histograms(source)

    assert output_path.exists()

    # check we can get the subarray description:
    sub = SubarrayDescription.from_hdf(output_path)
    assert sub.num_tels > 0

    # check a few things in the output just to make sure there is output. For a
    # full test of the data model, a verify tool should be created.
    with tables.open_file(output_path) as h5file:
        images = h5file.get_node("/dl1/event/telescope/images/tel_001")

        assert len(images) > 0
        assert images.col("image").dtype == np.int32
        assert images.col("peak_time").dtype == np.int16
        assert images.col("image").max() > 0.0

    # make sure it is readable by the event source and matches the images
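    # (image was stored as int32 with image_scale=10 and peak_time as int16 with
    # peak_time_scale=100, so the round trip is only exact to 1/scale -- hence
    # the atol values below)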

    for event in EventSource(output_path):

        for tel_id, dl1 in event.dl1.tel.items():
            original_image = events[event.count].dl1.tel[tel_id].image
            read_image = dl1.image
            assert np.allclose(original_image, read_image, atol=0.1)

            original_peaktime = events[event.count].dl1.tel[tel_id].peak_time
            read_peaktime = dl1.peak_time
            assert np.allclose(original_peaktime, read_peaktime, atol=0.01)
def main():
    args = parser.parse_args()

    custom_config = {}
    if args.config_file is not None:
        try:
            custom_config = read_configuration_file(
                os.path.abspath(args.config_file))
        except ("Custom configuration could not be loaded !!!"):
            pass

    config = replace_config(standard_config, custom_config)

    data = pd.read_hdf(args.input_file, key=dl1_params_lstcam_key)

    if 'lh_fit_config' in config.keys():
        lhfit_data = pd.read_hdf(args.input_file,
                                 key=dl1_likelihood_params_lstcam_key)
        if np.all(lhfit_data['obs_id'] == data['obs_id']) & np.all(
                lhfit_data['event_id'] == data['event_id']):
            lhfit_data.drop({'obs_id', 'event_id'}, axis=1, inplace=True)
        lhfit_keys = lhfit_data.keys()
        data = pd.concat([data, lhfit_data], axis=1)

    # if real data, add the delta_t key to the dataframe
    data = add_delta_t_key(data)

    # Dealing with pointing missing values. This happened when `ucts_time` was invalid.
    if 'alt_tel' in data.columns and 'az_tel' in data.columns \
            and (np.isnan(data.alt_tel).any() or np.isnan(data.az_tel).any()):
        # make sure there is at least one good pointing value to interpolate from.
        if np.isfinite(data.alt_tel).any() and np.isfinite(data.az_tel).any():
            data = impute_pointing(data)
        else:
            data.alt_tel = -np.pi / 2.
            data.az_tel = -np.pi / 2.

    # Get trained RF path for reconstruction:
    file_reg_energy = os.path.join(args.path_models, 'reg_energy.sav')
    file_cls_gh = os.path.join(args.path_models, 'cls_gh.sav')
    if config['disp_method'] == 'disp_vector':
        file_disp_vector = os.path.join(args.path_models,
                                        'reg_disp_vector.sav')
    elif config['disp_method'] == 'disp_norm_sign':
        file_disp_norm = os.path.join(args.path_models, 'reg_disp_norm.sav')
        file_disp_sign = os.path.join(args.path_models, 'cls_disp_sign.sav')

    subarray_info = SubarrayDescription.from_hdf(args.input_file)
    tel_id = config["allowed_tels"][0] if "allowed_tels" in config else 1
    focal_length = subarray_info.tel[tel_id].optics.equivalent_focal_length

    # Apply the models to the data

    # Source-independent analysis
    if not config['source_dependent']:
        data = filter_events(
            data,
            filters=config["events_filters"],
            finite_params=config['energy_regression_features'] +
            config['disp_regression_features'] +
            config['particle_classification_features'] +
            config['disp_classification_features'],
        )

        if config['disp_method'] == 'disp_vector':
            dl2 = dl1_to_dl2.apply_models(data,
                                          file_cls_gh,
                                          file_reg_energy,
                                          reg_disp_vector=file_disp_vector,
                                          focal_length=focal_length,
                                          custom_config=config)
        elif config['disp_method'] == 'disp_norm_sign':
            dl2 = dl1_to_dl2.apply_models(data,
                                          file_cls_gh,
                                          file_reg_energy,
                                          reg_disp_norm=file_disp_norm,
                                          cls_disp_sign=file_disp_sign,
                                          focal_length=focal_length,
                                          custom_config=config)

    # Source-dependent analysis
    if config['source_dependent']:

        # if source-dependent parameters are already in dl1 data, just read those data.
        if dl1_params_src_dep_lstcam_key in get_dataset_keys(args.input_file):
            data_srcdep = get_srcdep_params(args.input_file)

        # if not, source-dependent parameters are added now
        else:
            data_srcdep = pd.concat(dl1_to_dl2.get_source_dependent_parameters(
                data, config, focal_length=focal_length),
                                    axis=1)

        dl2_srcdep_dict = {}
        srcindep_keys = data.keys()
        srcdep_assumed_positions = data_srcdep.columns.levels[0]

        for i, k in enumerate(srcdep_assumed_positions):
            data_with_srcdep_param = pd.concat([data, data_srcdep[k]], axis=1)
            data_with_srcdep_param = filter_events(
                data_with_srcdep_param,
                filters=config["events_filters"],
                finite_params=config['energy_regression_features'] +
                config['disp_regression_features'] +
                config['particle_classification_features'] +
                config['disp_classification_features'],
            )

            if config['disp_method'] == 'disp_vector':
                dl2_df = dl1_to_dl2.apply_models(
                    data_with_srcdep_param,
                    file_cls_gh,
                    file_reg_energy,
                    reg_disp_vector=file_disp_vector,
                    focal_length=focal_length,
                    custom_config=config)
            elif config['disp_method'] == 'disp_norm_sign':
                dl2_df = dl1_to_dl2.apply_models(data_with_srcdep_param,
                                                 file_cls_gh,
                                                 file_reg_energy,
                                                 reg_disp_norm=file_disp_norm,
                                                 cls_disp_sign=file_disp_sign,
                                                 focal_length=focal_length,
                                                 custom_config=config)

            dl2_srcdep = dl2_df.drop(srcindep_keys, axis=1)
            dl2_srcdep_dict[k] = dl2_srcdep

            if i == 0:
                dl2_srcindep = dl2_df[srcindep_keys]

    os.makedirs(args.output_dir, exist_ok=True)
    output_file = os.path.join(
        args.output_dir,
        os.path.basename(args.input_file).replace('dl1', 'dl2', 1))

    if os.path.exists(output_file):
        raise IOError(output_file + ' exists, exiting.')

    dl1_keys = get_dataset_keys(args.input_file)

    if dl1_images_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_images_lstcam_key)

    if dl1_params_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_params_lstcam_key)

    if dl1_params_src_dep_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_params_src_dep_lstcam_key)

    if dl1_likelihood_params_lstcam_key in dl1_keys:
        dl1_keys.remove(dl1_likelihood_params_lstcam_key)

    metadata = global_metadata()
    write_metadata(metadata, output_file)

    with open_file(args.input_file, 'r') as h5in:
        with open_file(output_file, 'a') as h5out:

            # Write the selected DL1 info
            for k in dl1_keys:
                if not k.startswith('/'):
                    k = '/' + k

                path, name = k.rsplit('/', 1)
                if path not in h5out:
                    grouppath, groupname = path.rsplit('/', 1)
                    g = h5out.create_group(grouppath,
                                           groupname,
                                           createparents=True)
                else:
                    g = h5out.get_node(path)

                h5in.copy_node(k, g, overwrite=True)

    # need container to use lstchain.io.add_global_metadata and lstchain.io.add_config_metadata
    if not config['source_dependent']:
        if 'lh_fit_config' not in config.keys():
            write_dl2_dataframe(dl2, output_file, config=config, meta=metadata)
        else:
            dl2_onlylhfit = dl2[lhfit_keys]
            dl2.drop(lhfit_keys, axis=1, inplace=True)
            write_dl2_dataframe(dl2, output_file, config=config, meta=metadata)
            write_dataframe(dl2_onlylhfit,
                            output_file,
                            dl2_likelihood_params_lstcam_key,
                            config=config,
                            meta=metadata)

    else:
        write_dl2_dataframe(dl2_srcindep,
                            output_file,
                            config=config,
                            meta=metadata)
        write_dataframe(pd.concat(dl2_srcdep_dict, axis=1),
                        output_file,
                        dl2_params_src_dep_lstcam_key,
                        config=config,
                        meta=metadata)
def main():

    print("input files: {}".format(args.input_file))
    print("calib file: {}".format(args.calib_file))
    print("output file: {}".format(args.output_file))

    max_muons = args.max_muons

    # Definition of the output parameters for the table
    output_parameters = create_muon_table()

    if args.calib_file is not None:
        plot_calib.read_file(args.calib_file)
        bad_pixels = plot_calib.calib_data.unusable_pixels[0]
        print(f"Found a total of {np.sum(bad_pixels)} bad pixels.")

    # image = pd.read_hdf(args.input_file, key = dl1_image_lstcam_key)
    # The call above does not work, because of the file's vector columns (pixel-wise charges & times)
    # So we use tables for the time being.

    print(glob.glob(args.input_file))

    filenames = glob.glob(args.input_file)
    filenames.sort()

    lst1_tel_id = 1

    num_muons = 0

    for filename in filenames:
        print('Opening file', filename)

        subarray_info = SubarrayDescription.from_hdf(filename)
        geom = subarray_info.tel[lst1_tel_id].camera.geometry

        subarray = read_subarray_description(filename, subarray_name='LST-1')

        images = Table.read(filename, path=dl1_images_lstcam_key)['image']

        parameters = pd.read_hdf(filename, key=dl1_params_lstcam_key)
        telescope_description = read_telescopes_descriptions(
            filename)[lst1_tel_id]

        equivalent_focal_length = telescope_description.optics.equivalent_focal_length
        mirror_area = telescope_description.optics.mirror_area

        # fill dummy event times with NaNs in case they do not exist (like in MC):
        if 'dragon_time' not in parameters.keys():
            dummy_times = np.empty(len(parameters['event_id']))
            dummy_times[:] = np.nan
            parameters['dragon_time'] = dummy_times

        for full_image, event_id, dragon_time, mc_energy in zip(
                images, parameters['event_id'], parameters['dragon_time'],
                parameters['mc_energy']):
            if args.calib_file is not None:
                image = full_image * (~bad_pixels)
            else:
                image = full_image
            # print("Event {}. Number of pixels above 10 phe: {}".format(event_id,
            #                                                           np.size(image[image > 10.])))
            # if((np.size(image[image > 10.]) > 300) or (np.size(image[image > 10.]) < 50)):
            #     continue
            if not tag_pix_thr(
                    image):  # default skips pedestal and calibration events
                continue

            # default values apply no filtering.
            # This filter is rather useless for biased extractors anyway
            # if not muon_filter(image)
            #    continue

            (muonintensityparam, dist_mask, size, size_outside_ring,
             muonringparam, good_ring, radial_distribution,
             mean_pixel_charge_around_ring,
             muonparameters) = analyze_muon_event(subarray, event_id, image,
                                                  geom,
                                                  equivalent_focal_length,
                                                  mirror_area, args.plot_rings,
                                                  args.plots_path)

            if good_ring:
                num_muons += 1
                print("Number of good muon rings found {}, EventID {}".format(
                    num_muons, event_id))

            # write ring data, including also "not-so-good" rings
            # in case we want to reconsider ring selections!:
            fill_muon_event(mc_energy, output_parameters, good_ring, event_id,
                            dragon_time, muonintensityparam, dist_mask,
                            muonringparam, radial_distribution, size,
                            size_outside_ring, mean_pixel_charge_around_ring,
                            muonparameters)

            if max_muons is not None and num_muons == max_muons:
                break

        if max_muons is not None and num_muons == max_muons:
            break

    table = Table(output_parameters)
    table.write(args.output_file, format='fits', overwrite=True)
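
A short sketch, assuming one of the muon FITS files produced by the scripts
above (file name and path hypothetical), of applying the same "contained and
clean ring" selection used in the long-term check script:

from astropy.table import Table

muons = Table.read('muons_LST-1.Run01234.0000.fits', format='fits').to_pandas()
contained_mu = muons[(muons['ring_containment'] > 0.99)
                     & (muons['size_outside'] < 1.)]
print(len(contained_mu), 'contained muon rings, mean efficiency:',
      contained_mu['muon_efficiency'].mean())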