Example #1
# Imports assumed for the Gammapy version this snippet targets (module paths changed in later releases)
from numpy.testing import assert_allclose

from gammapy.modeling import Datasets
from gammapy.spectrum import SpectrumDatasetOnOff


def test_datasets_stack_reduce():
    obs_ids = [23523, 23526, 23559, 23592]
    dataset_list = []
    for obs in obs_ids:
        filename = "$GAMMAPY_DATA/joint-crab/spectra/hess/pha_obs{}.fits"
        ds = SpectrumDatasetOnOff.from_ogip_files(filename.format(obs))
        dataset_list.append(ds)
    datasets = Datasets(dataset_list)
    stacked = datasets.stack_reduce()
    assert_allclose(stacked.livetime.to_value("s"), 6313.8116406202325)
Example #2
# Imports assumed for the Gammapy version this snippet targets (module paths changed in later releases)
from numpy.testing import assert_allclose

from gammapy.modeling import Datasets
from gammapy.spectrum import SpectrumDatasetOnOff


def test_datasets_stack_reduce():
    obs_ids = [23523, 23526, 23559, 23592]
    dataset_list = []
    for obs in obs_ids:
        filename = "$GAMMAPY_DATA/joint-crab/spectra/hess/pha_obs{}.fits"
        ds = SpectrumDatasetOnOff.from_ogip_files(filename.format(obs))
        dataset_list.append(ds)
    datasets = Datasets(dataset_list)
    stacked = datasets.stack_reduce()
    assert_allclose(stacked.livetime.to_value("s"), 6313.8116406202325)

    info_table = datasets.info_table()
    assert_allclose(info_table["n_on"], [124, 126, 119, 90])

    info_table_cum = datasets.info_table(cumulative=True)
    assert_allclose(info_table_cum["n_on"], [124, 250, 369, 459])
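
The cumulative table is just the running sum of the per-dataset ON counts; the asserted values can be checked independently of Gammapy with a small numpy sketch:

import numpy as np

n_on = [124, 126, 119, 90]                            # per-dataset counts asserted above
assert list(np.cumsum(n_on)) == [124, 250, 369, 459]  # values asserted for cumulative=True
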
Example #3
class Analysis:
    """Config-driven high-level analysis interface.

    By default it is initialized with the configuration parameters and values declared in an
    internal configuration schema YAML file. The user may also pass configuration parameters as
    a nested dictionary at instantiation; in that case these values overwrite the corresponding
    defaults from the configuration file.

    For more information see :ref:`HLI`; a usage sketch follows this listing.

    Parameters
    ----------
    config : dict or `AnalysisConfig`
        Configuration options following the `AnalysisConfig` schema.
    """

    def __init__(self, config=None):
        if isinstance(config, dict):
            self._config = AnalysisConfig(config)
        elif isinstance(config, AnalysisConfig):
            self._config = config
        else:
            raise ValueError("Dict or `AnalysisConfig` object required.")

        self._set_logging()
        self.observations = None
        self.background_estimator = None
        self.datasets = None
        self.extraction = None
        self.model = None
        self.fit = None
        self.fit_result = None
        self.flux_points = None

    @property
    def config(self):
        """Analysis configuration (`AnalysisConfig`)"""
        return self._config

    @property
    def settings(self):
        """Configuration settings for the analysis session."""
        return self.config.settings

    def get_observations(self):
        """Fetch observations from the data store according to criteria defined in the configuration."""
        self.config.validate()
        log.info("Fetching observations.")
        datastore_path = make_path(self.settings["observations"]["datastore"])
        if datastore_path.is_file():
            datastore = DataStore.from_file(datastore_path)
        elif datastore_path.is_dir():
            datastore = DataStore.from_dir(datastore_path)
        else:
            raise FileNotFoundError(f"Datastore {datastore_path} not found.")
        ids = set()
        selection = dict()
        for criteria in self.settings["observations"]["filters"]:
            selected_obs = ObservationTable()

            # TODO: Reduce significantly the code.
            # This block would be handled by datastore.obs_table.select_observations
            selection["type"] = criteria["filter_type"]
            for key, val in criteria.items():
                if key in ["lon", "lat", "radius", "border"]:
                    val = Angle(val)
                selection[key] = val
            if selection["type"] == "angle_box":
                selection["type"] = "par_box"
                selection["value_range"] = Angle(criteria["value_range"])
            if selection["type"] == "sky_circle" or selection["type"].endswith("_box"):
                selected_obs = datastore.obs_table.select_observations(selection)
            if selection["type"] == "par_value":
                mask = (
                    datastore.obs_table[criteria["variable"]] == criteria["value_param"]
                )
                selected_obs = datastore.obs_table[mask]
            if selection["type"] == "ids":
                obs_list = datastore.get_observations(criteria["obs_ids"])
                selected_obs["OBS_ID"] = [obs.obs_id for obs in obs_list.list]
            if selection["type"] == "all":
                obs_list = datastore.get_observations()
                selected_obs["OBS_ID"] = [obs.obs_id for obs in obs_list.list]

            if len(selected_obs):
                if "exclude" in criteria and criteria["exclude"]:
                    ids.difference_update(selected_obs["OBS_ID"].tolist())
                else:
                    ids.update(selected_obs["OBS_ID"].tolist())
        self.observations = datastore.get_observations(ids, skip_missing=True)
        for obs in self.observations.list:
            log.info(obs)

    def get_datasets(self):
        """Produce reduced datasets."""
        if not self._validate_reduction_settings():
            return False
        if self.settings["datasets"]["dataset-type"] == "SpectrumDatasetOnOff":
            self._spectrum_extraction()
        elif self.settings["datasets"]["dataset-type"] == "MapDataset":
            self._map_making()
        else:
            # TODO raise error?
            log.info("Data reduction method not available.")
            return False

    def set_model(self, model=None, filename=""):
        """Read the model from dict or filename and attach it to datasets.

        Parameters
        ----------
        model : dict or string
            Dictionary or string in YAML format with the serialized model.
        filename : string
            Name of the model YAML file describing the model.
        """
        if not self._validate_set_model():
            return False
        log.info("Reading model.")
        if isinstance(model, str):
            model = yaml.safe_load(model)
        if model:
            self.model = SkyModels(dict_to_models(model))
        elif filename:
            filepath = make_path(filename)
            self.model = SkyModels.from_yaml(filepath)
        else:
            return False
        # TODO: Deal with multiple components
        for dataset in self.datasets.datasets:
            if isinstance(dataset, MapDataset):
                dataset.model = self.model
            else:
                if len(self.model.skymodels) > 1:
                    raise ValueError(
                        "Can only fit a single spectral model at one time."
                    )
                dataset.model = self.model.skymodels[0].spectral_model
        log.info(self.model)

    def run_fit(self, optimize_opts=None):
        """Fitting reduced datasets to model."""
        if not self._validate_fitting_settings():
            return False

        for ds in self.datasets.datasets:
            # TODO: fit_range handled in jsonschema validation class
            if "fit_range" in self.settings["fit"]:
                e_min = u.Quantity(self.settings["fit"]["fit_range"]["min"])
                e_max = u.Quantity(self.settings["fit"]["fit_range"]["max"])
                if isinstance(ds, MapDataset):
                    ds.mask_fit = ds.counts.geom.energy_mask(e_min, e_max)
                else:
                    ds.mask_fit = ds.counts.energy_mask(e_min, e_max)
        log.info("Fitting reduced datasets.")
        self.fit = Fit(self.datasets)
        self.fit_result = self.fit.run(optimize_opts=optimize_opts)
        log.info(self.fit_result)

    def get_flux_points(self, source="source"):
        """Calculate flux points for a specific model component.

        Parameters
        ----------
        source : string
            Name of the model component for which to calculate the flux points.
        """
        if not self._validate_fp_settings():
            return False

        # TODO: add "source" to config
        log.info("Calculating flux points.")
        axis_params = self.settings["flux-points"]["fp_binning"]
        e_edges = MapAxis.from_bounds(**axis_params).edges
        flux_point_estimator = FluxPointsEstimator(
            e_edges=e_edges, datasets=self.datasets, source=source
        )
        fp = flux_point_estimator.run()
        fp.table["is_ul"] = fp.table["ts"] < 4
        model = self.model[source].spectral_model.copy()
        self.flux_points = FluxPointsDataset(data=fp, model=model)
        cols = ["e_ref", "ref_flux", "dnde", "dnde_ul", "dnde_err", "is_ul"]
        log.info("\n{}".format(self.flux_points.data.table[cols]))

    @staticmethod
    def _create_geometry(params):
        """Create the geometry."""
        # TODO: handled in jsonschema validation class
        geom_params = copy.deepcopy(params)

        axes = []
        for axis_params in geom_params.get("axes", []):
            ax = MapAxis.from_bounds(**axis_params)
            axes.append(ax)

        geom_params["axes"] = axes
        geom_params["skydir"] = tuple(geom_params["skydir"])
        return WcsGeom.create(**geom_params)

    def _map_making(self):
        """Make maps and datasets for 3d analysis."""
        log.info("Creating geometry.")
        geom = self._create_geometry(self.settings["datasets"]["geom"])

        if "geom-irf" in self.settings["datasets"]:
            geom_irf = self._create_geometry(self.settings["datasets"]["geom-irf"])
        else:
            geom_irf = geom.to_binsz(binsz=BINSZ_IRF)

        offset_max = Angle(self.settings["datasets"]["offset-max"])
        stack_datasets = self.settings["datasets"]["stack-datasets"]
        log.info("Creating datasets.")

        maker = MapDatasetMaker(
            geom=geom,
            geom_true=geom_irf,
            offset_max=offset_max,
        )
        if stack_datasets:
            stacked = MapDataset.create(geom=geom, geom_irf=geom_irf, name="stacked")
            for obs in self.observations:
                dataset = maker.run(obs)
                stacked.stack(dataset)
            self._extract_irf_kernels(stacked)
            datasets = [stacked]
        else:
            datasets = []
            for obs in self.observations:
                dataset = maker.run(obs)
                self._extract_irf_kernels(dataset)
                datasets.append(dataset)

        self.datasets = Datasets(datasets)

    def _extract_irf_kernels(self, dataset):
        # TODO: remove hard-coded default value
        max_radius = self.settings["datasets"].get("psf-kernel-radius", "0.6 deg")
        # TODO: handle IRF maps in fit
        geom = dataset.counts.geom
        geom_irf = dataset.exposure.geom
        position = geom.center_skydir
        geom_psf = geom.to_image().to_cube(geom_irf.axes)
        dataset.psf = dataset.psf.get_psf_kernel(
            position=position, geom=geom_psf, max_radius=max_radius
        )
        e_reco = geom.get_axis_by_name("energy").edges
        dataset.edisp = dataset.edisp.get_energy_dispersion(
            position=position, e_reco=e_reco
        )

    def _set_logging(self):
        """Set logging parameters for API."""
        logging.basicConfig(**self.settings["general"]["logging"])
        log.info(
            "Setting logging config: {!r}".format(self.settings["general"]["logging"])
        )

    def _spectrum_extraction(self):
        """Run all steps for the spectrum extraction."""
        region = self.settings["datasets"]["geom"]["region"]
        log.info("Reducing spectrum datasets.")
        on_lon = Angle(region["center"][0])
        on_lat = Angle(region["center"][1])
        on_center = SkyCoord(on_lon, on_lat, frame=region["frame"])
        on_region = CircleSkyRegion(on_center, Angle(region["radius"]))
        background_params = {"on_region": on_region}
        background = self.settings["datasets"]["background"]
        if "exclusion_mask" in background:
            map_hdu = {}
            filename = background["exclusion_mask"]["filename"]
            if "hdu" in background["exclusion_mask"]:
                map_hdu = {"hdu": background["exclusion_mask"]["hdu"]}
            exclusion_region = Map.read(filename, **map_hdu)
            background_params["exclusion_mask"] = exclusion_region
        if background["background_estimator"] == "reflected":
            self.background_estimator = ReflectedRegionsBackgroundEstimator(
                observations=self.observations, **background_params
            )
            self.background_estimator.run()
        else:
            # TODO: raise error?
            log.info("Background estimation only for reflected regions method.")

        extraction_params = {}
        if "containment_correction" in self.settings["datasets"]:
            extraction_params["containment_correction"] = self.settings["datasets"][
                "containment_correction"
            ]
        params = self.settings["datasets"]["geom"]["axes"][0]
        e_reco = MapAxis.from_bounds(**params).edges
        extraction_params["e_reco"] = e_reco
        extraction_params["e_true"] = None
        self.extraction = SpectrumExtraction(
            observations=self.observations,
            bkg_estimate=self.background_estimator.result,
            **extraction_params,
        )
        self.extraction.run()
        self.datasets = Datasets(self.extraction.spectrum_observations)
        if self.settings["datasets"]["stack-datasets"]:
            stacked = self.datasets.stack_reduce()
            stacked.name = "stacked"
            self.datasets = Datasets([stacked])

    def _validate_reduction_settings(self):
        """Validate settings before proceeding to data reduction."""
        if self.observations and len(self.observations):
            self.config.validate()
            return True
        else:
            log.info("No observations selected.")
            log.info("Data reduction cannot be done.")
            return False

    def _validate_set_model(self):
        if self.datasets and self.datasets.datasets:
            self.config.validate()
            return True
        else:
            log.info("No datasets reduced.")
            return False

    def _validate_fitting_settings(self):
        """Validate settings before proceeding to fit 1D."""
        if not self.model:
            log.info("No model fetched for datasets.")
            log.info("Fit cannot be done.")
            return False
        else:
            return True

    def _validate_fp_settings(self):
        """Validate settings before proceeding to flux points estimation."""
        valid = True
        if self.fit:
            self.config.validate()
        else:
            log.info("No results available from fit.")
            valid = False
        if "flux-points" not in self.settings:
            log.info("No values declared for the energy bins.")
            valid = False
        elif "fp_binning" not in self.settings["flux-points"]:
            log.info("No values declared for the energy bins.")
            valid = False
        if not valid:
            log.info("Flux points calculation cannot be done.")
        return valid
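
A minimal usage sketch for this version of the interface. The nested configuration keys mirror the settings read by the methods above ("general", "observations", "datasets", "fit", "flux-points"); the concrete values, the datastore path and the model file name are illustrative assumptions, not a complete validated schema:

config = {
    "general": {"logging": {"level": "INFO"}},
    "observations": {
        # illustrative datastore and observation selection
        "datastore": "$GAMMAPY_DATA/hess-dl3-dr1",
        "filters": [{"filter_type": "ids", "obs_ids": [23523, 23526]}],
    },
    "datasets": {
        "dataset-type": "SpectrumDatasetOnOff",
        "stack-datasets": True,
        "containment_correction": True,
        "background": {"background_estimator": "reflected"},
        "geom": {
            "region": {"center": ["83.633 deg", "22.014 deg"], "frame": "icrs", "radius": "0.1 deg"},
            # axis parameters are passed to MapAxis.from_bounds(**params)
            "axes": [{"name": "energy", "lo_bnd": 0.1, "hi_bnd": 40.0, "nbin": 30,
                      "unit": "TeV", "interp": "log", "node_type": "edges"}],
        },
    },
    "fit": {"fit_range": {"min": "1 TeV", "max": "10 TeV"}},
    "flux-points": {"fp_binning": {"name": "energy", "lo_bnd": 1.0, "hi_bnd": 10.0,
                                   "nbin": 5, "unit": "TeV", "interp": "log"}},
}

analysis = Analysis(config)
analysis.get_observations()
analysis.get_datasets()
analysis.set_model(filename="model.yaml")   # hypothetical YAML model definition
analysis.run_fit()
analysis.get_flux_points(source="source")
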
Example #4
class Analysis:
    """Config-driven high-level analysis interface.

    By default it is initialized with the configuration parameters and values declared in the
    internal high-level interface model. The user may also pass configuration parameters as a
    nested dictionary at instantiation; in that case these values overwrite the corresponding
    defaults of the configuration model.

    For more information see :ref:`analysis`; a usage sketch follows this listing.

    Parameters
    ----------
    config : dict or `AnalysisConfig`
        Configuration options following the `AnalysisConfig` schema.
    """
    def __init__(self, config):
        self.config = config
        self.config.set_logging()
        self.datastore = None
        self.observations = None
        self.datasets = None
        self.models = None
        self.fit = None
        self.fit_result = None
        self.flux_points = None

    @property
    def config(self):
        """Analysis configuration (`AnalysisConfig`)"""
        return self._config

    @config.setter
    def config(self, value):
        if isinstance(value, dict):
            self._config = AnalysisConfig(**value)
        elif isinstance(value, AnalysisConfig):
            self._config = value
        else:
            raise TypeError("config must be dict or AnalysisConfig.")

    def get_observations(self):
        """Fetch observations from the data store according to criteria defined in the configuration."""
        path = make_path(self.config.observations.datastore)
        if path.is_file():
            self.datastore = DataStore.from_file(path)
        elif path.is_dir():
            self.datastore = DataStore.from_dir(path)
        else:
            raise FileNotFoundError(f"Datastore not found: {path}")

        log.info("Fetching observations.")
        observations_settings = self.config.observations
        if (len(observations_settings.obs_ids)
                and observations_settings.obs_file is not None):
            raise ValueError(
                "Found values for both obs_ids and obs_file; only one of the two may be set."
            )
        elif (not len(observations_settings.obs_ids)
              and observations_settings.obs_file is None):
            obs_list = self.datastore.get_observations()
            ids = [obs.obs_id for obs in obs_list]
        elif len(observations_settings.obs_ids):
            obs_list = self.datastore.get_observations(
                observations_settings.obs_ids)
            ids = [obs.obs_id for obs in obs_list]
        else:
            path = make_path(self.config.observations.obs_file)
            ids = list(
                Table.read(path, format="ascii", data_start=0).columns[0])

        if observations_settings.obs_cone.lon is not None:
            cone = dict(
                type="sky_circle",
                frame=observations_settings.obs_cone.frame,
                lon=observations_settings.obs_cone.lon,
                lat=observations_settings.obs_cone.lat,
                radius=observations_settings.obs_cone.radius,
                border="0 deg",
            )
            selected_cone = self.datastore.obs_table.select_observations(cone)
            ids = list(set(ids) & set(selected_cone["OBS_ID"].tolist()))
        self.observations = self.datastore.get_observations(ids,
                                                            skip_missing=True)
        if self.config.observations.obs_time.start is not None:
            start = self.config.observations.obs_time.start
            stop = self.config.observations.obs_time.stop
            self.observations = self.observations.select_time([(start, stop)])
        log.info(f"Number of selected observations: {len(self.observations)}")
        for obs in self.observations:
            log.debug(obs)

    def get_datasets(self):
        """Produce reduced datasets."""
        if not self.observations or len(self.observations) == 0:
            raise RuntimeError("No observations have been selected.")

        if self.config.datasets.type == "1d":
            self._spectrum_extraction()
        elif self.config.datasets.type == "3d":
            self._map_making()
        else:
            ValueError(f"Invalid dataset type: {self.config.datasets.type}")

    def set_models(self, models):
        """Set models on datasets.

        Parameters
        ----------
        models : `~gammapy.modeling.models.Models` or str
            Models object or YAML models string
        """
        if not self.datasets or len(self.datasets) == 0:
            raise RuntimeError("Missing datasets")

        log.info("Reading model.")
        if isinstance(models, str):
            # FIXME: Models should offer a method to create from YAML str
            models = yaml.safe_load(models)
            self.models = Models(dict_to_models(models))
        elif isinstance(models, Models):
            self.models = models
        else:
            raise TypeError(f"Invalid type: {models!r}")

        for dataset in self.datasets:
            dataset.models = self.models

        log.info(self.models)

    def read_models(self, path):
        """Read models from YAML file."""
        path = make_path(path)
        models = Models.read(path)
        self.set_models(models)

    def run_fit(self, optimize_opts=None):
        """Fitting reduced datasets to model."""
        if not self.models:
            raise RuntimeError("Missing models")

        fit_settings = self.config.fit
        for dataset in self.datasets:
            if fit_settings.fit_range:
                e_min = fit_settings.fit_range.min
                e_max = fit_settings.fit_range.max
                if isinstance(dataset, MapDataset):
                    dataset.mask_fit = dataset.counts.geom.energy_mask(
                        e_min, e_max)
                else:
                    dataset.mask_fit = dataset.counts.energy_mask(e_min, e_max)

        log.info("Fitting datasets.")
        self.fit = Fit(self.datasets)
        self.fit_result = self.fit.run(optimize_opts=optimize_opts)
        log.info(self.fit_result)

    def get_flux_points(self):
        """Calculate flux points for a specific model component."""
        if not self.fit:
            raise RuntimeError("No results available from Fit.")

        fp_settings = self.config.flux_points
        log.info("Calculating flux points.")
        e_edges = self._make_energy_axis(fp_settings.energy).edges
        flux_point_estimator = FluxPointsEstimator(
            e_edges=e_edges,
            datasets=self.datasets,
            source=fp_settings.source,
            **fp_settings.params,
        )
        fp = flux_point_estimator.run()
        fp.table["is_ul"] = fp.table["ts"] < 4
        self.flux_points = FluxPointsDataset(
            data=fp, models=self.models[fp_settings.source])
        cols = ["e_ref", "ref_flux", "dnde", "dnde_ul", "dnde_err", "is_ul"]
        log.info("\n{}".format(self.flux_points.data.table[cols]))

    def update_config(self, config):
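        """Update the analysis configuration."""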
        self.config = self.config.update(config=config)

    def _create_geometry(self):
        """Create the geometry."""
        geom_params = {}
        geom_settings = self.config.datasets.geom
        skydir_settings = geom_settings.wcs.skydir
        if skydir_settings.lon is not None:
            skydir = SkyCoord(skydir_settings.lon,
                              skydir_settings.lat,
                              frame=skydir_settings.frame)
            geom_params["skydir"] = skydir
        if skydir_settings.frame == "icrs":
            geom_params["frame"] = "icrs"
        if skydir_settings.frame == "galactic":
            geom_params["frame"] = "galactic"
        axes = [self._make_energy_axis(geom_settings.axes.energy)]
        geom_params["axes"] = axes
        geom_params["binsz"] = geom_settings.wcs.binsize
        width = geom_settings.wcs.fov.width.to("deg").value
        height = geom_settings.wcs.fov.height.to("deg").value
        geom_params["width"] = (width, height)
        return WcsGeom.create(**geom_params)

    def _map_making(self):
        """Make maps and datasets for 3d analysis."""
        log.info("Creating geometry.")

        geom = self._create_geometry()
        geom_settings = self.config.datasets.geom
        geom_irf = dict(energy_axis_true=None, binsz_irf=None)
        if geom_settings.axes.energy_true.min is not None:
            geom_irf["energy_axis_true"] = self._make_energy_axis(
                geom_settings.axes.energy_true)
        geom_irf["binsz_irf"] = geom_settings.wcs.binsize_irf.to("deg").value
        offset_max = geom_settings.selection.offset_max
        log.info("Creating datasets.")

        maker = MapDatasetMaker(selection=self.config.datasets.map_selection)

        safe_mask_selection = self.config.datasets.safe_mask.methods
        safe_mask_settings = self.config.datasets.safe_mask.settings
        maker_safe_mask = SafeMaskMaker(methods=safe_mask_selection,
                                        **safe_mask_settings)
        stacked = MapDataset.create(geom=geom, name="stacked", **geom_irf)

        if self.config.datasets.stack:
            for obs in self.observations:
                log.info(f"Processing observation {obs.obs_id}")
                cutout = stacked.cutout(obs.pointing_radec,
                                        width=2 * offset_max)
                dataset = maker.run(cutout, obs)
                dataset = maker_safe_mask.run(dataset, obs)
                log.debug(dataset)
                stacked.stack(dataset)
            datasets = [stacked]
        else:
            datasets = []
            for obs in self.observations:
                log.info(f"Processing observation {obs.obs_id}")
                cutout = stacked.cutout(obs.pointing_radec,
                                        width=2 * offset_max)
                dataset = maker.run(cutout, obs)
                dataset = maker_safe_mask.run(dataset, obs)
                log.debug(dataset)
                datasets.append(dataset)
        self.datasets = Datasets(datasets)

    def _spectrum_extraction(self):
        """Run all steps for the spectrum extraction."""
        log.info("Reducing spectrum datasets.")
        datasets_settings = self.config.datasets
        on_lon = datasets_settings.on_region.lon
        on_lat = datasets_settings.on_region.lat
        on_center = SkyCoord(on_lon,
                             on_lat,
                             frame=datasets_settings.on_region.frame)
        on_region = CircleSkyRegion(on_center,
                                    datasets_settings.on_region.radius)

        maker_config = {}
        if datasets_settings.containment_correction:
            maker_config[
                "containment_correction"] = datasets_settings.containment_correction
        e_reco = self._make_energy_axis(
            datasets_settings.geom.axes.energy).edges

        maker_config["selection"] = ["counts", "aeff", "edisp"]
        dataset_maker = SpectrumDatasetMaker(**maker_config)
        bkg_maker_config = {}
        if datasets_settings.background.exclusion:
            exclusion_region = Map.read(datasets_settings.background.exclusion)
            bkg_maker_config["exclusion_mask"] = exclusion_region
        bkg_maker = ReflectedRegionsBackgroundMaker(**bkg_maker_config)

        safe_mask_selection = self.config.datasets.safe_mask.methods
        safe_mask_settings = self.config.datasets.safe_mask.settings
        safe_mask_maker = SafeMaskMaker(methods=safe_mask_selection,
                                        **safe_mask_settings)

        e_true = self._make_energy_axis(
            datasets_settings.geom.axes.energy_true).edges

        reference = SpectrumDataset.create(e_reco=e_reco,
                                           e_true=e_true,
                                           region=on_region)

        datasets = []
        for obs in self.observations:
            log.info(f"Processing observation {obs.obs_id}")
            dataset = dataset_maker.run(reference.copy(), obs)
            dataset = bkg_maker.run(dataset, obs)
            if dataset.counts_off is None:
                log.info(
                    f"No OFF region found for observation {obs.obs_id}. Discarding."
                )
                continue
            dataset = safe_mask_maker.run(dataset, obs)
            log.debug(dataset)
            datasets.append(dataset)

        self.datasets = Datasets(datasets)

        if self.config.datasets.stack:
            stacked = self.datasets.stack_reduce(name="stacked")
            self.datasets = Datasets([stacked])

    @staticmethod
    def _make_energy_axis(axis):
        return MapAxis.from_bounds(
            name="energy",
            lo_bnd=axis.min.value,
            hi_bnd=axis.max.to_value(axis.min.unit),
            nbin=axis.nbins,
            unit=axis.min.unit,
            interp="log",
            node_type="edges",
        )
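
A corresponding usage sketch for this later version, which stores the configuration as an `AnalysisConfig` object (the config setter also accepts a plain dict, as shown above). The nested keys mirror the attributes read by the methods (datastore, obs_ids, on_region, geom.axes, fit_range, flux_points); the values, the datastore path and the model file name are illustrative assumptions rather than a complete validated schema:

config = {
    "observations": {
        "datastore": "$GAMMAPY_DATA/hess-dl3-dr1",   # illustrative datastore
        "obs_ids": [23523, 23526],
    },
    "datasets": {
        "type": "1d",
        "stack": True,
        "containment_correction": True,
        "on_region": {"frame": "icrs", "lon": "83.633 deg", "lat": "22.014 deg", "radius": "0.1 deg"},
        "geom": {
            "axes": {
                "energy": {"min": "0.1 TeV", "max": "40 TeV", "nbins": 30},
                "energy_true": {"min": "0.05 TeV", "max": "100 TeV", "nbins": 40},
            }
        },
    },
    "fit": {"fit_range": {"min": "1 TeV", "max": "10 TeV"}},
    "flux_points": {
        "energy": {"min": "1 TeV", "max": "10 TeV", "nbins": 5},
        "source": "crab",   # name of the model component to compute flux points for
    },
}

analysis = Analysis(config)
analysis.get_observations()
analysis.get_datasets()
analysis.read_models("model.yaml")   # hypothetical YAML file defining the models
analysis.run_fit()
analysis.get_flux_points()
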