def test_datasets_stack_reduce():
    """Stacking four HESS Crab runs yields the expected summed livetime."""
    template = "$GAMMAPY_DATA/joint-crab/spectra/hess/pha_obs{}.fits"
    datasets = Datasets(
        [
            SpectrumDatasetOnOff.from_ogip_files(template.format(obs_id))
            for obs_id in [23523, 23526, 23559, 23592]
        ]
    )
    stacked = datasets.stack_reduce()
    assert_allclose(stacked.livetime.to_value("s"), 6313.8116406202325)
def test_datasets_stack_reduce():
    """Stacked livetime plus plain and cumulative info-table ON counts."""
    template = "$GAMMAPY_DATA/joint-crab/spectra/hess/pha_obs{}.fits"
    datasets = Datasets(
        [
            SpectrumDatasetOnOff.from_ogip_files(template.format(obs_id))
            for obs_id in [23523, 23526, 23559, 23592]
        ]
    )
    stacked = datasets.stack_reduce()
    assert_allclose(stacked.livetime.to_value("s"), 6313.8116406202325)

    # Per-run counts...
    info_table = datasets.info_table()
    assert_allclose(info_table["n_on"], [124, 126, 119, 90])

    # ...and their running sum.
    info_table_cum = datasets.info_table(cumulative=True)
    assert_allclose(info_table_cum["n_on"], [124, 250, 369, 459])
class Analysis:
    """Config-driven high-level analysis interface.

    It is initialized by default with a set of configuration parameters and
    values declared in an internal configuration schema YAML file, though
    the user can also provide configuration parameters passed as a nested
    dictionary at the moment of instantiation. In that case these parameters
    will overwrite the default values of those present in the configuration
    file.

    For more info see :ref:`HLI`.

    Parameters
    ----------
    config : dict or `AnalysisConfig`
        Configuration options following `AnalysisConfig` schema
    """

    def __init__(self, config=None):
        if isinstance(config, dict):
            self._config = AnalysisConfig(config)
        elif isinstance(config, AnalysisConfig):
            self._config = config
        else:
            # Fixed typo in the error message: "AnalysiConfig" -> "AnalysisConfig".
            raise ValueError("Dict or `AnalysisConfig` object required.")
        self._set_logging()
        # Products filled in by the successive analysis steps below.
        self.observations = None
        self.background_estimator = None
        self.datasets = None
        self.extraction = None
        self.model = None
        self.fit = None
        self.fit_result = None
        self.flux_points = None

    @property
    def config(self):
        """Analysis configuration (`AnalysisConfig`)"""
        return self._config

    @property
    def settings(self):
        """Configuration settings for the analysis session."""
        return self.config.settings

    def get_observations(self):
        """Fetch observations from the data store according to criteria defined in the configuration."""
        self.config.validate()
        log.info("Fetching observations.")
        datastore_path = make_path(self.settings["observations"]["datastore"])
        # from_file/from_dir are alternate constructors; call them on the class
        # instead of on a throwaway instance.
        if datastore_path.is_file():
            datastore = DataStore.from_file(datastore_path)
        elif datastore_path.is_dir():
            datastore = DataStore.from_dir(datastore_path)
        else:
            raise FileNotFoundError(f"Datastore {datastore_path} not found.")
        ids = set()
        for criteria in self.settings["observations"]["filters"]:
            selected_obs = ObservationTable()
            # Build the selection dict afresh for every criteria: reusing one
            # dict across iterations leaked stale keys (e.g. "value_range",
            # "variable") from one filter into the next selection call.
            selection = dict()
            # TODO: Reduce significantly the code.
            # This block would be handled by datastore.obs_table.select_observations
            selection["type"] = criteria["filter_type"]
            for key, val in criteria.items():
                if key in ["lon", "lat", "radius", "border"]:
                    val = Angle(val)
                selection[key] = val
            if selection["type"] == "angle_box":
                # "angle_box" is a user-facing alias of the obs_table "par_box" type.
                selection["type"] = "par_box"
                selection["value_range"] = Angle(criteria["value_range"])
            if selection["type"] == "sky_circle" or selection["type"].endswith("_box"):
                selected_obs = datastore.obs_table.select_observations(selection)
            if selection["type"] == "par_value":
                mask = (
                    datastore.obs_table[criteria["variable"]]
                    == criteria["value_param"]
                )
                selected_obs = datastore.obs_table[mask]
            if selection["type"] == "ids":
                obs_list = datastore.get_observations(criteria["obs_ids"])
                selected_obs["OBS_ID"] = [obs.obs_id for obs in obs_list.list]
            if selection["type"] == "all":
                obs_list = datastore.get_observations()
                selected_obs["OBS_ID"] = [obs.obs_id for obs in obs_list.list]
            if len(selected_obs):
                # "exclude" filters subtract from the running id set instead of adding.
                if "exclude" in criteria and criteria["exclude"]:
                    ids.difference_update(selected_obs["OBS_ID"].tolist())
                else:
                    ids.update(selected_obs["OBS_ID"].tolist())
        self.observations = datastore.get_observations(ids, skip_missing=True)
        for obs in self.observations.list:
            log.info(obs)

    def get_datasets(self):
        """Produce reduced datasets."""
        if not self._validate_reduction_settings():
            return False
        if self.settings["datasets"]["dataset-type"] == "SpectrumDatasetOnOff":
            self._spectrum_extraction()
        elif self.settings["datasets"]["dataset-type"] == "MapDataset":
            self._map_making()
        else:
            # TODO raise error?
            log.info("Data reduction method not available.")
            return False

    def set_model(self, model=None, filename=""):
        """Read the model from dict or filename and attach it to datasets.

        Parameters
        ----------
        model: dict or string
            Dictionary or string in YAML format with the serialized model.
        filename : string
            Name of the model YAML file describing the model.
        """
        if not self._validate_set_model():
            return False
        log.info("Reading model.")
        if isinstance(model, str):
            model = yaml.safe_load(model)
        if model:
            self.model = SkyModels(dict_to_models(model))
        elif filename:
            filepath = make_path(filename)
            self.model = SkyModels.from_yaml(filepath)
        else:
            return False
        # TODO: Deal with multiple components
        for dataset in self.datasets.datasets:
            if isinstance(dataset, MapDataset):
                dataset.model = self.model
            else:
                # Spectral datasets take a single spectral model, not a SkyModels set.
                if len(self.model.skymodels) > 1:
                    raise ValueError(
                        "Can only fit a single spectral model at one time."
                    )
                dataset.model = self.model.skymodels[0].spectral_model
        log.info(self.model)

    def run_fit(self, optimize_opts=None):
        """Fitting reduced datasets to model."""
        if not self._validate_fitting_settings():
            return False
        for ds in self.datasets.datasets:
            # TODO: fit_range handled in jsonschema validation class
            if "fit_range" in self.settings["fit"]:
                e_min = u.Quantity(self.settings["fit"]["fit_range"]["min"])
                e_max = u.Quantity(self.settings["fit"]["fit_range"]["max"])
                # Map datasets keep the energy axis on the geometry; spectral
                # datasets expose it directly on the counts object.
                if isinstance(ds, MapDataset):
                    ds.mask_fit = ds.counts.geom.energy_mask(e_min, e_max)
                else:
                    ds.mask_fit = ds.counts.energy_mask(e_min, e_max)
        log.info("Fitting reduced datasets.")
        self.fit = Fit(self.datasets)
        self.fit_result = self.fit.run(optimize_opts=optimize_opts)
        log.info(self.fit_result)

    def get_flux_points(self, source="source"):
        """Calculate flux points for a specific model component.

        Parameters
        ----------
        source : string
            Name of the model component where to calculate the flux points.
        """
        if not self._validate_fp_settings():
            return False
        # TODO: add "source" to config
        log.info("Calculating flux points.")
        axis_params = self.settings["flux-points"]["fp_binning"]
        e_edges = MapAxis.from_bounds(**axis_params).edges
        flux_point_estimator = FluxPointsEstimator(
            e_edges=e_edges, datasets=self.datasets, source=source
        )
        fp = flux_point_estimator.run()
        # Points with TS below 4 (~2 sigma) are flagged as upper limits.
        fp.table["is_ul"] = fp.table["ts"] < 4
        model = self.model[source].spectral_model.copy()
        self.flux_points = FluxPointsDataset(data=fp, model=model)
        cols = ["e_ref", "ref_flux", "dnde", "dnde_ul", "dnde_err", "is_ul"]
        log.info("\n{}".format(self.flux_points.data.table[cols]))

    @staticmethod
    def _create_geometry(params):
        """Create the geometry."""
        # TODO: handled in jsonschema validation class
        # Deep-copy so the axes/skydir conversions below don't mutate the settings.
        geom_params = copy.deepcopy(params)
        axes = []
        for axis_params in geom_params.get("axes", []):
            ax = MapAxis.from_bounds(**axis_params)
            axes.append(ax)
        geom_params["axes"] = axes
        geom_params["skydir"] = tuple(geom_params["skydir"])
        return WcsGeom.create(**geom_params)

    def _map_making(self):
        """Make maps and datasets for 3d analysis."""
        log.info("Creating geometry.")
        geom = self._create_geometry(self.settings["datasets"]["geom"])
        # IRF geometry defaults to the counts geometry rebinned to BINSZ_IRF.
        if "geom-irf" in self.settings["datasets"]:
            geom_irf = self._create_geometry(self.settings["datasets"]["geom-irf"])
        else:
            geom_irf = geom.to_binsz(binsz=BINSZ_IRF)
        offset_max = Angle(self.settings["datasets"]["offset-max"])
        stack_datasets = self.settings["datasets"]["stack-datasets"]
        log.info("Creating datasets.")
        maker = MapDatasetMaker(geom=geom, geom_true=geom_irf, offset_max=offset_max)
        if stack_datasets:
            stacked = MapDataset.create(geom=geom, geom_irf=geom_irf, name="stacked")
            for obs in self.observations:
                dataset = maker.run(obs)
                stacked.stack(dataset)
            # Kernels are extracted once, from the stacked dataset.
            self._extract_irf_kernels(stacked)
            datasets = [stacked]
        else:
            datasets = []
            for obs in self.observations:
                dataset = maker.run(obs)
                self._extract_irf_kernels(dataset)
                datasets.append(dataset)
        self.datasets = Datasets(datasets)

    def _extract_irf_kernels(self, dataset):
        """Replace the dataset IRF maps by PSF kernel and energy dispersion at the map center."""
        # TODO: remove hard-coded default value
        max_radius = self.settings["datasets"].get("psf-kernel-radius", "0.6 deg")
        # TODO: handle IRF maps in fit
        geom = dataset.counts.geom
        geom_irf = dataset.exposure.geom
        position = geom.center_skydir
        geom_psf = geom.to_image().to_cube(geom_irf.axes)
        dataset.psf = dataset.psf.get_psf_kernel(
            position=position, geom=geom_psf, max_radius=max_radius
        )
        e_reco = geom.get_axis_by_name("energy").edges
        dataset.edisp = dataset.edisp.get_energy_dispersion(
            position=position, e_reco=e_reco
        )

    def _set_logging(self):
        """Set logging parameters for API."""
        logging.basicConfig(**self.settings["general"]["logging"])
        log.info(
            "Setting logging config: {!r}".format(self.settings["general"]["logging"])
        )

    def _spectrum_extraction(self):
        """Run all steps for the spectrum extraction."""
        region = self.settings["datasets"]["geom"]["region"]
        log.info("Reducing spectrum datasets.")
        on_lon = Angle(region["center"][0])
        on_lat = Angle(region["center"][1])
        on_center = SkyCoord(on_lon, on_lat, frame=region["frame"])
        on_region = CircleSkyRegion(on_center, Angle(region["radius"]))
        background_params = {"on_region": on_region}
        background = self.settings["datasets"]["background"]
        if "exclusion_mask" in background:
            map_hdu = {}
            filename = background["exclusion_mask"]["filename"]
            if "hdu" in background["exclusion_mask"]:
                map_hdu = {"hdu": background["exclusion_mask"]["hdu"]}
            exclusion_region = Map.read(filename, **map_hdu)
            background_params["exclusion_mask"] = exclusion_region
        if background["background_estimator"] == "reflected":
            self.background_estimator = ReflectedRegionsBackgroundEstimator(
                observations=self.observations, **background_params
            )
            self.background_estimator.run()
        else:
            # TODO: raise error?
            log.info("Background estimation only for reflected regions method.")
        extraction_params = {}
        if "containment_correction" in self.settings["datasets"]:
            extraction_params["containment_correction"] = self.settings["datasets"][
                "containment_correction"
            ]
        params = self.settings["datasets"]["geom"]["axes"][0]
        e_reco = MapAxis.from_bounds(**params).edges
        extraction_params["e_reco"] = e_reco
        extraction_params["e_true"] = None
        self.extraction = SpectrumExtraction(
            observations=self.observations,
            bkg_estimate=self.background_estimator.result,
            **extraction_params,
        )
        self.extraction.run()
        self.datasets = Datasets(self.extraction.spectrum_observations)
        if self.settings["datasets"]["stack-datasets"]:
            stacked = self.datasets.stack_reduce()
            stacked.name = "stacked"
            self.datasets = Datasets([stacked])

    def _validate_reduction_settings(self):
        """Validate settings before proceeding to data reduction."""
        if self.observations and len(self.observations):
            self.config.validate()
            return True
        else:
            log.info("No observations selected.")
            log.info("Data reduction cannot be done.")
            return False

    def _validate_set_model(self):
        """Validate that reduced datasets exist before attaching a model."""
        if self.datasets and self.datasets.datasets:
            self.config.validate()
            return True
        else:
            log.info("No datasets reduced.")
            return False

    def _validate_fitting_settings(self):
        """Validate settings before proceeding to fit 1D."""
        if not self.model:
            log.info("No model fetched for datasets.")
            log.info("Fit cannot be done.")
            return False
        else:
            return True

    def _validate_fp_settings(self):
        """Validate settings before proceeding to flux points estimation."""
        valid = True
        if self.fit:
            self.config.validate()
        else:
            log.info("No results available from fit.")
            valid = False
        if "flux-points" not in self.settings:
            log.info("No values declared for the energy bins.")
            valid = False
        elif "fp_binning" not in self.settings["flux-points"]:
            log.info("No values declared for the energy bins.")
            valid = False
        if not valid:
            log.info("Flux points calculation cannot be done.")
        return valid
class Analysis:
    """Config-driven high-level analysis interface.

    It is initialized by default with a set of configuration parameters and
    values declared in an internal high-level interface model, though the user
    can also provide configuration parameters passed as a nested dictionary at
    the moment of instantiation. In that case these parameters will overwrite
    the default values of those present in the configuration file.

    For more info see :ref:`analysis`.

    Parameters
    ----------
    config : dict or `AnalysisConfig`
        Configuration options following `AnalysisConfig` schema
    """

    def __init__(self, config):
        # The setter below validates/converts dict input.
        self.config = config
        self.config.set_logging()
        # Products filled in by the successive analysis steps below.
        self.datastore = None
        self.observations = None
        self.datasets = None
        self.models = None
        self.fit = None
        self.fit_result = None
        self.flux_points = None

    @property
    def config(self):
        """Analysis configuration (`AnalysisConfig`)"""
        return self._config

    @config.setter
    def config(self, value):
        if isinstance(value, dict):
            self._config = AnalysisConfig(**value)
        elif isinstance(value, AnalysisConfig):
            self._config = value
        else:
            raise TypeError("config must be dict or AnalysisConfig.")

    def get_observations(self):
        """Fetch observations from the data store according to criteria defined in the configuration."""
        path = make_path(self.config.observations.datastore)
        if path.is_file():
            self.datastore = DataStore.from_file(path)
        elif path.is_dir():
            self.datastore = DataStore.from_dir(path)
        else:
            raise FileNotFoundError(f"Datastore not found: {path}")
        log.info("Fetching observations.")
        observations_settings = self.config.observations
        # obs_ids and obs_file are mutually exclusive ways to select runs.
        if (len(observations_settings.obs_ids)
                and observations_settings.obs_file is not None):
            raise ValueError(
                "Values for both parameters obs_ids and obs_file are not accepted."
            )
        elif (not len(observations_settings.obs_ids)
                and observations_settings.obs_file is None):
            # Neither given: start from every observation in the datastore.
            obs_list = self.datastore.get_observations()
            ids = [obs.obs_id for obs in obs_list]
        elif len(observations_settings.obs_ids):
            obs_list = self.datastore.get_observations(
                observations_settings.obs_ids)
            ids = [obs.obs_id for obs in obs_list]
        else:
            # obs_file: plain-text table whose first column holds the ids.
            path = make_path(self.config.observations.obs_file)
            ids = list(
                Table.read(path, format="ascii", data_start=0).columns[0])
        if observations_settings.obs_cone.lon is not None:
            # Intersect the id selection with a sky-cone selection.
            cone = dict(
                type="sky_circle",
                frame=observations_settings.obs_cone.frame,
                lon=observations_settings.obs_cone.lon,
                lat=observations_settings.obs_cone.lat,
                radius=observations_settings.obs_cone.radius,
                border="0 deg",
            )
            selected_cone = self.datastore.obs_table.select_observations(cone)
            ids = list(set(ids) & set(selected_cone["OBS_ID"].tolist()))
        self.observations = self.datastore.get_observations(ids, skip_missing=True)
        if self.config.observations.obs_time.start is not None:
            start = self.config.observations.obs_time.start
            stop = self.config.observations.obs_time.stop
            self.observations = self.observations.select_time([(start, stop)])
        log.info(f"Number of selected observations: {len(self.observations)}")
        for obs in self.observations:
            log.debug(obs)

    def get_datasets(self):
        """Produce reduced datasets."""
        if not self.observations or len(self.observations) == 0:
            raise RuntimeError("No observations have been selected.")
        if self.config.datasets.type == "1d":
            self._spectrum_extraction()
        elif self.config.datasets.type == "3d":
            self._map_making()
        else:
            # Fixed: the ValueError was previously constructed but never raised,
            # so an invalid dataset type silently did nothing.
            raise ValueError(f"Invalid dataset type: {self.config.datasets.type}")

    def set_models(self, models):
        """Set models on datasets.

        Parameters
        ----------
        models : `~gammapy.modeling.models.Models` or str
            Models object or YAML models string
        """
        if not self.datasets or len(self.datasets) == 0:
            raise RuntimeError("Missing datasets")
        log.info("Reading model.")
        if isinstance(models, str):
            # FIXME: Models should offer a method to create from YAML str
            models = yaml.safe_load(models)
            self.models = Models(dict_to_models(models))
        elif isinstance(models, Models):
            self.models = models
        else:
            raise TypeError(f"Invalid type: {models!r}")
        for dataset in self.datasets:
            dataset.models = self.models
        log.info(self.models)

    def read_models(self, path):
        """Read models from YAML file."""
        path = make_path(path)
        models = Models.read(path)
        self.set_models(models)

    def run_fit(self, optimize_opts=None):
        """Fitting reduced datasets to model."""
        if not self.models:
            raise RuntimeError("Missing models")
        fit_settings = self.config.fit
        for dataset in self.datasets:
            if fit_settings.fit_range:
                e_min = fit_settings.fit_range.min
                e_max = fit_settings.fit_range.max
                # Map datasets keep the energy axis on the geometry; spectral
                # datasets expose it directly on the counts object.
                if isinstance(dataset, MapDataset):
                    dataset.mask_fit = dataset.counts.geom.energy_mask(
                        e_min, e_max)
                else:
                    dataset.mask_fit = dataset.counts.energy_mask(e_min, e_max)
        log.info("Fitting datasets.")
        self.fit = Fit(self.datasets)
        self.fit_result = self.fit.run(optimize_opts=optimize_opts)
        log.info(self.fit_result)

    def get_flux_points(self):
        """Calculate flux points for a specific model component."""
        if not self.fit:
            raise RuntimeError("No results available from Fit.")
        fp_settings = self.config.flux_points
        log.info("Calculating flux points.")
        e_edges = self._make_energy_axis(fp_settings.energy).edges
        flux_point_estimator = FluxPointsEstimator(
            e_edges=e_edges,
            datasets=self.datasets,
            source=fp_settings.source,
            **fp_settings.params,
        )
        fp = flux_point_estimator.run()
        # Points with TS below 4 (~2 sigma) are flagged as upper limits.
        fp.table["is_ul"] = fp.table["ts"] < 4
        self.flux_points = FluxPointsDataset(
            data=fp, models=self.models[fp_settings.source])
        cols = ["e_ref", "ref_flux", "dnde", "dnde_ul", "dnde_err", "is_ul"]
        log.info("\n{}".format(self.flux_points.data.table[cols]))

    def update_config(self, config):
        """Merge the given configuration into the current one."""
        self.config = self.config.update(config=config)

    def _create_geometry(self):
        """Create the WCS geometry from the dataset settings."""
        geom_params = {}
        geom_settings = self.config.datasets.geom
        skydir_settings = geom_settings.wcs.skydir
        if skydir_settings.lon is not None:
            skydir = SkyCoord(skydir_settings.lon, skydir_settings.lat,
                              frame=skydir_settings.frame)
            geom_params["skydir"] = skydir
        if skydir_settings.frame == "icrs":
            geom_params["frame"] = "icrs"
        if skydir_settings.frame == "galactic":
            geom_params["frame"] = "galactic"
        axes = [self._make_energy_axis(geom_settings.axes.energy)]
        geom_params["axes"] = axes
        geom_params["binsz"] = geom_settings.wcs.binsize
        width = geom_settings.wcs.fov.width.to("deg").value
        height = geom_settings.wcs.fov.height.to("deg").value
        geom_params["width"] = (width, height)
        return WcsGeom.create(**geom_params)

    def _map_making(self):
        """Make maps and datasets for 3d analysis."""
        log.info("Creating geometry.")
        geom = self._create_geometry()
        geom_settings = self.config.datasets.geom
        geom_irf = dict(energy_axis_true=None, binsz_irf=None)
        if geom_settings.axes.energy_true.min is not None:
            geom_irf["energy_axis_true"] = self._make_energy_axis(
                geom_settings.axes.energy_true)
        geom_irf["binsz_irf"] = geom_settings.wcs.binsize_irf.to("deg").value
        offset_max = geom_settings.selection.offset_max
        log.info("Creating datasets.")
        maker = MapDatasetMaker(selection=self.config.datasets.map_selection)
        safe_mask_selection = self.config.datasets.safe_mask.methods
        safe_mask_settings = self.config.datasets.safe_mask.settings
        maker_safe_mask = SafeMaskMaker(methods=safe_mask_selection,
                                        **safe_mask_settings)
        stacked = MapDataset.create(geom=geom, name="stacked", **geom_irf)
        if self.config.datasets.stack:
            for obs in self.observations:
                log.info(f"Processing observation {obs.obs_id}")
                # Reduce each run on a cutout around its pointing, then stack.
                cutout = stacked.cutout(obs.pointing_radec, width=2 * offset_max)
                dataset = maker.run(cutout, obs)
                dataset = maker_safe_mask.run(dataset, obs)
                log.debug(dataset)
                stacked.stack(dataset)
            datasets = [stacked]
        else:
            datasets = []
            for obs in self.observations:
                log.info(f"Processing observation {obs.obs_id}")
                cutout = stacked.cutout(obs.pointing_radec, width=2 * offset_max)
                dataset = maker.run(cutout, obs)
                dataset = maker_safe_mask.run(dataset, obs)
                log.debug(dataset)
                datasets.append(dataset)
        self.datasets = Datasets(datasets)

    def _spectrum_extraction(self):
        """Run all steps for the spectrum extraction."""
        log.info("Reducing spectrum datasets.")
        datasets_settings = self.config.datasets
        on_lon = datasets_settings.on_region.lon
        on_lat = datasets_settings.on_region.lat
        on_center = SkyCoord(on_lon, on_lat, frame=datasets_settings.on_region.frame)
        on_region = CircleSkyRegion(on_center, datasets_settings.on_region.radius)
        maker_config = {}
        if datasets_settings.containment_correction:
            maker_config[
                "containment_correction"] = datasets_settings.containment_correction
        e_reco = self._make_energy_axis(
            datasets_settings.geom.axes.energy).edges
        # Background comes from the reflected-regions maker below,
        # so only counts/aeff/edisp are produced here.
        maker_config["selection"] = ["counts", "aeff", "edisp"]
        dataset_maker = SpectrumDatasetMaker(**maker_config)
        bkg_maker_config = {}
        if datasets_settings.background.exclusion:
            exclusion_region = Map.read(datasets_settings.background.exclusion)
            bkg_maker_config["exclusion_mask"] = exclusion_region
        bkg_maker = ReflectedRegionsBackgroundMaker(**bkg_maker_config)
        safe_mask_selection = self.config.datasets.safe_mask.methods
        safe_mask_settings = self.config.datasets.safe_mask.settings
        safe_mask_maker = SafeMaskMaker(methods=safe_mask_selection,
                                        **safe_mask_settings)
        e_true = self._make_energy_axis(
            datasets_settings.geom.axes.energy_true).edges
        # All runs are reduced onto copies of this common reference dataset.
        reference = SpectrumDataset.create(e_reco=e_reco, e_true=e_true,
                                           region=on_region)
        datasets = []
        for obs in self.observations:
            log.info(f"Processing observation {obs.obs_id}")
            dataset = dataset_maker.run(reference.copy(), obs)
            dataset = bkg_maker.run(dataset, obs)
            if dataset.counts_off is None:
                log.info(
                    f"No OFF region found for observation {obs.obs_id}. Discarding."
                )
                continue
            dataset = safe_mask_maker.run(dataset, obs)
            log.debug(dataset)
            datasets.append(dataset)
        self.datasets = Datasets(datasets)
        if self.config.datasets.stack:
            stacked = self.datasets.stack_reduce(name="stacked")
            self.datasets = Datasets([stacked])

    @staticmethod
    def _make_energy_axis(axis):
        """Build a log-spaced edge-node energy MapAxis from an axis config (min/max/nbins)."""
        return MapAxis.from_bounds(
            name="energy",
            lo_bnd=axis.min.value,
            # Express the upper bound in the unit of the lower bound.
            hi_bnd=axis.max.to_value(axis.min.unit),
            nbin=axis.nbins,
            unit=axis.min.unit,
            interp="log",
            node_type="edges",
        )