Example 1
    def run_region(self, kr, lon, lat, radius):
        # TODO: for now we have to read/create the all-sky maps in each job,
        # because we can't pickle functools._lru_cache_wrapper objects.
        # Move this back to __init__ once that is fixed.

        # exposure
        exposure_hpx = Map.read(
            "$GAMMAPY_DATA/fermi_3fhl/fermi_3fhl_exposure_cube_hpx.fits.gz"
        )
        exposure_hpx.unit = "cm2 s"

        # iem
        iem_filepath = BASE_PATH / "data" / "gll_iem_v06_extrapolated.fits"
        iem_fermi_extra = Map.read(iem_filepath)
        # norm=1.1, tilt=0.03; see paper appendix A
        model_iem = SkyDiffuseCube(
            iem_fermi_extra, norm=1.1, tilt=0.03, name="iem_extrapolated"
        )

        # ROI
        roi_time = time()
        ROI_pos = SkyCoord(lon, lat, frame="galactic", unit="deg")
        width = 2 * (radius + self.psf_margin)

        # Counts
        counts = Map.create(
            skydir=ROI_pos,
            width=width,
            proj="CAR",
            frame="galactic",
            binsz=1 / 8.0,
            axes=[self.energy_axis],
            dtype=float,
        )
        counts.fill_by_coord(
            {"skycoord": self.events.radec, "energy": self.events.energy}
        )

        axis = MapAxis.from_nodes(
            counts.geom.axes[0].center, name="energy_true", unit="GeV", interp="log"
        )
        wcs = counts.geom.wcs
        geom = WcsGeom(wcs=wcs, npix=counts.geom.npix, axes=[axis])
        coords = geom.get_coord()
        # interpolate the all-sky HEALPix exposure onto the local WCS geometry
        data = exposure_hpx.interp_by_coord(coords)
        exposure = WcsNDMap(geom, data, unit=exposure_hpx.unit, dtype=float)

        # read PSF
        psf_kernel = PSFKernel.from_table_psf(
            self.psf, geom, max_radius=self.psf_margin * u.deg
        )

        # Energy Dispersion
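        # assume a perfect energy reconstruction: the response matrix is
        # diagonal between e_true and e_reco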
        e_true = exposure.geom.axes[0].edges
        e_reco = counts.geom.axes[0].edges
        edisp = EDispKernel.from_diagonal_response(e_true=e_true, e_reco=e_reco)

        # fit mask
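        # if the ROI straddles the Galactic lon = 0/360 deg seam, shift
        # longitudes to a continuous [-180, 180] deg range first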
        if coords["lon"].min() < 90 * u.deg and coords["lon"].max() > 270 * u.deg:
            coords["lon"][coords["lon"].value > 180] -= 360 * u.deg
        mask = (
            (coords["lon"] >= coords["lon"].min() + self.psf_margin * u.deg)
            & (coords["lon"] <= coords["lon"].max() - self.psf_margin * u.deg)
            & (coords["lat"] >= coords["lat"].min() + self.psf_margin * u.deg)
            & (coords["lat"] <= coords["lat"].max() - self.psf_margin * u.deg)
        )
        mask_fermi = WcsNDMap(counts.geom, mask)

        # IEM
        eval_iem = MapEvaluator(
            model=model_iem, exposure=exposure, psf=psf_kernel, edisp=edisp
        )
        bkg_iem = eval_iem.compute_npred()

        # ISO
        eval_iso = MapEvaluator(model=self.model_iso, exposure=exposure, edisp=edisp)
        bkg_iso = eval_iso.compute_npred()

        # merge iem and iso, only one local normalization is fitted
        dataset_name = "3FHL_ROI_num" + str(kr)
        background_total = bkg_iem + bkg_iso
        background_model = BackgroundModel(
            background_total, name="bkg_iem+iso", datasets_names=[dataset_name]
        )
        background_model.parameters["norm"].min = 0.0

        # Sources model
        in_roi = self.FHL3.positions.galactic.contained_by(wcs)
        FHL3_roi = []
        for ks in range(len(self.FHL3.table)):
            if in_roi[ks]:
                model = self.FHL3[ks].sky_model()
                model.spatial_model.parameters.freeze_all()  # freeze spatial
                model.spectral_model.parameters["amplitude"].min = 0.0
                if isinstance(model.spectral_model, PowerLawSpectralModel):
                    model.spectral_model.parameters["index"].min = 0.1
                    model.spectral_model.parameters["index"].max = 10.0
                else:
                    model.spectral_model.parameters["alpha"].min = 0.1
                    model.spectral_model.parameters["alpha"].max = 10.0

                FHL3_roi.append(model)
        model_total = Models([background_model] + FHL3_roi)

        # Dataset
        dataset = MapDataset(
            models=model_total,
            counts=counts,
            exposure=exposure,
            psf=psf_kernel,
            edisp=edisp,
            mask_fit=mask_fermi,
            name=dataset_name,
        )
        cat_stat = dataset.stat_sum()

        datasets = Datasets([dataset])
        fit = Fit(datasets)
        results = fit.run(**self.optimize_opts)
        print("ROI_num", str(kr), "\n", results)
        fit_stat = datasets.stat_sum()

        if results.message != "Optimization failed.":
            datasets.write(path=Path(self.resdir), prefix=dataset.name, overwrite=True)
            np.savez(
                self.resdir / f"3FHL_ROI_num{kr}_fit_infos.npz",
                message=results.message,
                stat=[cat_stat, fit_stat],
            )

            exec_time = time() - roi_time
            print("ROI", kr, " time (s): ", exec_time)

            for model in FHL3_roi:
                if (
                    self.FHL3[model.name].data["ROI_num"] == kr
                    and self.FHL3[model.name].data["Signif_Avg"] >= self.sig_cut
                ):
                    flux_points = FluxPointsEstimator(
                        e_edges=self.El_flux, source=model.name, n_sigma_ul=2,
                    ).run(datasets=datasets)
                    filename = self.resdir / f"{model.name}_flux_points.fits"
                    flux_points.write(filename, overwrite=True)

            # flux-point phase duration: total elapsed time minus the fit phase
            exec_time = time() - roi_time - exec_time
            print("ROI", kr, " Flux points time (s): ", exec_time)
Example 2
class Fit:
    """Fit class.

    The fit class provides a uniform interface to multiple fitting backends.
    Currently available: "minuit", "sherpa" and "scipy"

    Parameters
    ----------
    datasets : `Datasets`
        Datasets on which the fit is performed.
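
    Examples
    --------
    A minimal sketch, assuming ``datasets`` is an existing `Datasets`
    object (the name is illustrative)::

        fit = Fit(datasets)
        result = fit.run()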
    """

    def __init__(self, datasets):
        from gammapy.datasets import Datasets

        self.datasets = Datasets(datasets)

    @lazyproperty
    def _parameters(self):
        return self.datasets.parameters

    @lazyproperty
    def _models(self):
        return self.datasets.models

    def run(self, backend="minuit", optimize_opts=None, covariance_opts=None):
        """
        Run all fitting steps.

        Parameters
        ----------
        backend : str
            Backend used for fitting. Default is "minuit".
        optimize_opts : dict
            Options passed to `Fit.optimize`.
        covariance_opts : dict
            Options passed to `Fit.covariance`.

        Returns
        -------
        fit_result : `FitResult`
            Results
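
        Examples
        --------
        A minimal sketch; ``fit`` is assumed to be a `Fit` instance built from
        existing datasets, and the ``migrad_opts`` entry is forwarded to
        `iminuit.Minuit.migrad` as described in `Fit.optimize`::

            result = fit.run(
                backend="minuit",
                optimize_opts={"migrad_opts": {"ncall": 10000}},
            )
            print(result.success)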
        """

        if optimize_opts is None:
            optimize_opts = {}
        optimize_result = self.optimize(backend, **optimize_opts)

        if covariance_opts is None:
            covariance_opts = {}

        if backend not in registry.register["covariance"]:
            log.warning("No covariance estimate - not supported by this backend.")
            return optimize_result

        covariance_result = self.covariance(backend, **covariance_opts)
        # TODO: not sure how best to report the results
        # back or how to form the FitResult object.
        optimize_result._success = optimize_result.success and covariance_result.success

        return optimize_result

    def optimize(self, backend="minuit", **kwargs):
        """Run the optimization.

        Parameters
        ----------
        backend : str
            Which backend to use (see ``gammapy.modeling.registry``)
        **kwargs : dict
            Keyword arguments passed to the optimizer. For the `"minuit"` backend
            see https://iminuit.readthedocs.io/en/latest/api.html#iminuit.Minuit
            for a detailed description of the available options. If there is an entry
            'migrad_opts', those options will be passed to `iminuit.Minuit.migrad()`.

            For the `"sherpa"` backend you can from the options `method = {"simplex",  "levmar", "moncar", "gridsearch"}`
            Those methods are described and compared in detail on
            http://cxc.cfa.harvard.edu/sherpa/methods/index.html. The available
            options of the optimization methods are described on the following
            pages in detail:

                * http://cxc.cfa.harvard.edu/sherpa/ahelp/neldermead.html
                * http://cxc.cfa.harvard.edu/sherpa/ahelp/montecarlo.html
                * http://cxc.cfa.harvard.edu/sherpa/ahelp/gridsearch.html
                * http://cxc.cfa.harvard.edu/sherpa/ahelp/levmar.html

            For the `"scipy"` backend the available options are desribed in detail here:
            https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.minimize.html

        Returns
        -------
        fit_result : `FitResult`
            Results
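
        Examples
        --------
        A minimal sketch; ``fit`` is assumed to be a `Fit` instance, and the
        ``method`` value is one of the sherpa options listed above::

            result = fit.optimize(backend="sherpa", method="simplex")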
        """
        parameters = self._parameters
        parameters.check_limits()

        # TODO: expose options if / when to scale? On the Fit class?
        if np.all(self._models.covariance.data == 0):
            parameters.autoscale()

        compute = registry.get("optimize", backend)
        # TODO: change this calling interface!
        # probably should pass a fit statistic, which has a model, which has parameters
        # and return something simpler, not a tuple of three things
        factors, info, optimizer = compute(
            parameters=parameters, function=self.datasets.stat_sum, **kwargs
        )

        # TODO: Change to a stateless interface for minuit also, or if we must support
        # stateful backends, put a proper, backend-agnostic solution for this.
        # As a preliminary solution we would like to give the user access to the
        # Minuit object, because it features a lot of useful functionality
        if backend == "minuit":
            self.minuit = optimizer

        # Copy final results into the parameters object
        parameters.set_parameter_factors(factors)
        parameters.check_limits()
        return OptimizeResult(
            parameters=parameters,
            total_stat=self.datasets.stat_sum(),
            backend=backend,
            method=kwargs.get("method", backend),
            **info,
        )

    def covariance(self, backend="minuit", **kwargs):
        """Estimate the covariance matrix.

        Assumes that the model parameters are already optimised.

        Parameters
        ----------
        backend : str
            Which backend to use (see ``gammapy.modeling.registry``)

        Returns
        -------
        result : `CovarianceResult`
            Results
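
        Examples
        --------
        A minimal sketch; with the "minuit" backend, `Fit.optimize` must be
        called first so that the Minuit object exists::

            fit.optimize(backend="minuit")
            result = fit.covariance(backend="minuit")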
        """
        compute = registry.get("covariance", backend)
        parameters = self._parameters

        # TODO: wrap MINUIT in a stateless backend
        with parameters.restore_values:
            if backend == "minuit":
                method = "hesse"
                if hasattr(self, "minuit"):
                    factor_matrix, info = compute(self.minuit)
                else:
                    raise RuntimeError("To use minuit, you must first optimize.")
            else:
                method = ""
                factor_matrix, info = compute(
                    parameters, self.datasets.stat_sum, **kwargs
                )

            covariance = Covariance.from_factor_matrix(
                parameters=self._models.parameters, matrix=factor_matrix
            )
            self._models.covariance = covariance

        # TODO: decide what to return, and fill the info correctly!
        return CovarianceResult(
            backend=backend,
            method=method,
            parameters=parameters,
            success=info["success"],
            message=info["message"],
        )

    def confidence(
        self, parameter, backend="minuit", sigma=1, reoptimize=True, **kwargs
    ):
        """Estimate confidence interval.

        Extra ``kwargs`` are passed to the backend.
        E.g. `iminuit.Minuit.minos` supports a ``maxcall`` option.

        For the scipy backend ``kwargs`` are forwarded to `~scipy.optimize.brentq`. If the
        confidence estimation fails, the bracketing interval can be adapted by modifying
        the upper bound of the interval (the ``b`` value).

        Parameters
        ----------
        parameter : `~gammapy.modeling.Parameter`
            Parameter of interest
        backend : str
            Which backend to use (see ``gammapy.modeling.registry``)
        sigma : float
            Number of standard deviations for the confidence level
        reoptimize : bool
            Re-optimize other parameters when computing the confidence region.
        **kwargs : dict
            Keyword arguments passed to the confidence estimation method.

        Returns
        -------
        result : dict
            Dictionary with keys "errp", 'errn", "success" and "nfev".
        """
        compute = registry.get("confidence", backend)
        parameters = self._parameters
        parameter = parameters[parameter]

        # TODO: wrap MINUIT in a stateless backend
        with parameters.restore_values:
            if backend == "minuit":
                if hasattr(self, "minuit"):
                    # This is ugly. We will access parameters and make a copy
                    # from the backend, to avoid modifying the state
                    result = compute(
                        self.minuit, parameters, parameter, sigma, **kwargs
                    )
                else:
                    raise RuntimeError("To use minuit, you must first optimize.")
            else:
                result = compute(
                    parameters,
                    parameter,
                    self.datasets.stat_sum,
                    sigma,
                    reoptimize,
                    **kwargs,
                )

        result["errp"] *= parameter.scale
        result["errn"] *= parameter.scale
        return result

    def stat_profile(
        self,
        parameter,
        values=None,
        bounds=2,
        nvalues=11,
        reoptimize=False,
        optimize_opts=None,
    ):
        """Compute fit statistic profile.

        The method used is to vary one parameter, keeping all others fixed.
        So this is taking a "slice" or "scan" of the fit statistic.

        See also: `Fit.stat_surface`.

        Parameters
        ----------
        parameter : `~gammapy.modeling.Parameter`
            Parameter of interest
        values : `~astropy.units.Quantity`, optional
            Parameter values to evaluate the fit statistic for.
        bounds : int or tuple of float
            When an `int` is passed, the bounds are computed as ``bounds * sigma``
            around the best-fit value of the parameter, where sigma corresponds to
            the one-sigma error on the parameter. If a tuple of floats is given,
            those are taken as the min and max values, and ``nvalues`` points are
            linearly spaced between them.
        nvalues : int
            Number of parameter grid points to use.
        reoptimize : bool
            Re-optimize other parameters when computing the fit statistic profile.

        Returns
        -------
        results : dict
            Dictionary with keys "values" and "stat".
        """
        parameters = self._parameters
        parameter = parameters[parameter]

        optimize_opts = optimize_opts or {}

        if values is None:
            if isinstance(bounds, tuple):
                parmin, parmax = bounds
            else:
                if np.isnan(parameter.error):
                    raise ValueError("Parameter error is not properly set.")
                parerr = parameter.error
                parval = parameter.value
                parmin, parmax = parval - bounds * parerr, parval + bounds * parerr

            values = np.linspace(parmin, parmax, nvalues)

        stats = []
        with parameters.restore_values:
            for value in values:
                parameter.value = value
                if reoptimize:
                    parameter.frozen = True
                    result = self.optimize(**optimize_opts)
                    stat = result.total_stat
                else:
                    stat = self.datasets.stat_sum()
                stats.append(stat)

        return {"values": values, "stat": np.array(stats)}

    def stat_surface(
        self, x, y, x_values, y_values, reoptimize=False, **optimize_opts
    ):
        """Compute fit statistic surface.

        The method used is to vary two parameters, keeping all others fixed.
        So this is taking a "slice" or "scan" of the fit statistic.

        Caveat: this method can be very computationally intensive and slow.

        See also: `Fit.minos_contour`

        Parameters
        ----------
        x, y : `~gammapy.modeling.Parameter`
            Parameters of interest
        x_values, y_values : list or `numpy.ndarray`
            Parameter values to evaluate the fit statistic for.
        reoptimize : bool
            Re-optimize other parameters when computing the fit statistic surface.
        **optimize_opts : dict
            Keyword arguments passed to the optimizer. See `Fit.optimize` for further details.

        Returns
        -------
        results : dict
            Dictionary with keys "x_values", "y_values" and "stat".

        """
        parameters = self._parameters
        x = parameters[x]
        y = parameters[y]

        stats = []
        with parameters.restore_values:
            for x_value, y_value in itertools.product(x_values, y_values):
                # TODO: Remove log.info() and provide a nice progress bar
                log.info(f"Processing: x={x_value}, y={y_value}")
                x.value = x_value
                y.value = y_value
                if reoptimize:
                    x.frozen = True
                    y.frozen = True
                    result = self.optimize(**optimize_opts)
                    stat = result.total_stat
                else:
                    stat = self.datasets.stat_sum()

                stats.append(stat)

        stats = np.array(stats)
        stats = stats.reshape(
            (np.asarray(x_values).shape[0], np.asarray(y_values).shape[0])
        )

        return {"x_values": x_values, "y_values": y_values, "stat": stats}

    def minos_contour(self, x, y, numpoints=10, sigma=1.0):
        """Compute MINOS contour.

        Calls ``iminuit.Minuit.mncontour``.

        This is a contouring algorithm for a 2D function
        which is not simply the fit statistic function.
        That 2D function is given at each point ``(par_1, par_2)``
        by re-optimising all other free parameters,
        and taking the fit statistic at that point.

        Very compute-intensive and slow.

        Parameters
        ----------
        x, y : `~gammapy.modeling.Parameter`
            Parameters of interest
        numpoints : int
            Number of contour points
        sigma : float
            Number of standard deviations for the confidence level

        Returns
        -------
        result : dict
            Dictionary with keys "x", "y" (Numpy arrays with contour points)
            and a boolean flag "success".
            The result objects from ``mncontour`` are in the additional
            keys "x_info" and "y_info".
        """
        parameters = self._parameters
        x = parameters[x]
        y = parameters[y]

        with parameters.restore_values:
            result = mncontour(self.minuit, parameters, x, y, numpoints, sigma)

        x = result["x"] * x.scale
        y = result["y"] * y.scale

        return {
            "x": x,
            "y": y,
            "success": result["success"],
            "x_info": result["x_info"],
            "y_info": result["y_info"],
        }
Example 3
    def run_region(self, kr, lon, lat, radius):
        # TODO: for now we have to read/create the all-sky maps in each job,
        # because we can't pickle functools._lru_cache_wrapper objects.
        # Move this back to __init__ once that is fixed.

        log.info(f"ROI {kr}: loading data")

        # exposure
        exposure_hpx = Map.read(
            "$GAMMAPY_DATA/fermi_3fhl/fermi_3fhl_exposure_cube_hpx.fits.gz")
        exposure_hpx.unit = "cm2 s"

        # psf
        psf_map = PSFMap.read(
            "$GAMMAPY_DATA/fermi_3fhl/fermi_3fhl_psf_gc.fits.gz",
            format="gtpsf")
        # reduce size of the PSF
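        # keep only the radial offset bins up to (approximately) the PSF margin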
        axis = psf_map.psf_map.geom.axes["rad"].center.to_value(u.deg)
        indmax = np.argmin(np.abs(self.psf_margin - axis))
        psf_map = psf_map.slice_by_idx(slices={"rad": slice(0, indmax)})

        # iem
        iem_filepath = BASE_PATH / "data" / "gll_iem_v06_extrapolated.fits"
        iem_fermi_extra = Map.read(iem_filepath)
        # norm=1.1, tilt=0.03; see paper appendix A
        model_iem = SkyModel(
            PowerLawNormSpectralModel(norm=1.1, tilt=0.03),
            TemplateSpatialModel(iem_fermi_extra, normalize=False),
            name="iem_extrapolated",
        )

        # ROI
        roi_time = time()
        ROI_pos = SkyCoord(lon, lat, frame="galactic", unit="deg")
        width = 2 * (radius + self.psf_margin)

        # Counts
        counts = Map.create(
            skydir=ROI_pos,
            width=width,
            proj="CAR",
            frame="galactic",
            binsz=1 / 8.0,
            axes=[self.energy_axis],
            dtype=float,
        )
        counts.fill_by_coord({
            "skycoord": self.events.radec,
            "energy": self.events.energy
        })

        axis = MapAxis.from_nodes(counts.geom.axes[0].center,
                                  name="energy_true",
                                  unit="GeV",
                                  interp="log")
        wcs = counts.geom.wcs
        geom = WcsGeom(wcs=wcs, npix=counts.geom.npix, axes=[axis])
        coords = geom.get_coord()
        # interpolate the all-sky HEALPix exposure onto the local WCS geometry
        data = exposure_hpx.interp_by_coord(coords)
        exposure = WcsNDMap(geom, data, unit=exposure_hpx.unit, dtype=float)

        # Energy Dispersion
        edisp = EDispKernelMap.from_diagonal_response(
            energy_axis_true=axis, energy_axis=self.energy_axis)

        # fit mask
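        # if the ROI straddles the Galactic lon = 0/360 deg seam, shift
        # longitudes to a continuous [-180, 180] deg range first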
        if coords["lon"].min() < 90 * u.deg and coords["lon"].max(
        ) > 270 * u.deg:
            coords["lon"][coords["lon"].value > 180] -= 360 * u.deg
        mask = (
            (coords["lon"] >= coords["lon"].min() + self.psf_margin * u.deg)
            & (coords["lon"] <= coords["lon"].max() - self.psf_margin * u.deg)
            & (coords["lat"] >= coords["lat"].min() + self.psf_margin * u.deg)
            & (coords["lat"] <= coords["lat"].max() - self.psf_margin * u.deg))
        mask_fermi = WcsNDMap(counts.geom, mask)
        mask_safe_fermi = WcsNDMap(counts.geom, np.ones(mask.shape, dtype=bool))

        log.info(f"ROI {kr}: pre-computing diffuse")

        # IEM
        eval_iem = MapEvaluator(
            model=model_iem,
            exposure=exposure,
            psf=psf_map.get_psf_kernel(geom),
            edisp=edisp.get_edisp_kernel(),
        )
        bkg_iem = eval_iem.compute_npred()

        # ISO
        eval_iso = MapEvaluator(model=self.model_iso,
                                exposure=exposure,
                                edisp=edisp.get_edisp_kernel())
        bkg_iso = eval_iso.compute_npred()

        # merge iem and iso, only one local normalization is fitted
        dataset_name = "3FHL_ROI_num" + str(kr)
        background_total = bkg_iem + bkg_iso

        # Dataset
        dataset = MapDataset(
            counts=counts,
            exposure=exposure,
            background=background_total,
            psf=psf_map,
            edisp=edisp,
            mask_fit=mask_fermi,
            mask_safe=mask_safe_fermi,
            name=dataset_name,
        )

        background_model = FoVBackgroundModel(dataset_name=dataset_name)
        background_model.parameters["norm"].min = 0.0

        # Sources model
        in_roi = self.FHL3.positions.galactic.contained_by(wcs)
        FHL3_roi = []
        for ks in range(len(self.FHL3.table)):
            if in_roi[ks]:
                model = self.FHL3[ks].sky_model()
                model.spatial_model.parameters.freeze_all()  # freeze spatial
                model.spectral_model.parameters["amplitude"].min = 0.0
                if isinstance(model.spectral_model, PowerLawSpectralModel):
                    model.spectral_model.parameters["index"].min = 0.1
                    model.spectral_model.parameters["index"].max = 10.0
                else:
                    model.spectral_model.parameters["alpha"].min = 0.1
                    model.spectral_model.parameters["alpha"].max = 10.0

                FHL3_roi.append(model)
        model_total = Models(FHL3_roi + [background_model])
        dataset.models = model_total

        cat_stat = dataset.stat_sum()
        datasets = Datasets([dataset])

        log.info(f"ROI {kr}: running fit")
        fit = Fit(**self.fit_opts)
        results = fit.run(datasets=datasets)
        print("ROI_num", str(kr), "\n", results)
        fit_stat = datasets.stat_sum()

        if results.message != "Optimization failed.":
            filedata = Path(self.resdir) / f"3FHL_ROI_num{kr}_datasets.yaml"
            filemodel = Path(self.resdir) / f"3FHL_ROI_num{kr}_models.yaml"
            datasets.write(filedata, filemodel, overwrite=True)
            np.savez(
                self.resdir / f"3FHL_ROI_num{kr}_fit_infos.npz",
                message=results.message,
                stat=[cat_stat, fit_stat],
            )

            exec_time = time() - roi_time
            print("ROI", kr, " time (s): ", exec_time)

            log.info(f"ROI {kr}: running flux points")
            for model in FHL3_roi:
                if (
                    self.FHL3[model.name].data["ROI_num"] == kr
                    and self.FHL3[model.name].data["Signif_Avg"] >= self.sig_cut
                ):
                    print(model.name)
                    flux_points = FluxPointsEstimator(
                        energy_edges=self.El_flux,
                        source=model.name,
                        n_sigma_ul=2,
                        selection_optional=["ul"],
                    ).run(datasets=datasets)
                    flux_points.meta["sqrt_ts_threshold_ul"] = 1

                    filename = self.resdir / f"{model.name}_flux_points.fits"
                    flux_points.write(filename, overwrite=True)

            # flux-point phase duration: total elapsed time minus the fit phase
            exec_time = time() - roi_time - exec_time
            print("ROI", kr, " Flux points time (s): ", exec_time)