Example #1
def xy_fitted_joint_likelihood(xy_model_and_datalist):

    model, data = xy_model_and_datalist

    jl = JointLikelihood(model, data)
    res_frame, like_frame = jl.fit()

    return jl, res_frame, like_frame
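A sketch of a test consuming this fixture; the unpacking mirrors the fixture's return value, while the specific assertions are illustrative assumptions:

def test_fixture_returns_fit_products(xy_fitted_joint_likelihood):

    jl, res_frame, like_frame = xy_fitted_joint_likelihood

    # The fit should leave the JointLikelihood object and both frames populated
    assert jl.results is not None
    assert len(res_frame) > 0
    assert len(like_frame) > 0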
Example #2
def test_fit():

    model, datalist = get_model_and_datalist()

    jl = JointLikelihood(model, datalist)

    jl.fit()

    _ = display_photometry_model_magnitudes(jl)
Example #3
    def restore_best_fit_model(self, interval):

        # Get sub-frame containing the results for the requested interval

        sub_frame = self._data_frame.loc[interval]

        # Get the model for this interval
        this_model = self._get_model(interval)

        # Get data for this interval
        this_data = self._get_data(interval)

        # Instantiate a throwaway JointLikelihood object so that plugins have the chance
        # to add nuisance parameters to the model

        _ = JointLikelihood(this_model, this_data)

        # Restore best fit parameters
        for parameter in this_model.free_parameters:

            this_model[parameter].value = sub_frame["value"][parameter]

        return this_model, this_data
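A minimal usage sketch, assuming a host object exposing this method and an interval label present in its results frame (both hypothetical):

# Restore the best-fit parameters for one analysis interval
this_model, this_data = analysis.restore_best_fit_model("interval_0")

# The returned model now carries the best-fit parameter values
print(this_model.free_parameters)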
Example #4
    def fit(self, function, minimizer='minuit', verbose=False):
        """
        Fit the data with the provided function (an astromodels function)

        :param function: astromodels function
        :param minimizer: the minimizer to use
        :param verbose: print every step of the fit procedure
        :return: best fit results
        """

        # This is a wrapper to give an easier way to fit simple data without having to go through the definition
        # of sources
        pts = PointSource("source", 0.0, 0.0, function)

        model = Model(pts)

        self.set_model(model)

        self._joint_like_obj = JointLikelihood(model, DataList(self), verbose=verbose)

        self._joint_like_obj.set_minimizer(minimizer)

        return self._joint_like_obj.fit()
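A usage sketch of this wrapper method, assuming it lives on the XYLike plugin shown in the next example; the data here are made up for illustration:

import numpy as np
from astromodels import Powerlaw
from threeML import XYLike

x = np.logspace(0, 2, 50)
y_true = Powerlaw()(x)

xyl = XYLike("demo", x, y_true, yerr=0.3 * y_true)

# The wrapper builds the PointSource and Model internally and returns the fit products
res_frame, like_frame = xyl.fit(Powerlaw(), minimizer='minuit')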
Example #5
class XYLike(PluginPrototype):

    def __init__(self, name, x, y, yerr=None, poisson_data=False, quiet=False, source_name=None):

        nuisance_parameters = {}

        super(XYLike, self).__init__(name, nuisance_parameters)

        # Make x and y always arrays so we can handle them always in the same way
        # even if they have only one element

        self._x = np.array(x, ndmin=1)
        self._y = np.array(y, ndmin=1)

        # If there are specified errors, use those (assume Gaussian statistic)
        # otherwise make sure that the user specified poisson_error = True and use
        # Poisson statistic

        if yerr is not None:

            self._yerr = np.array(yerr, ndmin=1)

            assert np.all(self._yerr > 0), "Errors cannot be negative or zero."

            if not quiet:

                print("Using Gaussian statistic (equivalent to chi^2) with the provided errors.")

            self._is_poisson = False

            self._has_errors = True

        elif not poisson_data:

            self._yerr = np.ones_like(self._y)

            self._is_poisson = False

            self._has_errors = False

            if not quiet:

                print("Using unweighted Gaussian (equivalent to chi^2) statistic.")

        else:

            if not quiet:

                print("Using Poisson log-likelihood")

            self._is_poisson = True
            self._yerr = None
            self._has_errors = True

        # This will keep track of the simulated datasets we generate
        self._n_simulated_datasets = 0

        # This will contain the JointLikelihood object after a call to .fit()
        self._joint_like_obj = None

        self._likelihood_model = None
        # currently not used by XYLike, but needed for subclasses

        self._mask = np.ones(self._x.shape, dtype=bool)

        # This is the name of the source this SED refers to (if it is a SED)
        self._source_name = source_name

    @classmethod
    def from_function(cls, name, function, x, yerr, **kwargs):
        """
        Generate an XYLike plugin from an astromodels function instance
        
        :param name: name of plugin
        :param function: astromodels function instance
        :param x: where to simulate
        :param yerr: y errors or None for Poisson data
        :param kwargs: kwargs from xylike constructor
        :return: XYLike plugin
        """

        y = function(x)

        xyl_gen = XYLike("generator", x, y, yerr, **kwargs)

        pts = PointSource("fake", 0.0, 0.0, function)

        model = Model(pts)

        xyl_gen.set_model(model)

        return xyl_gen.get_simulated_dataset(name)

    @classmethod
    def from_dataframe(cls, name, dataframe, x_column='x', y_column='y', err_column='yerr', poisson=False):
        """
        Generate a XYLike instance from a pandas.DataFrame instance

        :param name: the name for the XYLike instance
        :param dataframe: the input data frame
        :param x_column: name of the column to be used as x (default: 'x')
        :param y_column: name of the column to be used as y (default: 'y')
        :param err_column: name of the column to be used as error on y (default: 'yerr')
        :param poisson: if True, then the err_column is ignored and data are treated as Poisson distributed
        :return: a XYLike instance
        """

        x = dataframe[x_column]
        y = dataframe[y_column]

        if poisson is False:

            yerr = dataframe[err_column]

            if np.all(yerr == -99):

                # This is a dataframe generated with the to_dataframe method, which uses -99 to indicate that the
                # data are Poisson

                return cls(name, x=x, y=y, poisson_data=True)

            else:

                # A dataset with errors

                return cls(name, x=x, y=y, yerr=yerr)

        else:

            return cls(name, x=x, y=y, poisson_data=True)

    @classmethod
    def from_text_file(cls, name, filename):
        """
        Instantiate the plugin starting from a text file generated with the .to_txt() method. Note that a more general
        way of creating a XYLike instance from a text file is to read the file using pandas.read_csv, and
        then use the .from_dataframe method of the XYLike plugin:

        > df = pd.read_csv(filename, ...)
        > xyl = XYLike.from_dataframe("my instance", df)

        :param name: the name for the new instance
        :param filename: path to the file
        :return:
        """

        df = pd.read_csv(filename, sep=" ")

        return cls.from_dataframe(name, df)

    def to_dataframe(self):
        """
        Returns a pandas.DataFrame instance with the data in the 'x', 'y', and 'yerr' columns. If the data are Poisson,
        the yerr column will be -99 for every entry

        :return: a pandas.DataFrame instance
        """

        x_series = pd.Series(self.x, name='x')
        y_series = pd.Series(self.y, name='y')

        if self._is_poisson:

            # Since DataFrame does not support metadata, there is no way to save the information that the data
            # are Poisson distributed. We use instead a value of -99 for the error, to indicate that the data
            # are Poisson

            yerr_series = pd.Series(np.ones_like(self.x) * (-99), name='yerr')

        else:

            yerr_series = pd.Series(self.yerr, name='yerr')

        df = pd.concat((x_series, y_series, yerr_series), axis=1)

        return df

    def to_txt(self, filename):
        """
        Save the dataset in a text file. You can read the content back into a dataframe using:

        > df = pandas.read_csv(filename, sep=' ')

        and recreate the XYLike instance as:

        > xyl = XYLike.from_dataframe("my instance", df)

        :param filename: Name of the output file
        :return: none
        """

        df = self.to_dataframe()  # type: pd.DataFrame

        df.to_csv(filename, sep=" ")

    def to_csv(self, *args, **kwargs):
        """
        Save the data in a comma-separated-values (CSV) file. All keyword arguments are passed to the
        pandas.DataFrame.to_csv method (see the pandas documentation for all possibilities). This gives very
        fine control over the format of the output

        All arguments are forwarded to pandas.DataFrame.to_csv

        :return: none
        """

        df = self.to_dataframe()

        df.to_csv(*args, **kwargs)

    def assign_to_source(self, source_name):
        """
        Assign these data to the given source (instead of to the sum of all sources, which is the default)
        
        :param source_name: name of the source (must be contained in the likelihood model)
        :return: none
        """

        if self._likelihood_model is not None and source_name is not None:

            assert source_name in self._likelihood_model.point_sources, "Source %s is not a point source in " \
                                                                        "the likelihood model" % source_name

        self._source_name = source_name

    @property
    def x(self):

        return self._x

    @property
    def y(self):

        return self._y

    @property
    def yerr(self):

        return self._yerr

    @property
    def is_poisson(self):

        return self._is_poisson

    @property
    def has_errors(self):

        return self._has_errors

    def set_model(self, likelihood_model_instance):
        """
        Set the model to be used in the joint minimization. Must be a LikelihoodModel instance.

        :param likelihood_model_instance: instance of Model
        :type likelihood_model_instance: astromodels.Model
        """

        if likelihood_model_instance is None:

            return

        if self._source_name is not None:

            # Make sure that the source is in the model
            assert self._source_name in likelihood_model_instance.point_sources, \
                                                "This XYLike plugin refers to the source %s, " \
                                                "but that source is not a point source in the likelihood model" % (self._source_name)

        self._likelihood_model = likelihood_model_instance

    def _get_total_expectation(self):

        if self._source_name is None:

            n_point_sources = self._likelihood_model.get_number_of_point_sources()

            assert n_point_sources > 0, "You need to have at least one point source defined"
            assert self._likelihood_model.get_number_of_extended_sources() == 0, "XYLike does not support extended sources"

            # Sum the expectations of all point sources (XYLike does not support a spatial dimension)

            expectation = np.sum([source(self._x, tag=self._tag) for source in list(self._likelihood_model.point_sources.values())],
                                 axis=0)

        else:

            # This XYLike dataset refers to a specific source

            # Note that we checked that self._source_name is in the model when the model was set

            if self._source_name in self._likelihood_model.point_sources:
            
                expectation = self._likelihood_model.point_sources[self._source_name](self._x)

            else:

                raise KeyError("This XYLike plugin has been assigned to source %s, "
                               "which is not a point source in the current model" % self._source_name)

        return expectation

    def get_log_like(self):
        """
        Return the value of the log-likelihood with the current values for the
        parameters
        """

        expectation = self._get_total_expectation()

        if self._is_poisson:

            # Poisson log-likelihood

            return np.sum(poisson_log_likelihood_ideal_bkg(self._y, np.zeros_like(self._y), expectation))

        else:

            # Chi squared
            chi2_ = half_chi2(self._y, self._yerr, expectation)

            assert np.all(np.isfinite(chi2_))

            return np.sum(chi2_) * (-1)

    def get_simulated_dataset(self, new_name=None):

        assert self._has_errors, "You cannot simulate a dataset if the original dataset has no errors"

        self._n_simulated_datasets += 1

        # unmask the data

        old_mask = copy.copy(self._mask)

        self._mask = np.ones(self._x.shape, dtype=bool)


        if new_name is None:

            new_name = "%s_sim%i" % (self.name, self._n_simulated_datasets)

        # Get total expectation from model
        expectation = self._get_total_expectation()

        if self._is_poisson:

            new_y = np.random.poisson(expectation)

        else:

            new_y = np.random.normal(expectation, self._yerr)

        # remask the data BEFORE creating the new plugin

        self._mask = old_mask

        return self._new_plugin(new_name, self._x, new_y, yerr=self._yerr)

    def _new_plugin(self, name, x, y, yerr):
        """
        Construct a new plugin. Allows for returning a new plugin from a simulated
        data set while customizing the constructor further down the inheritance
        tree

        :param name: new name
        :param x: new x
        :param y: new y
        :param yerr: new yerr
        :return: new XYLike


        """

        new_xy = type(self)(name, x, y, yerr, poisson_data=self._is_poisson, quiet=True)

        # apply the current mask

        new_xy._mask = copy.copy(self._mask)

        return new_xy

    def plot(self, x_label='x', y_label='y', x_scale='linear', y_scale='linear'):

        fig, sub = plt.subplots(1,1)

        sub.errorbar(self.x, self.y, yerr=self.yerr, fmt='.')

        sub.set_xscale(x_scale)
        sub.set_yscale(y_scale)

        sub.set_xlabel(x_label)
        sub.set_ylabel(y_label)

        if self._likelihood_model is not None:

            flux = self._get_total_expectation()

            sub.plot(self.x, flux, '--', label='model')

            sub.legend(loc=0)

        return fig


    def inner_fit(self):
        """
        This is used for the profile likelihood. Keeping fixed all parameters in the
        LikelihoodModel, this method minimizes the logLike over the remaining nuisance
        parameters, i.e., the parameters belonging only to the model for this
        particular detector. If there are no nuisance parameters, simply return the
        logLike value.
        """

        return self.get_log_like()

    def get_model(self):

        return self._get_total_expectation()


    def fit(self, function, minimizer='minuit', verbose=False):
        """
        Fit the data with the provided function (an astromodels function)

        :param function: astromodels function
        :param minimizer: the minimizer to use
        :param verbose: print every step of the fit procedure
        :return: best fit results
        """

        # This is a wrapper to give an easier way to fit simple data without having to go through the definition
        # of sources
        pts = PointSource("source", 0.0, 0.0, function)

        model = Model(pts)

        self.set_model(model)

        self._joint_like_obj = JointLikelihood(model, DataList(self), verbose=verbose)

        self._joint_like_obj.set_minimizer(minimizer)

        return self._joint_like_obj.fit()

    def goodness_of_fit(self, n_iterations=1000, continue_of_failure=False):
        """
        Returns the goodness of fit of the performed fit

        :param n_iterations: number of Monte Carlo simulations to generate
        :param continue_of_failure: whether to continue or not if a fit fails (default: False)
        :return: tuple (goodness of fit, frame with all results, frame with all likelihood values)
        """

        g = GoodnessOfFit(self._joint_like_obj)

        return g.by_mc(n_iterations, continue_of_failure)


    def get_number_of_data_points(self):
        """
        Returns the number of active data points
        :return: the number of data points currently in use
        """

        # the sum of the mask should be the number of data points in use

        return self._mask.sum()
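A short sketch exercising the I/O helpers defined above; the file name and data are made up for illustration:

import numpy as np
from threeML import XYLike

x = np.linspace(1.0, 10.0, 20)
y = np.random.poisson(10.0, size=x.shape)

xyl = XYLike("counts", x, y, poisson_data=True)

# Round-trip through a text file; Poisson data are flagged with yerr == -99
xyl.to_txt("counts.txt")
xyl_copy = XYLike.from_text_file("counts_copy", "counts.txt")

assert xyl_copy.is_poisson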
Example #6
    def worker(self, interval):

        # Get the dataset for this interval

        this_data = self._data_getter(interval)  # type: DataList

        # Get the model for this interval

        this_models = self._model_getter(interval)

        # Apply preprocessor (if any)
        if self._preprocessor is not None:

            self._preprocessor(this_models, this_data)

        n_models = len(this_models)

        # Fit all models and collect the results

        parameters_frames = []
        like_frames = []
        analysis_results = []

        for this_model in this_models:

            # Prepare a joint likelihood and fit it

            with warnings.catch_warnings():

                warnings.simplefilter("ignore", RuntimeWarning)

                jl = JointLikelihood(this_model, this_data)

            this_parameter_frame, this_like_frame = self._fitter(jl)

            # Append results

            parameters_frames.append(this_parameter_frame)
            like_frames.append(this_like_frame)
            analysis_results.append(jl.results)

        # Now merge the results in one data frame for the parameters and one for the likelihood
        # values

        if n_models > 1:

            # Prepare the keys so that the first model will be indexed with model_0, the second model_1 and so on

            keys = ["model_%i" % x for x in range(n_models)]

            # Concatenate all results in one frame for parameters and one for likelihood

            frame_with_parameters = pd.concat(parameters_frames, keys=keys)
            frame_with_like = pd.concat(like_frames, keys=keys)

        else:

            frame_with_parameters = parameters_frames[0]
            frame_with_like = like_frames[0]

        return frame_with_parameters, frame_with_like, analysis_results
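When more than one model is fit for an interval, the returned parameter frame is concatenated with keys model_0, model_1, and so on; a sketch of reading it back (the host object and interval label are hypothetical):

frame_with_parameters, frame_with_like, analysis_results = fitter.worker(interval)

# Best-fit parameters of the second model for this interval
second_model_parameters = frame_with_parameters.loc["model_1"]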
Example #7
def test_energy_time_fit():

    # Let's generate our dataset of 4 spectra with a normalization that follows
    # a powerlaw in time

    def generate_one(K):
        # Let's generate some data with y = Powerlaw(x)

        gen_function = Powerlaw()
        gen_function.K = K

        # Generate a dataset using the power law, and a
        # constant 30% error

        x = np.logspace(0, 2, 50)

        xyl_generator = XYLike.from_function("sim_data",
                                             function=gen_function,
                                             x=x,
                                             yerr=0.3 * gen_function(x))

        y = xyl_generator.y
        y_err = xyl_generator.yerr

        # xyl = XYLike("data", x, y, y_err)

        # xyl.plot(x_scale='log', y_scale='log')

        return x, y, y_err

    time_tags = np.array([1.0, 2.0, 5.0, 10.0])

    # This is the power law that defines the normalization as a function of time

    normalizations = 0.23 * time_tags**(-1.2)

    datasets = list(map(generate_one, normalizations))

    # Now set up the fit and fit it

    time = IndependentVariable("time", 1.0, u.s)

    plugins = []

    for i, dataset in enumerate(datasets):
        x, y, y_err = dataset

        xyl = XYLike("data%i" % i, x, y, y_err)

        xyl.tag = (time, time_tags[i])

        assert xyl.tag == (time, time_tags[i], None)

        plugins.append(xyl)

    data = DataList(*plugins)

    spectrum = Powerlaw()
    spectrum.K.bounds = (0.01, 1000.0)

    src = PointSource("test", 0.0, 0.0, spectrum)

    model = Model(src)

    model.add_independent_variable(time)

    time_po = Powerlaw()
    time_po.K.bounds = (0.01, 1000)
    time_po.K.value = 2.0
    time_po.index = -1.5

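    # Link the spectral normalization K to the function time_po of the independent
    # variable time: each plugin's tag selects the time at which time_po is evaluated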
    model.link(spectrum.K, time, time_po)

    jl = JointLikelihood(model, data)

    jl.set_minimizer("minuit")

    best_fit_parameters, likelihood_values = jl.fit()

    # Make sure we are within 10% of the expected result

    assert np.allclose(
        best_fit_parameters["value"].values,
        [0.25496115, -1.2282951, -2.01508341],
        rtol=0.1,
    )
Example #8
def unbinned_polyfit(events: Iterable[float], grade: int, t_start: float, t_stop: float, exposure: float, bayes: bool) -> Tuple[Polynomial, float]:
    """
    Function to fit a polynomial to unbinned event data.
    Not a member, to allow parallel computation

    :param events: the events to fit
    :param grade: the polynomial order or grade
    :param t_start: the start time to fit over
    :param t_stop: the end time to fit over
    :param exposure: the exposure of the interval
    :param bayes: whether to do a Bayesian fit

    """

    log.debug(f"starting unbinned_polyfit with grade {grade}")
    log.debug(f"have {len(events)} events with {exposure} exposure")

    # create 3ML plugins and fit them with 3ML!
    # should eventually allow better config

    # select the model based on the grade

    if threeML_config.time_series.default_fit_method is not None:

        bayes = threeML_config.time_series.default_fit_method
        log.debug("using a default poly fit method")

    if len(events) == 0:

        log.debug("no events! returning zero")

        return Polynomial([0] * (grade + 1)), 0

    shape = _grade_model_lookup[grade]()

    with silence_console_log():

        ps = PointSource("dummy", 0, 0, spectral_shape=shape)

        model = Model(ps)

        observation = EventObservation(events, exposure, t_start, t_stop)

        xy = UnbinnedPoissonLike("series", observation=observation)

        if not bayes:

            # make sure the model is positive

            for i, (k, v) in enumerate(model.free_parameters.items()):

                if i == 0:

                    v.bounds = (0, None)

                    v.value = 10

                else:

                    v.value = 0.0

            # We actually use a line here because a constant
            # returns a single number

            if grade == 0:

                shape.b = 0
                shape.b.fix = True

            jl: JointLikelihood = JointLikelihood(model, DataList(xy))

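            # Do a global grid search over the normalization "a", then refine the
            # best grid point locally with minuit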
            grid_minimizer = GlobalMinimization("grid")

            local_minimizer = LocalMinimization("minuit")

            my_grid = {
                model.dummy.spectrum.main.shape.a: np.logspace(0, 3, 10)}

            grid_minimizer.setup(
                second_minimization=local_minimizer, grid=my_grid)

            jl.set_minimizer(grid_minimizer)

            # If the fit fails, retry once and then just accept

            try:

                jl.fit(quiet=True)

            except (FitFailed, BadCovariance, AllFitFailed, CannotComputeCovariance):

                try:

                    jl.fit(quiet=True)

                except (FitFailed, BadCovariance, AllFitFailed, CannotComputeCovariance):

                    log.debug("all MLE fits failed, returning zero")

                    return Polynomial([0]*(grade + 1)), 0

            coeff = [v.value for _, v in model.free_parameters.items()]

            log.debug(f"got coeff: {coeff}")

            final_polynomial = Polynomial(coeff)

            final_polynomial.set_covariace_matrix(jl.results.covariance_matrix)

            min_log_likelihood = xy.get_log_like()

        else:

            # set smart priors

            for i, (k, v) in enumerate(model.free_parameters.items()):

                if i == 0:

                    v.bounds = (0, None)
                    v.prior = Log_normal(mu=np.log(5), sigma=np.log(5))
                    v.value = 1

                else:

                    v.prior = Gaussian(mu=0, sigma=.5)
                    v.value = 0.1

            # We actually use a line here because a constant
            # returns a single number

            if grade == 0:

                shape.b = 0
                shape.b.fix = True

            ba: BayesianAnalysis = BayesianAnalysis(model, DataList(xy))

            ba.set_sampler("emcee")

            ba.sampler.setup(n_iterations=500, n_burn_in=200, n_walkers=20)

            ba.sample(quiet=True)

            ba.restore_median_fit()

            coeff = [v.value for _, v in model.free_parameters.items()]

            log.debug(f"got coeff: {coeff}")

            final_polynomial = Polynomial(coeff)

            final_polynomial.set_covariace_matrix(
                ba.results.estimate_covariance_matrix())

            min_log_likelihood = xy.get_log_like()

    log.debug(f"-min loglike: {-min_log_likelihood}")

    return final_polynomial, -min_log_likelihood
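A usage sketch, assuming the module-level dependencies referenced above (_grade_model_lookup, EventObservation, UnbinnedPoissonLike, ...) are in scope; the event times are made up:

import numpy as np

events = np.sort(np.random.uniform(0.0, 10.0, size=200))

# MLE fit of a first-order polynomial rate to the unbinned events
poly, neg_log_like = unbinned_polyfit(events,
                                      grade=1,
                                      t_start=0.0,
                                      t_stop=10.0,
                                      exposure=10.0,
                                      bayes=False)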
Example #9
def polyfit(x: Iterable[float], y: Iterable[float], grade: int, exposure: Iterable[float], bayes: bool = False) -> Tuple[Polynomial, float]:
    """ 
    Function to fit a polynomial to data.
    Not a member, to allow parallel computation

    :param x: the x coord of the data
    :param y: the y coord of the data
    :param grade: the polynomial order or grade
    :param exposure: the exposure of the interval
    :param bayes: whether to do a Bayesian fit


    """

    # Check that we have enough counts to perform the fit, otherwise
    # return a "zero polynomial"

    log.debug(f"starting polyfit with grade {grade} ")

    if threeML_config.time_series.default_fit_method is not None:

        bayes = threeML_config.time_series.default_fit_method
        log.debug("using a default poly fit method")

    nan_mask = np.isnan(y)

    y = y[~nan_mask]
    x = x[~nan_mask]
    exposure = exposure[~nan_mask]

    non_zero_mask = y > 0
    n_non_zero = non_zero_mask.sum()
    if n_non_zero == 0:

        log.debug("no counts, return 0")

        # No data, nothing to do!
        return Polynomial([0.0]*(grade+1)), 0.0

    # create 3ML plugins and fit them with 3ML!
    # should eventually allow better config

    # select the model based on the grade

    shape = _grade_model_lookup[grade]()

    ps = PointSource("_dummy", 0, 0, spectral_shape=shape)

    model = Model(ps)

    avg = np.mean(y/exposure)

    log.debug(f"starting polyfit with avg norm {avg}")

    with silence_console_log():

        xy = XYLike("series", x=x, y=y, exposure=exposure,
                    poisson_data=True, quiet=True)

        if not bayes:

            # make sure the model is positive

            for i, (k, v) in enumerate(model.free_parameters.items()):

                if i == 0:

                    v.bounds = (0, None)

                    v.value = avg

                else:

                    v.value = 0.0

            # We actually use a line here because a constant
            # returns a single number

            if grade == 0:

                shape.b = 0
                shape.b.fix = True

            jl: JointLikelihood = JointLikelihood(model, DataList(xy))

            jl.set_minimizer("minuit")

            # If the fit fails, retry once and then just accept

            try:

                jl.fit(quiet=True)

            except (FitFailed, BadCovariance, AllFitFailed, CannotComputeCovariance):

                log.debug("1st fit failed")

                try:

                    jl.fit(quiet=True)

                except (FitFailed, BadCovariance, AllFitFailed, CannotComputeCovariance):

                    log.debug("all MLE fits failed")

                    pass

            coeff = [v.value for _, v in model.free_parameters.items()]

            log.debug(f"got coeff: {coeff}")

            final_polynomial = Polynomial(coeff)

            try:
                final_polynomial.set_covariace_matrix(
                    jl.results.covariance_matrix)

            except Exception:

                log.exception("Fit failed in channel")
                raise FitFailed()

            min_log_likelihood = xy.get_log_like()

        else:

            # set smart priors

            for i, (k, v) in enumerate(model.free_parameters.items()):

                if i == 0:

                    v.bounds = (0, None)
                    v.prior = Log_normal(
                        mu=np.log(avg), sigma=np.max([np.log(avg/2), 1]))
                    v.value = 1

                else:

                    v.prior = Gaussian(mu=0, sigma=2)
                    v.value = 1e-2

            # We actually use a line here because a constant
            # returns a single number

            if grade == 0:

                shape.b = 0
                shape.b.fix = True

            ba: BayesianAnalysis = BayesianAnalysis(model, DataList(xy))

            ba.set_sampler("emcee")

            ba.sampler.setup(n_iterations=500, n_burn_in=200, n_walkers=20)

            ba.sample(quiet=True)

            ba.restore_median_fit()

            coeff = [v.value for _, v in model.free_parameters.items()]

            log.debug(f"got coeff: {coeff}")

            final_polynomial = Polynomial(coeff)

            final_polynomial.set_covariace_matrix(
                ba.results.estimate_covariance_matrix())

            min_log_likelihood = xy.get_log_like()

    log.debug(f"-min loglike: {-min_log_likelihood}")

    return final_polynomial, -min_log_likelihood
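A usage sketch for the binned case; the light curve and per-bin exposure are made up:

import numpy as np

bin_centers = np.linspace(0.5, 99.5, 100)
counts = np.random.poisson(5.0, size=bin_centers.shape).astype(float)
exposure = np.ones_like(bin_centers)

# MLE fit of a second-order polynomial to the binned counts
poly, neg_log_like = polyfit(bin_centers, counts, grade=2, exposure=exposure, bayes=False)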
Example #10
def test_unbinned_poisson_full(event_observation_contiguous, event_observation_split):

    s = Line()

    ps = PointSource("s", 0, 0, spectral_shape=s)

    s.a.bounds = (0, None)
    s.a.value = .1
    s.b.value = .1

    s.a.prior = Log_normal(mu=np.log(10), sigma=1)
    s.b.prior = Gaussian(mu=0, sigma=1)

    m = Model(ps)

    ######
    ######
    ######

    
    ub1 = UnbinnedPoissonLike("test", observation=event_observation_contiguous)

    jl = JointLikelihood(m, DataList(ub1))

    jl.fit(quiet=True)

    np.testing.assert_allclose([s.a.value, s.b.value], [6.11, 1.45], rtol=.5)

    ba = BayesianAnalysis(m, DataList(ub1))

    ba.set_sampler("emcee")

    ba.sampler.setup(n_burn_in=100, n_walkers=20, n_iterations=500)

    ba.sample(quiet=True)

    ba.restore_median_fit()

    np.testing.assert_allclose([s.a.value, s.b.value], [6.11, 1.45], rtol=.5)

    ######
    ######
    ######

    ub2 = UnbinnedPoissonLike("test", observation=event_observation_split)

    jl = JointLikelihood(m, DataList(ub2))

    jl.fit(quiet=True)

    np.testing.assert_allclose([s.a.value, s.b.value], [2., .2], rtol=.5)

    ba = BayesianAnalysis(m, DataList(ub2))

    ba.set_sampler("emcee")

    ba.sampler.setup(n_burn_in=100, n_walkers=20, n_iterations=500)

    ba.sample(quiet=True)

    ba.restore_median_fit()

    np.testing.assert_allclose([s.a.value, s.b.value], [2., .2], rtol=.5)
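After either analysis above, the fit products can be inspected through the results objects; a brief sketch (display() follows the usual threeML results API, so treat the call as an assumption):

results = ba.results

results.display()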