def fit(self, quiet=False, compute_covariance=True, n_samples=5000):
    """
    Perform a fit of the current likelihood model on the datasets

    :param quiet: If False (default), print the results; if True, do not print anything
    :param compute_covariance: If True (default), compute and display the errors and the
           correlation matrix.
    :param n_samples: number of samples to use when building the analysis results
           (default: 5000)
    :return: a DataFrame with the results on the parameters, and a DataFrame with the
             values of the -log(likelihood) at the minimum for each dataset and the
             total one.
    """

    # Update the list of free parameters, to be safe against changes the user might do
    # between the creation of this class and the calling of this method
    self._update_free_parameters()

    # Empty the call recorder
    self._record_calls = {}
    self._ncalls = 0

    # Check if we have free parameters, otherwise simply return the value of the log like
    if len(self._free_parameters) == 0:

        custom_warnings.warn(
            "There is no free parameter in the current model", RuntimeWarning
        )

        # Create the minimizer anyway because it will be needed by the following code
        self._minimizer = self._get_minimizer(
            self.minus_log_like_profile, self._free_parameters
        )

        # Store the "minimum", which is just the current value
        self._current_minimum = float(self.minus_log_like_profile())

    else:

        # Instance the minimizer

        # If we have a global minimizer, use that first (with no covariance)
        if isinstance(self._minimizer_type, minimization.GlobalMinimization):

            # Do global minimization first
            global_minimizer = self._get_minimizer(
                self.minus_log_like_profile, self._free_parameters
            )

            xs, global_log_likelihood_minimum = global_minimizer.minimize(
                compute_covar=False
            )

            # Gather global results
            paths = []
            values = []
            errors = []
            units = []

            for par in list(self._free_parameters.values()):

                paths.append(par.path)
                values.append(par.value)
                errors.append(0)
                units.append(par.unit)

            global_results = ResultsTable(paths, values, errors, errors, units)

            if not quiet:

                print(
                    "\n\nResults after global minimizer (before secondary optimization):"
                )

                global_results.display()

                print(
                    "\nTotal log-likelihood minimum: %.3f\n"
                    % global_log_likelihood_minimum
                )

            # Now set up the secondary minimizer
            self._minimizer = self._minimizer_type.get_second_minimization_instance(
                self.minus_log_like_profile, self._free_parameters
            )

        else:

            # Only local minimization to be performed
            self._minimizer = self._get_minimizer(
                self.minus_log_like_profile, self._free_parameters
            )

        # Perform the fit, but first flush stdout (so if we have verbose=True the
        # messages there will follow what is already in the buffer)
        sys.stdout.flush()

        xs, log_likelihood_minimum = self._minimizer.minimize(
            compute_covar=compute_covariance
        )

        if log_likelihood_minimum == minimization.FIT_FAILED:

            raise FitFailed("The fit failed to converge.")

        # Store the current minimum for the -log likelihood
        self._current_minimum = float(log_likelihood_minimum)

        # First restore the best fit (to make sure we compute the likelihood at the
        # right point in the following)
        self._minimizer.restore_best_fit()

    # Now collect the values of the likelihood for the various datasets

    # Fill the dictionary with the values of the -log likelihood (dataset by dataset)
    minus_log_likelihood_values = collections.OrderedDict()

    # Keep track of the total for a double check
    total = 0

    # Sum up the total number of data points
    total_number_of_data_points = 0

    for dataset in list(self._data_list.values()):

        ml = dataset.inner_fit() * (-1)

        minus_log_likelihood_values[dataset.name] = ml

        total += ml

        total_number_of_data_points += dataset.get_number_of_data_points()

    assert (
        total == self._current_minimum
    ), "Current minimum stored after fit and current do not correspond!"

    # Compute additional statistical measures

    statistical_measures = collections.OrderedDict()

    # For MLE we can only compute the AIC and BIC, as they are point estimates
    statistical_measures["AIC"] = aic(
        -total, len(self._free_parameters), total_number_of_data_points
    )
    statistical_measures["BIC"] = bic(
        -total, len(self._free_parameters), total_number_of_data_points
    )

    # Now instance an analysis results class
    self._analysis_results = MLEResults(
        self.likelihood_model,
        self._minimizer.covariance_matrix,
        minus_log_likelihood_values,
        statistical_measures=statistical_measures,
        n_samples=n_samples,
    )

    # Show the results
    if not quiet:

        self._analysis_results.display()

    return (
        self._analysis_results.get_data_frame(),
        self._analysis_results.get_statistic_frame(),
    )
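

# A minimal usage sketch for fit() (not part of the library code; `model` and `xyl`
# are assumed to be an astromodels Model and an XYLike plugin, built as in
# polyfit() below; the variable names are illustrative only):
#
#     jl = JointLikelihood(model, DataList(xyl))
#     parameter_frame, like_frame = jl.fit(quiet=False, compute_covariance=True)
#     # parameter_frame: best-fit values and errors for each free parameter
#     # like_frame: -log(likelihood) at the minimum, per dataset and in total
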
def polyfit(x: Iterable[float], y: Iterable[float], grade: int,
            exposure: Iterable[float], bayes: bool = False) -> Tuple[Polynomial, float]:
    """
    Fit a polynomial to data. Not a member function, to allow parallel computation.

    :param x: the x coordinates of the data
    :param y: the y coordinates of the data
    :param grade: the polynomial order or grade
    :param exposure: the exposure of the interval
    :param bayes: whether to perform a Bayesian fit
    :return: the fitted Polynomial and the minimum of the -log(likelihood)
    """

    log.debug(f"starting polyfit with grade {grade}")

    if threeML_config.time_series.default_fit_method is not None:

        bayes = threeML_config.time_series.default_fit_method
        log.debug("using a default poly fit method")

    # Work with arrays, and remove NaN entries from the data
    x = np.asarray(x)
    y = np.asarray(y)
    exposure = np.asarray(exposure)

    nan_mask = np.isnan(y)

    y = y[~nan_mask]
    x = x[~nan_mask]
    exposure = exposure[~nan_mask]

    # Check that we have enough counts to perform the fit, otherwise
    # return a "zero polynomial"
    non_zero_mask = y > 0
    n_non_zero = non_zero_mask.sum()

    if n_non_zero == 0:

        log.debug("no counts, return 0")

        # No data, nothing to do!
        return Polynomial([0.0] * (grade + 1)), 0.0

    # Create 3ML plugins and fit them with 3ML!
    # This should eventually allow better configuration

    # Select the model based on the grade
    shape = _grade_model_lookup[grade]()

    ps = PointSource("_dummy", 0, 0, spectral_shape=shape)

    model = Model(ps)

    avg = np.mean(y / exposure)

    log.debug(f"starting polyfit with avg norm {avg}")

    with silence_console_log():

        xy = XYLike("series", x=x, y=y, exposure=exposure,
                    poisson_data=True, quiet=True)

        if not bayes:

            # Make sure the model is positive
            for i, (k, v) in enumerate(model.free_parameters.items()):

                if i == 0:

                    v.bounds = (0, None)
                    v.value = avg

                else:

                    v.value = 0.0

            # We actually use a line here, because a constant
            # returns a single number
            if grade == 0:

                shape.b = 0
                shape.b.fix = True

            jl: JointLikelihood = JointLikelihood(model, DataList(xy))

            jl.set_minimizer("minuit")

            # If the fit fails, retry once and then just accept the result
            try:

                jl.fit(quiet=True)

            except (FitFailed, BadCovariance, AllFitFailed, CannotComputeCovariance):

                log.debug("1st fit failed")

                try:

                    jl.fit(quiet=True)

                except (FitFailed, BadCovariance, AllFitFailed,
                        CannotComputeCovariance):

                    log.debug("all MLE fits failed")

                    pass

            coeff = [v.value for _, v in model.free_parameters.items()]

            log.debug(f"got coeff: {coeff}")

            final_polynomial = Polynomial(coeff)

            try:

                final_polynomial.set_covariace_matrix(jl.results.covariance_matrix)

            except Exception:

                log.exception("Fit failed in channel")
                raise FitFailed()

            min_log_likelihood = xy.get_log_like()

        else:

            # Set smart priors
            for i, (k, v) in enumerate(model.free_parameters.items()):

                if i == 0:

                    v.bounds = (0, None)
                    v.prior = Log_normal(mu=np.log(avg),
                                         sigma=np.max([np.log(avg / 2), 1]))
                    v.value = 1

                else:

                    v.prior = Gaussian(mu=0, sigma=2)
                    v.value = 1e-2

            # We actually use a line here, because a constant
            # returns a single number
            if grade == 0:

                shape.b = 0
                shape.b.fix = True

            ba: BayesianAnalysis = BayesianAnalysis(model, DataList(xy))

            ba.set_sampler("emcee")
            ba.sampler.setup(n_iterations=500, n_burn_in=200, n_walkers=20)
            ba.sample(quiet=True)

            ba.restore_median_fit()

            coeff = [v.value for _, v in model.free_parameters.items()]

            log.debug(f"got coeff: {coeff}")

            final_polynomial = Polynomial(coeff)

            final_polynomial.set_covariace_matrix(
                ba.results.estimate_covariance_matrix()
            )

            min_log_likelihood = xy.get_log_like()

    log.debug(f"-min loglike: {-min_log_likelihood}")

    return final_polynomial, -min_log_likelihood
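

# A minimal usage sketch for polyfit() (not part of the library code; the synthetic
# Poisson light curve below is illustrative only, any real data with matching shapes
# works the same way):
#
#     import numpy as np
#     rng = np.random.default_rng(42)
#     edges = np.linspace(0.0, 100.0, 51)
#     x = 0.5 * (edges[:-1] + edges[1:])    # bin centers
#     exposure = np.diff(edges)             # bin widths used as exposure
#     y = rng.poisson(5.0 * exposure).astype(float)
#     background_poly, min_neg_log_like = polyfit(x, y, grade=1,
#                                                 exposure=exposure, bayes=False)
#     print(background_poly, min_neg_log_like)
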