Ejemplo n.º 1
0
def fit_with_minuit(cost_func: Union[cost_function.CostFunctionBase, cost_function.SimultaneousFit],
                    minuit_args: T_FitArguments, x: np.ndarray,
                    use_minos: Optional[bool] = False) -> Tuple[base.FitResult, iminuit.Minuit]:
    """ Perform a fit using the given cost function with Minuit.

    The passed ``minuit_args`` are never modified: the function works on a shallow copy, so the
    same arguments can safely be reused for further fits (including fits with a different type
    of cost function, which may need a different error definition).

    Args:
        cost_func: Cost function to be used with Minuit.
        minuit_args: Arguments for minuit. Need to set the initial value, limits, and error (step)
            of each parameter. If ``errordef`` is included, it is used as is. Otherwise, it is
            selected based on the type of the cost function.
        x: x value(s) where the fit is evaluated, which will be stored in the fit result.
        use_minos: Calculate MINOS errors. They have to be accessed through the Minuit object. Default: False.
    Returns:
        (fit_result, Minuit object): The fit result extracts values from the Minuit object, but
            the Minuit object is also returned for good measure.
    Raises:
        base.FitFailed: If Minuit does not report the minimization as okay.
    """
    # Validation
    # Will raise an exception if there are invalid arguments.
    _validate_minuit_args(cost_func = cost_func, minuit_args = minuit_args)
    # Work on a copy of the arguments. Otherwise, the error definition that we may store below would
    # leak into the dict owned by the caller, and would then be picked up as a user override if the
    # same arguments are reused for another fit.
    minuit_args = dict(minuit_args)
    # Set the error definition.
    # We only set it if it's not already there, which allows the user to override it if they are
    # so inclined. (Overriding it should be pretty rare).
    if "errordef" not in minuit_args:
        # Log likelihood cost functions needs an errordef of 0.5 to scale the errors properly, while 1 should
        # be used for chi squared cost functions.
        error_def = 1.0
        if isinstance(cost_func, (cost_function.LogLikelihood, cost_function.BinnedLogLikelihood)):
            error_def = 0.5
        # Store the value.
        minuit_args["errordef"] = error_def

    # Perform the fit
    minuit = iminuit.Minuit(cost_func, **minuit_args)
    minuit.migrad()
    # Just in case (doesn't hurt anything, but may help in a few cases).
    minuit.hesse()
    if use_minos:
        minuit.minos()

    # Check that the fit is actually good
    if not minuit.migrad_ok():
        raise base.FitFailed("Minimization failed! The fit is invalid!")

    # Create the fit result and calculate the errors.
    fit_result = base.FitResult.from_minuit(minuit, cost_func, x)
    # We can calculate the fit errors if the cost function has a single function.
    # If it's a simultaneous fit, it's unclear how best this should be handled. Perhaps it could
    # be unraveled and summed, but it's not obvious that that's the best approach. More likely,
    # one only wants the errors for an individual cost function, so we leave that to the user.
    # The isinstance check selects the single function case (where ``cost_func.f`` is used).
    if isinstance(cost_func, cost_function.CostFunctionBase):
        errors = base.calculate_function_errors(cost_func.f, fit_result, x)
    else:
        errors = []
    fit_result.errors = errors

    return fit_result, minuit
Ejemplo n.º 2
0
    def calculate_errors(self, x: Optional[np.ndarray] = None) -> np.ndarray:
        """ Evaluate the errors on the fit function at a set of x values.

        Args:
            x: x values where the fit function error should be evaluated. Default: None. In that
                case, the x values stored in the fit result are used.
        Returns:
            The fit function errors, as calculated by ``base.calculate_function_errors``.
        """
        # Fall back to the x values stored with the fit result when none were requested explicitly.
        evaluation_points = self.fit_result.x if x is None else x
        return base.calculate_function_errors(
            func = self.fit_function, fit_result = self.fit_result, x = evaluation_points,
        )
Ejemplo n.º 3
0
def test_binned_cost_functions_against_ROOT(logging_mixin: Any, cost_func: Any,
                                            fit_option: Any,
                                            setup_parabola: Any) -> None:
    """ Compare fits performed with the binned cost functions against the same fits done by ROOT.

    The same parabola histogram is fit once with ROOT and once with the cost function under test.
    The minimum value, number of degrees of freedom, parameter value and error, covariance matrix,
    estimated distance to minimum, and effective chi squared are then compared.
    """
    # Setup
    h, h_ROOT = setup_parabola
    ROOT = pytest.importorskip("ROOT")
    minuit_args: Dict[str, Union[float, Tuple[float, float]]] = {
        "scale": 1,
        "error_scale": 0.1,
        "limit_scale": (-1000, 1000),
    }
    is_log_likelihood = "L" in fit_option
    # The string is a placeholder so that probfit is only imported (or skipped) when needed.
    if cost_func == "probfit":
        probfit = pytest.importorskip("probfit")
        cost_func = probfit.Chi2Regression

    # Fit with ROOT
    fit_ROOT = ROOT.TF1("parabola", "[0] * TMath::Power(x, 2)", -10.5, 10.5)
    # Expect it to be around 1.
    fit_ROOT.SetParameter(0, minuit_args["scale"])
    # The "0" option is appended to every ROOT fit.
    fit_result_ROOT = h_ROOT.Fit(fit_ROOT, fit_option + "0")
    logger.debug(f"ROOT: chi_2: {fit_result_ROOT.Chi2()}, ndf: {fit_result_ROOT.Ndf()}")

    # Fit with the defined cost function
    args: Dict[str, Any] = {"f": parabola}
    # The cost function class doesn't change from here on, so we only need to classify it once.
    is_package_cost_function = issubclass(cost_func, cost_function.CostFunctionBase)
    if is_package_cost_function:
        args["data"] = h
        # Test for weighted likelihood
        if "W" in fit_option:
            args["use_weights"] = True
    else:
        # Other cost functions (ie. probfit) are given the data points directly.
        args["x"] = h.x
        args["y"] = h.y
        args["error"] = h.errors
    cost = cost_func(**args)
    fit_result, minuit = fit_integration.fit_with_minuit(cost, minuit_args, h.x)

    # Check the minimized value.
    # There is still something a bit different between ROOT's log likelihood calculation and mine.
    # However, the other parameters appear to agree, so it seems okay.
    if not is_log_likelihood:
        assert np.isclose(fit_result.minimum_val, fit_result_ROOT.MinFcnValue(), rtol=0.03)

    if cost_func is cost_function.BinnedLogLikelihood:
        # Calculate the chi squared equivalent and set that to be the minimum value for comparison.
        values_at_minimum = list(fit_result.values_at_minimum.values())
        binned_chi_squared = cost_function._binned_chi_squared(
            h.x, h.y, h.errors, h.bin_edges, parabola, *values_at_minimum
        )
        unbinned_chi_squared = cost_function._chi_squared(
            h.x, h.y, h.errors, h.bin_edges, parabola, *values_at_minimum
        )
        logger.debug(
            f"minimal_val before changing: {fit_result.minimum_val}, ROOT func min: {fit_result_ROOT.MinFcnValue()}"
        )
        logger.debug(
            f"binned chi_squared: {binned_chi_squared}, unbinned chi_squared: {unbinned_chi_squared}"
        )
        fit_result.minimum_val = binned_chi_squared

    # Calculate errors.
    fit_result.errors = fit_base.calculate_function_errors(
        func=parabola, fit_result=fit_result, x=fit_result.x,
    )

    # Check the result
    logger.debug(f"Fit chi_2: {fit_result.minimum_val}, ndf: {fit_result.nDOF}")
    # It won't agree exactly because ROOT appears to use the unbinned chi squared to calculate this value.
    # This can be seen because probfit agrees with ROOT.
    assert np.isclose(fit_result.minimum_val, fit_result_ROOT.Chi2(), rtol=0.035)
    assert np.isclose(fit_result.nDOF, fit_result_ROOT.Ndf())
    # Check the parameters
    # Value
    assert np.isclose(fit_result.values_at_minimum["scale"], fit_result_ROOT.Parameter(0), rtol=0.05)
    # Error
    assert np.isclose(fit_result.errors_on_parameters["scale"], fit_result_ROOT.ParError(0), rtol=0.005)
    # Covariance matrix
    if is_package_cost_function:
        covariance_ROOT = fit_result_ROOT.GetCovarianceMatrix()
        # Print the fit result, alongside the covariance
        fit_result_ROOT.Print("V")
        logger.debug(f"Covariance: {fit_result.covariance_matrix}")
        # Our covariance matrix is keyed by parameter names, while the ROOT matrix is accessed via
        # the position of each parameter in the list of free parameters.
        for i_index, i_name in enumerate(fit_result.free_parameters):
            for j_index, j_name in enumerate(fit_result.free_parameters):
                logger.debug(
                    f"Checking covariance matrix parameters: ({i_name}:{i_index}, {j_name}:{j_index})"
                )
                assert np.isclose(
                    fit_result.covariance_matrix[(i_name, j_name)],
                    covariance_ROOT(i_index, j_index),
                    rtol=0.01,
                )
    # Estimated distance to minimum
    assert np.isclose(minuit.fmin.edm, fit_result_ROOT.Edm(), atol=1e-3)

    # Check the effective chi squared. This won't work in the probfit case because we don't recognize
    # the type properly (and it's not worth the effort).
    if is_package_cost_function:
        effective_chi_squared = fit_result.effective_chi_squared(cost)
        if is_log_likelihood:
            # For the log likelihood, the binned chi squared evaluated at the minimum is expected.
            expected_chi_squared = cost_function._binned_chi_squared(
                cost.data.x,
                cost.data.y,
                cost.data.errors,
                cost.data.bin_edges,
                cost.f,
                *fit_result.values_at_minimum.values(),
            )
        else:
            expected_chi_squared = fit_result.minimum_val
        assert effective_chi_squared == expected_chi_squared
Ejemplo n.º 4
0
def test_binned_cost_functions_against_ROOT(logging_mixin: Any, cost_func: Any,
                                            fit_option: Any,
                                            setup_parabola: Any) -> None:
    """ Compare fits performed with the binned cost functions against the same fits done by ROOT.

    The same parabola histogram is fit once with ROOT and once with the cost function under test.
    The minimum value, number of degrees of freedom, parameter value and error, and effective
    chi squared are then compared.
    """
    # Setup
    h, h_ROOT = setup_parabola
    ROOT = pytest.importorskip("ROOT")
    minuit_args: Dict[str, Union[float, Tuple[float, float]]] = {
        "scale": 1,
        "error_scale": 0.1,
        "limit_scale": (-1000, 1000),
    }
    is_log_likelihood = "L" in fit_option
    # The string is a placeholder so that probfit is only imported (or skipped) when needed.
    if cost_func == "probfit":
        probfit = pytest.importorskip("probfit")
        cost_func = probfit.Chi2Regression

    # Fit with ROOT
    fit_ROOT = ROOT.TF1("parabola", "[0] * TMath::Power(x, 2)", -10.5, 10.5)
    # Expect it to be around 1.
    fit_ROOT.SetParameter(0, minuit_args["scale"])
    # The "0" option is appended to every ROOT fit.
    fit_result_ROOT = h_ROOT.Fit(fit_ROOT, fit_option + "0")
    logger.debug(f"ROOT: chi_2: {fit_result_ROOT.Chi2()}, ndf: {fit_result_ROOT.Ndf()}")

    # Fit with the defined cost function
    args: Dict[str, Any] = {"f": parabola}
    # The cost function class doesn't change from here on, so we only need to classify it once.
    is_package_cost_function = issubclass(cost_func, cost_function.CostFunctionBase)
    if is_package_cost_function:
        args["data"] = h
    else:
        # Other cost functions (ie. probfit) are given the data points directly.
        args["x"] = h.x
        args["y"] = h.y
        args["error"] = h.errors
    cost = cost_func(**args)
    # The Minuit object isn't needed for any of the checks below.
    fit_result, _ = fit_integration.fit_with_minuit(cost, minuit_args, h.x)

    # Check the minimized value.
    # It doesn't appear that it will agree for log likelihood
    if not is_log_likelihood:
        assert np.isclose(fit_result.minimum_val, fit_result_ROOT.MinFcnValue(), rtol=0.03)

    if cost_func is cost_function.BinnedLogLikelihood:
        # Calculate the chi squared equivalent and set that to be the minimum value for comparison.
        values_at_minimum = list(fit_result.values_at_minimum.values())
        binned_chi_squared = cost_function._binned_chi_squared(
            h.x, h.y, h.errors, h.bin_edges, parabola, *values_at_minimum
        )
        unbinned_chi_squared = cost_function._chi_squared(
            h.x, h.y, h.errors, h.bin_edges, parabola, *values_at_minimum
        )
        logger.debug(
            f"minimual_val before changing: {fit_result.minimum_val}, ROOT func min: {fit_result_ROOT.MinFcnValue()}"
        )
        logger.debug(
            f"binned chi_squared: {binned_chi_squared}, unbinned chi_squared: {unbinned_chi_squared}"
        )
        fit_result.minimum_val = binned_chi_squared

    # Calculate errors.
    fit_result.errors = fit_base.calculate_function_errors(
        func=parabola, fit_result=fit_result, x=fit_result.x,
    )

    # Check the result
    logger.debug(f"Fit chi_2: {fit_result.minimum_val}, ndf: {fit_result.nDOF}")
    # It won't agree exactly because ROOT appears to use the unbinned chi squared to calculate this value.
    # This can be seen because probfit agrees with ROOT.
    assert np.isclose(fit_result.minimum_val, fit_result_ROOT.Chi2(), rtol=0.035)
    assert np.isclose(fit_result.nDOF, fit_result_ROOT.Ndf())
    # Check the parameters
    # Value
    assert np.isclose(fit_result.values_at_minimum["scale"], fit_result_ROOT.Parameter(0), rtol=0.05)
    # Error
    # TODO: For some reason, the error is substantially larger in the log likelihood cost function compared
    #       to ROOT. This requires more investigation, but shouldn't totally derail progress at the moment.
    if not is_log_likelihood:
        assert np.isclose(fit_result.errors_on_parameters["scale"], fit_result_ROOT.ParError(0), rtol=0.005)
    # Check the effective chi squared. This won't work in the probfit case because we don't recognize
    # the type properly (and it's not worth the effort).
    if is_package_cost_function:
        effective_chi_squared = fit_result.effective_chi_squared(cost)
        if is_log_likelihood:
            # For the log likelihood, the binned chi squared evaluated at the minimum is expected.
            expected_chi_squared = cost_function._binned_chi_squared(
                cost.data.x, cost.data.y, cost.data.errors,
                cost.data.bin_edges, cost.f,
                *fit_result.values_at_minimum.values(),
            )
        else:
            expected_chi_squared = fit_result.minimum_val
        assert effective_chi_squared == expected_chi_squared
Ejemplo n.º 5
0
def fit_with_minuit(
    cost_func: Union[cost_function.CostFunctionBase,
                     cost_function.SimultaneousFit],
    minuit_args: T_FitArguments,
    x: npt.NDArray[Any],
    use_minos: Optional[bool] = False,
) -> Tuple[base.FitResult, iminuit.Minuit]:
    """Perform a fit using the given cost function with Minuit.

    The arguments are specified with prefixed keys: ``error_<name>``, ``limit_<name>``, and
    ``fix_<name>`` set the error (step), limits, and fixed status of the parameter ``<name>``,
    while ``errordef`` sets the error definition. These entries are removed from the arguments
    and assigned to the Minuit object one by one. Everything which remains is passed directly
    to the ``iminuit.Minuit`` constructor. The passed ``minuit_args`` are never modified.

    Args:
        cost_func: Cost function to be used with Minuit.
        minuit_args: Arguments for minuit. Need to set the initial value, limits, and error (step)
            of each parameter. If ``errordef`` is included, it is used as is. Otherwise, it is
            selected based on the type of the cost function.
        x: x value(s) where the fit is evaluated, which will be stored in the fit result.
        use_minos: Calculate MINOS errors. They have to be accessed through the Minuit object. Default: False.
    Returns:
        (fit_result, Minuit object): The fit result extracts values from the Minuit object, but
            the Minuit object is also returned for good measure.
    Raises:
        base.FitFailed: If Minuit reports that the minimum is not valid, or that the covariance
            matrix is not accurate.
    """
    # Validation
    # Will raise an exception if there are invalid arguments.
    _validate_minuit_args(cost_func=cost_func, minuit_args=minuit_args)
    # Copy the minuit_args so we don't cause issues elsewhere when we pop values
    minuit_args = dict(minuit_args)
    # Set the error definition.
    # We only set it if it's not already there, which allows the user to override it if they are
    # so inclined. (Overriding it should be pretty rare).
    if "errordef" not in minuit_args:
        # Log likelihood cost functions needs an errordef of 0.5 to scale the errors properly, while 1 should
        # be used for chi squared cost functions.
        error_def = 1.0
        if isinstance(
                cost_func,
            (cost_function.LogLikelihood, cost_function.BinnedLogLikelihood)):
            error_def = 0.5
        # Store the value.
        minuit_args["errordef"] = error_def

    # Transform into iminuit 2 args
    # This isn't the cleanest thing to do, but it avoids having to changes interfaces for now (July 2021)
    # NOTE: The keys must be matched (and stripped) as prefixes. Searching anywhere in the key would
    #       misinterpret an initial value such as "suffix_a" as a fixed parameter flag (it contains
    #       "fix_"), and would mangle the parameter name when removing the match.
    # NOTE: We iterate over a copy of the keys because we pop from the dict as we go.
    # Errors
    error_args = {
        k[len("error_"):]: minuit_args.pop(k)
        for k in list(minuit_args) if k.startswith("error_")
    }
    # Limits
    limit_args = {
        k[len("limit_"):]: minuit_args.pop(k)
        for k in list(minuit_args) if k.startswith("limit_")
    }
    # Fixed
    fixed_args = {
        k[len("fix_"):]: minuit_args.pop(k)
        for k in list(minuit_args) if k.startswith("fix_")
    }
    # errordef
    error_def_arg = minuit_args.pop("errordef")

    # Perform the fit
    # Only the arguments which were not extracted above remain for the constructor.
    minuit = iminuit.Minuit(cost_func, **minuit_args)
    # Set iminuit 2 interface args
    # NOTE: Can't assign the values directly - need to loop parameter by parameter
    for k, v in limit_args.items():
        minuit.limits[k] = v
    for k, v in fixed_args.items():
        minuit.fixed[k] = v
    for k, v in error_args.items():
        minuit.errors[k] = v
    minuit.errordef = error_def_arg
    # Improve minimization reliability.
    minuit.strategy = 2
    minuit.migrad()
    # Just in case (doesn't hurt anything, but may help in a few cases).
    minuit.hesse()
    if use_minos:
        minuit.minos()

    # Check that the fit is actually good
    if not minuit.valid:
        raise base.FitFailed("Minimization failed! The fit is invalid!")
    # Check covariance matrix accuracy. We need to check it explicitly because it does not appear
    # to be covered by the validity check above.
    if not minuit.accurate:
        raise base.FitFailed(
            "Covariance matrix is inaccurate! The fit is invalid!")

    # Create the fit result and calculate the errors.
    fit_result = base.FitResult.from_minuit(minuit, cost_func, x)
    # We can calculate the fit errors if the cost function has a single function.
    # If it's a simultaneous fit, it's unclear how best this should be handled. Perhaps it could
    # be unraveled and summed, but it's not obvious that that's the best approach. More likely,
    # one only wants the errors for an individual cost function, so we leave that to the user.
    # The isinstance check selects the single function case (where ``cost_func.f`` is used).
    if isinstance(cost_func, cost_function.CostFunctionBase):
        errors = base.calculate_function_errors(cost_func.f, fit_result, x)
    else:
        errors = np.array([])
    fit_result.errors = errors

    return fit_result, minuit