def chisquare(dist, fit_result, data, bins=None, range=None):
    """
    Perform a Chi^2 test for goodness of fit.

    Tests the H0 hypothesis if the distances between fit result and
    data are compatible with random fluctuations.

    Args:
        dist:         A mle.Distribution instance
        fit_result:   The solution dict, returned by the Distribution.fit
                      method; its "x" entry holds the fitted parameters
        data:         The data used in Distribution.fit, indexable by the
                      name of the distribution's variable
        bins:         Number of bins for the histogram
                      (default: ceil(2 * N**(1/3)))
        range:        Range for the histogram (default: min(data), max(data))

    Returns:
        chisquare:    the test statistic, chi^2/ndf
        p-value:      the p-value, probability that differences between dist
                      and data are compatible with random fluctuation

    Raises:
        ValueError:   if the distribution has more than one variable
    """
    variables = dist.get_vars()
    if len(variables) > 1:
        raise ValueError("This is a 1d only chisquare test")
    var = variables[0]

    sample = data[var.name]
    n_params = len(fit_result["x"])

    # Rule of thumb for the number of bins if not provided.
    # numpy.histogram only accepts an integer bin count, and ceil() returns
    # a float, so the result has to be converted explicitly.
    if bins is None:
        bins = int(_np.ceil(2 * len(sample) ** (1.0 / 3.0)))

    entries, edges = _np.histogram(sample, bins=bins, range=range)

    # Expected frequencies per bin: N times the cdf difference across the bin.
    cdf = dist.cdf(edges, **fit_result["x"])
    exp_entries = _np.round(len(sample) * (cdf[1:] - cdf[:-1]))

    # Use only bins where at least 5 entries are expected.
    mask = exp_entries >= 5

    # NOTE(review): if _chisquare is scipy.stats.chisquare, recent SciPy
    # releases raise ValueError unless the observed and expected sums agree;
    # rounding and masking can break that agreement -- confirm against the
    # SciPy version in use.
    chisq, pvalue = _chisquare(entries[mask], exp_entries[mask], ddof=n_params)

    # Normalise by the degrees of freedom: used bins - fitted parameters - 1.
    chisq = chisq / (_np.sum(mask) - n_params - 1)
    return chisq, pvalue
def calculatePeakDisplacements(intensityProfiles, peakFitSettings, progressReporter = None, pInitial = None, **curveFitKwargs):
    """
    Fits an ODM FitFunction to the target Series of intensity profiles.

    Each profile is fitted in order; the optimal parameters of one fit are
    used as the initial guess for the next one.

    Parameters
    ----------

    intensityProfiles : pandas.Series of 1D numpy.ndarray
        A series of intensityProfiles that will be curve fit

    peakFitSettings : ODAFitSettings instance
        The curve fit settings to use for curve fitting

    progressReporter : ProgressReporter instance
        The ProgressReporter to use for displaying progress information.
        A StdOutProgressReporter is used by default.

    pInitial : sequence, optional
        Initial parameter guess for the first fit. If None, the guess is
        obtained from fitFunction.estimateInitialParameters, using the
        reference intensity profile of peakFitSettings when it is set and
        the first intensity profile otherwise.

    curveFitKwargs :
        Keyword arguments that will be passed to the curve_fit function
        (scipy.optimization).

    Returns
    -------

    A dataframe with the calculated displacements that has the same index as
    the input intensity profile Series. Its columns are 'popt', 'pcov',
    'chiSquare', 'curveFitResult' and 'displacement'.
    """
    if not progressReporter:
        progressReporter = _StdOutProgressReporter()

    fitFunction = peakFitSettings.fitFunction
    index = intensityProfiles.index

    if pInitial is not None:
        p0 = pInitial
    else:
        referenceProfile = peakFitSettings.referenceIntensityProfile
        templateProfile = referenceProfile if referenceProfile is not None else intensityProfiles.iloc[0]
        estimatesDict = fitFunction.estimateInitialParameters(templateProfile, **peakFitSettings.estimatorValuesDict)
        # Materialise the values: on Python 3 dict.values() is a view object,
        # which curve_fit cannot interpret as a parameter vector.
        p0 = list(estimatesDict.values())

    # Restrict every profile to the [xmin:xmax] window configured in the settings.
    xmin = peakFitSettings.xminBound
    xmax = peakFitSettings.xmaxBound
    xdata = _np.arange(len(intensityProfiles.iloc[0]))[xmin:xmax]

    total = len(index)
    curveFitResults = []

    for i in range(total):
        ydata = intensityProfiles.iloc[i][xmin:xmax]
        popt, pcov = _curve_fit(fitFunction,
                                xdata = xdata,
                                ydata = ydata,
                                p0 = p0, **curveFitKwargs)
        # Warm start: the next profile is fitted starting from this solution.
        p0 = popt

        curveFitResult = {}
        curveFitResult['popt'] = popt
        curveFitResult['pcov'] = pcov
        curveFitResult['chiSquare'] = _chisquare(ydata, fitFunction(xdata, *popt))[0]
        # Snapshot of the entries collected so far (popt, pcov, chiSquare).
        curveFitResult['curveFitResult'] = attrdict.AttrDict(curveFitResult)
        curveFitResult['displacement'] = fitFunction.getDisplacement(*popt)
        curveFitResults.append(curveFitResult)

        # Report progress as a percentage of the profiles processed.
        progressReporter.progress(float(i + 1) / total * 100)

    df = _pd.DataFrame(index=index, data=curveFitResults)
    progressReporter.done()
    return df