예제 #1
0
def chisquare(dist, fit_result, data, bins=None, range=None):
    """
    Perform a Chi^2 test for goodness of fit.

    Tests the H0 hypothesis that the distances between the fit result and
    the data are compatible with random fluctuations.

    The data is histogrammed, the expected number of entries per bin is
    derived from the cdf of ``dist`` evaluated at the bin edges, and only
    bins with at least 5 expected entries enter the test.

    Args:
        dist:         A mle.Distribution instance
        fit_result:   The solution dict, returned by the Distribution.fit method
        data:         The data used in Distribution.fit
        bins:         Number of bins for the histogram
                      (default: ceil(2 * N**(1/3)))
        range:        Range for the histogram (default: min(data), max(data))
    Returns:
        chisquare:    the test statistic, chi^2/ndf, where ndf is the number
                      of bins used minus the number of fit parameters minus 1
        p-value:      the p-value, probability that differences between dist
                      and data are compatible with random fluctuation
    Raises:
        ValueError:   if dist does not have exactly one variable
    """

    variables = dist.get_vars()
    if len(variables) != 1:
        raise ValueError("This is a 1d only chisquare test")
    sample = data[variables[0].name]
    n_params = len(fit_result["x"])

    # rule of thumb for the number of bins if not provided;
    # numpy.histogram needs an integer count, ceil() returns a float
    if bins is None:
        bins = int(_np.ceil(2 * len(sample) ** (1.0 / 3.0)))

    entries, edges = _np.histogram(sample, bins=bins, range=range)

    # get expected frequencies from the cdf
    cdf = dist.cdf(edges, **fit_result["x"])
    exp_entries = _np.round(len(sample) * (cdf[1:] - cdf[:-1]))

    # use only bins where more than 4 entries are expected
    mask = exp_entries >= 5

    chisq, pvalue = _chisquare(entries[mask],
                               exp_entries[mask],
                               ddof=n_params
                               )
    # normalise the statistic to the number of degrees of freedom
    chisq = chisq / (_np.sum(mask) - n_params - 1)
    return chisq, pvalue
예제 #2
0
def calculatePeakDisplacements(intensityProfiles, peakFitSettings, progressReporter = None, pInitial = None, **curveFitKwargs):
    """
    Fits an ODM FitFunction to the target Series of intensity profiles.

    The profiles are fitted one after another; the optimal parameters of
    each fit are used as the initial guess for the next one.

    Parameters
    ----------

    intensityProfiles : pandas.Series of 1D numpy.ndarray
        A series of intensityProfiles that will be curve fit
    peakFitSettings : ODAFitSettings instance
        The curve fit settings to use for curve fitting
    progressReporter : ProgressReporter instance
        The ProgressReporter to use for displaying progress information.
        A StdOutProgressReporter is used by default.
    pInitial : sequence of initial parameter values, optional
        Initial guess passed as ``p0`` to the first curve fit. When None,
        the guess is estimated by the fit function from
        ``peakFitSettings.referenceIntensityProfile`` if that is set, and
        from the first intensity profile otherwise.
    curveFitKwargs : Keyword arguments that will be passed to the curve_fit
        function (scipy.optimization).


    Returns
    -------

    A dataframe with the calculated displacements that has the same index as the input
    intensity profile Series. Each row holds the keys 'popt', 'pcov',
    'chiSquare', 'curveFitResult' and 'displacement' for one profile.
    """

    if not progressReporter:
        progressReporter = _StdOutProgressReporter()

    fitFunction = peakFitSettings.fitFunction
    index = intensityProfiles.index

    if pInitial is not None:
        p0 = pInitial
    else:
        templateProfile = peakFitSettings.referenceIntensityProfile
        if templateProfile is None:
            templateProfile = intensityProfiles.iloc[0]
        estimatesDict = fitFunction.estimateInitialParameters(templateProfile, **peakFitSettings.estimatorValuesDict)
        # curve_fit needs a real sequence; on Python 3 dict.values() is a
        # view that does not convert to a numeric parameter vector
        p0 = list(estimatesDict.values())

    # restrict every profile to the configured fit window
    xmin = peakFitSettings.xminBound
    xmax = peakFitSettings.xmaxBound
    xdata = _np.arange(len(intensityProfiles.iloc[0]))[xmin:xmax]

    total = len(index)
    curveFitResults = total * [None]
    for i in range(total):
        ydata = intensityProfiles.iloc[i][xmin:xmax]
        popt, pcov = _curve_fit(fitFunction,
                                xdata=xdata,
                                ydata=ydata,
                                p0=p0, **curveFitKwargs)
        # seed the next fit with the parameters just found
        p0 = popt

        curveFitResult = {
            'popt': popt,
            'pcov': pcov,
            'chiSquare': _chisquare(ydata, fitFunction(xdata, *popt))[0],
        }
        # snapshot of the three entries above, taken before 'displacement' is added
        curveFitResult['curveFitResult'] = attrdict.AttrDict(curveFitResult)
        curveFitResult['displacement'] = fitFunction.getDisplacement(*popt)

        curveFitResults[i] = curveFitResult

        # report the percentage of profiles processed so far
        progressReporter.progress(float(i + 1) / total * 100)

    df = _pd.DataFrame(index=index, data=curveFitResults)

    progressReporter.done()

    return df