def doFindHistPeaks(outHistDict, errFactor, smoothIndexesForPeakFinder, verbose):
    """
    Run the peak finder on every histogram of a two-level dictionary and plot the result.

    The dictionary is indexed as outHistDict[folder][key], and every entry unpacks into
    (hist, binCenters). For each entry the raw histogram is added to a fresh test plot,
    peakFinder() is called on it, and one Gaussian curve per returned
    (amplitude, mean, sigma) guess is drawn on top of it.

    :param outHistDict: nested dictionary, outHistDict[folder][key] = (hist, binCenters)
    :param errFactor: forwarded to peakFinder() as 'errFactor'
    :param smoothIndexesForPeakFinder: forwarded to peakFinder() as 'numberOfIndexesToSmoothOver'
    :param verbose: forwarded to initializeTestPlots() and peakFinder()
    :return: None
    """
    # plots are always requested here, this is not controlled by the caller
    showPlots = True
    for folderName in outHistDict:
        histsInFolder = outHistDict[folderName]
        for histName in histsInFolder:
            counts, centers = histsInFolder[histName]
            label = folderName + ' ' + histName

            # a new plot for every histogram, starting with the raw data
            plotDict = initializeTestPlots(showPlots, verbose)
            plotDict = appendToTestPlots(plotDict, counts, centers,
                                         legendLabel=label, fmt='None', markersize=4,
                                         alpha=1.0, ls='solid', lineWidth=2)

            peakGuesses = peakFinder(counts, centers,
                                     numberOfIndexesToSmoothOver=smoothIndexesForPeakFinder,
                                     errFactor=errFactor,
                                     showPlot_peakFinder=True,
                                     verbose=verbose)

            # overlay one Gaussian per guess; 'ls' is not defined in this function,
            # it is looked up as a module-level sequence of line styles
            for peakNumber, (amp, mean, sigma) in enumerate(peakGuesses):
                plotDict = appendToTestPlots(plotDict, gaussian(centers, amp, mean, sigma), centers,
                                             legendLabel=label, fmt='None', markersize=4,
                                             alpha=1.0, ls=ls[peakNumber % 3], lineWidth=1)

            if plotDict['doShow'] or plotDict['savePlot']:
                quickPlotter(plotDict)
    return None
def _mariscottiDiagnosticPlot(y, gsd, verbose, title, icross=None, maxAllowedError=None, peaks=None):
    """
    Build and show the diagnostic plot used by mariscotti().

    The data and the rescaled generalized second derivative (gsd) with its zero line are
    always drawn. The remaining series are only drawn when the matching argument is given.

    :param y: the spectrum values (numpy array)
    :param gsd: the generalized second derivative of y
    :param verbose: stored in the plot dictionary under 'verbose'
    :param title: plot title
    :param icross: optional list of indexes where the gsd crosses zero
    :param maxAllowedError: optional array, the error threshold that the gsd must exceed
    :param peaks: optional array of found peaks, column 0 is plotted against column 1
    :return: None
    """
    x = range(len(y))
    # one tuple per plotted series:
    # (yData, xData, color, legendLabel, fmt, markersize, alpha, ls, lineWidth)
    series = [(y, x, 'firebrick', 'The data', 'None', 5, 1.0, '-', 2)]
    if maxAllowedError is not None:
        series.append((rescale(y, gsd, maxAllowedError), x, 'LawnGreen', 'max allowed error',
                       'None', 5, 1.0, '-', 1))
    series.append((rescale(y, gsd), x, 'darkorchid', '2nd derivative', 'None', 5, 1.0, 'dashed', 1))
    series.append((rescale(y, gsd, numpy.zeros(len(y))), x, 'black', '2nd deri = 0',
                   'None', 5, 1.0, 'dotted', 1))
    if icross is not None:
        # the zero crossings mark the boundaries used for finding local extrema,
        # they are drawn both on the gsd zero line and on the data
        series.append((rescale(y, gsd, numpy.zeros(len(icross))), icross, 'black', 'cross point',
                       'x', 10, 0.7, 'None', 1))
        series.append(([y[icrossVal] for icrossVal in icross], icross, 'dodgerblue', 'cross point',
                       'o', 9, 0.7, 'None', 1))
    if peaks is not None:
        series.append((peaks[:, 0], peaks[:, 1], 'darkorange', 'found peaks',
                       'd', 10, 0.7, 'None', 1))

    plotDict = {'verbose': verbose}
    # These can be a list or a single value
    listFields = ('yData', 'xData', 'colors', 'legendLabel', 'fmt',
                  'markersize', 'alpha', 'ls', 'lineWidth')
    for position, field in enumerate(listFields):
        plotDict[field] = [entry[position] for entry in series]
    # These must be a single value
    plotDict['title'] = title
    plotDict['xlabel'] = 'Channel Number'
    plotDict['legendAutoLabel'] = False
    plotDict['doLegend'] = True
    plotDict['legendLoc'] = 0
    plotDict['legendNumPoints'] = 3
    plotDict['legendHandleLength'] = 5
    plotDict['doShow'] = True
    quickPlotter(plotDict=plotDict)


def mariscotti(y, **kwargs):
    """
    Rudimentary peak finder based on the generalized second derivative (gsd) of Mariscotti.

    The spectrum is convolved with [-1, 2, -1] and smoothed with boxCar() to get the gsd.
    Regions between consecutive zero crossings of the gsd that contain no negative gsd
    value are treated as peak candidates. A candidate is kept when the gsd at the peak
    index is larger than errFactor times the estimated standard deviation of the gsd.

    :param y: the spectrum, anything numpy.array() accepts
    :param kwargs: optional settings
        nsmooth   -- box-car kernel size (default 5), must be an integer
        err       -- a single error value for all channels; its square is used as the
                     variance of every channel (default: the variance is y itself)
        errFactor -- multiplier of the allowed error (default 1.0)
        pk_gsd    -- if not None, the peak position and amplitude are taken from the
                     maximum of the gsd instead of the maximum of y
        plot      -- show diagnostic plots (default False)
        verbose   -- print progress messages (default False)
    :return: an empty list when no peak is found, otherwise a numpy array with one row
             per peak and the columns (amplitude, index of the peak, sigma), where sigma
             is half of the distance between the two zero crossings around the peak.
    """
    y = numpy.array(y)

    # kwargs nsmooth
    m = kwargs.get('nsmooth', 5)
    # kwargs err, the default variance is the data itself
    if 'err' in kwargs:
        # broadcast the single error value to one variance per channel, a bare float
        # here would break the len(vary) call used for the search limits below
        vary = numpy.ones(len(y)) * (float(kwargs['err']) ** 2.0)
    else:
        vary = y
    # kwargs errFactor
    if 'errFactor' in kwargs:
        f1 = float(kwargs['errFactor'])
    else:
        f1 = float(1.0)
    # kwargs pk_gsd
    pk_gsd = kwargs.get('pk_gsd', None)
    # kwargs plot
    showPlot = kwargs.get('plot', False)
    # kwargs verbose
    verbose = kwargs.get('verbose', False)

    if verbose:
        print("Starting the Mariscotti peak finding and Gaussian parametrization algorithm.")

    # rudimentary peak finder based on Mariscotti [1966]
    kernel1 = numpy.array([-1, 2, -1])
    # gsd stands for generalized second derivative
    gsd = numpy.convolve(y, kernel1, 'same')
    # The line below was translated from IDL code that was in a mysterious loop that seemed to make the exact
    # same variable assignment 'm' times.
    gsd = boxCar(gsd, kernalSize=m, mode='same')

    kernel2 = numpy.array([1, 4, 1])
    err = numpy.convolve(vary, kernel2, 'same')
    # The line below was translated from IDL code that was in a mysterious loop that seemed to make the exact
    # same variable assignment 'm' times.
    err = boxCar(err, kernalSize=m, mode='same')
    # standard deviation of the GSD
    err = ((float(1.0) / (float(m) ** m)) * err) ** float(0.5)

    # find the zero crossings, floor division keeps the limits integer for range()
    l1 = 4 * (m - 1) // 2 + 1
    l2 = len(vary) - l1
    icross = []
    for i in range(l1 + 1, l2):
        if (gsd[i] < 0.) and (gsd[i - 1] > 0.):
            icross.append(i - 1)
        if (gsd[i] > 0.) and (gsd[i - 1] < 0.):
            icross.append(i)

    if not icross:
        print('No places where the second derivative crosses zero, so no peaks were found by Mariscotti algorithm.')
        if showPlot:
            _mariscottiDiagnosticPlot(y, gsd, verbose,
                                      '2nd Derivative Zero Crossing Zero (None Found!)')
        if verbose:
            print("Mariscotti algorithm completed.\n")
        return []

    # define the outputs
    gaussParameters = []
    # find the peaks
    maxAllowedError = f1 * err
    for i in range(len(icross) - 1):
        icrossStart = icross[i]
        icrossStop = icross[i + 1]
        # check that there are no subsections of zero length (can happen because of the cautious crossover
        # finder used on a noisy part of the data)
        if icrossStart == icrossStop:
            continue
        gsd_subSample = gsd[icrossStart:icrossStop]
        # Determine if the subset is a peak or a valley, it is a peak when no gsd value is negative
        if numpy.any(gsd_subSample < 0.0):
            continue

        if pk_gsd is not None:
            searchValues = gsd_subSample
        else:
            searchValues = y[icrossStart:icrossStop]
        maxval = max(searchValues)
        candidates = numpy.flatnonzero(searchValues == maxval)
        if candidates.size == 0:
            continue
        # if more than one index has the max value, take the one with the largest gsd value
        # (argmax returns the first of equal values)
        bestCandidate = candidates[numpy.argmax(gsd_subSample[candidates])]
        indexOfPeak_y = bestCandidate + icrossStart

        if maxAllowedError[indexOfPeak_y] < gsd[indexOfPeak_y]:
            sigma = float(icrossStop - icrossStart) / float(2.0)
            gaussParameters.append((maxval, indexOfPeak_y, sigma))

    if not gaussParameters:
        print("No peaks were found by the Mariscotti algorithm, " +
              "but there were places where the second derivative crossed zero.")
        print("This can happen if the maximum allowed error at a peak is greater then the generalized " +
              "second derivative (gsd) at that point.")
        # f1 is always defined, also when the 'errFactor' kwarg was not given
        print("The 'errFactor' = " + str(f1) + " can be set using the kwarg 'errFactor' to scale the " +
              "maximum allowed error.")
        if showPlot:
            _mariscottiDiagnosticPlot(y, gsd, verbose,
                                      '2nd Derivative Zero Crossing Zero and Peaks Found',
                                      icross=icross, maxAllowedError=maxAllowedError)
        if verbose:
            print("Mariscotti algorithm completed.\n")
        return gaussParameters

    gaussParametersArray = numpy.array(gaussParameters)
    numOfPeaksFound = len(gaussParametersArray[:, 0])
    if verbose:
        optional_s = ''
        optional_es = ''
        if 1 < numOfPeaksFound:
            optional_s += 's'
            optional_es += 'es'
        print("A gaussian distribution is defined as G(x)=A * exp(-(x-B)^2 / (2 * C^2))")
        print("The Mariscotti algorithm has identified " + str(numOfPeaksFound) +
              " peak" + optional_s + ".")
        print("The index" + optional_es + " of the data where the peak can be found (B) are: " +
              str(gaussParametersArray[:, 1]))
        print("And the corresponding data value" + optional_s + " for the peak" + optional_s +
              " (A) are data values: " + str(gaussParametersArray[:, 0]))
        print("Finally, the sigma value" + optional_s + " (C) of the peak" + optional_s +
              " (calculated from the inflection points) are " + str(gaussParametersArray[:, 2]))
    if showPlot:
        _mariscottiDiagnosticPlot(y, gsd, verbose,
                                  '2nd Derivative Zero Crossing Zero and Peaks Found',
                                  icross=icross, maxAllowedError=maxAllowedError,
                                  peaks=gaussParametersArray)
    if verbose:
        print("Mariscotti algorithm completed.\n")
    return gaussParametersArray
def extractPulseInfo(folderName, fileNamePrefix='', filenameSuffix='', columnNamesToIgnore=('time',),
                     skipRows=1, delimiter=',', trimBeforeMin=True, multiplesOfMedianStdForRejection=None,
                     conv_channels=1, numOfExponents=1, calcFitForEachPulse=False,
                     upperBoundAmp=float('inf'), showTestPlots_Pulses=False, testModeReadIn=False,
                     verbose=True):
    """
    Load the pulse tables of a folder and send every data column through pulsePipeline().

    Each table returned by loadPulses() is a dictionary. Every key that is not in the
    ignore list is treated as one pulse; a key that is 'time' (case insensitive) provides
    the x data shared by the pulses of that table.

    :param folderName: forwarded to loadPulses()
    :param fileNamePrefix: forwarded to loadPulses()
    :param filenameSuffix: forwarded to loadPulses()
    :param columnNamesToIgnore: table keys that are not pulses. The keys 'filename',
        'uniqueid' and 'xdata' are always ignored as well. The sequence that is passed
        in is not modified.
    :param skipRows: forwarded to loadPulses()
    :param delimiter: forwarded to loadPulses()
    :param trimBeforeMin: forwarded to pulsePipeline()
    :param multiplesOfMedianStdForRejection: forwarded to pulsePipeline()
    :param conv_channels: forwarded to pulsePipeline()
    :param numOfExponents: forwarded to pulsePipeline()
    :param calcFitForEachPulse: forwarded to pulsePipeline()
    :param upperBoundAmp: forwarded to pulsePipeline()
    :param showTestPlots_Pulses: if True, quickPlotter() is called once per table
    :param testModeReadIn: forwarded to loadPulses() as 'testMode'
    :param verbose: print settings and progress messages
    :return: list of the pulse dictionaries returned by pulsePipeline()
    """
    listOfDataDicts = loadPulses(folderName, fileNamePrefix=fileNamePrefix, filenameSuffix=filenameSuffix,
                                 skipRows=skipRows, delimiter=delimiter, testMode=testModeReadIn,
                                 verbose=verbose)
    numOfDataDicts = len(listOfDataDicts)
    # report the progress roughly every 0.5 % of the tables, but at least for every table
    modLen = max((int(numOfDataDicts / 200.0), 1))
    if verbose:
        if conv_channels > 1:
            print("The data is to be smoothed with a top hat kernel of " + str(conv_channels) + " channels.")
        if trimBeforeMin:
            print("All the data values before the minimum value will be trimmed away before being saved.")
        print(" ")

    # work on a local copy, appending to the argument would grow a shared default
    # (and the caller's own list) on every call
    ignoredNames = list(columnNamesToIgnore) + ['filename', 'uniqueid', 'xdata']

    # initialize the list to collect the processed pulses
    listOfPulseDicts = []
    for IDindex, tableDict in enumerate(listOfDataDicts):
        uniqueID = tableDict['uniqueID']
        tableDict['xData'] = None
        dataKeys = []
        # iterate over a snapshot of the keys, 'xData' is reassigned inside the loop
        for testKey in list(tableDict.keys()):
            loweredKey = testKey.lower()
            if loweredKey not in ignoredNames:
                dataKeys.append(testKey)
            if 'time' == loweredKey:
                tableDict['xData'] = tableDict[testKey]

        plotDict = initializeTestPlots(showTestPlots_Pulses, verbose)
        for key in dataKeys:
            # make a new dictionary for each pulse, there may be many pulses in tableDict
            pulseDict = {}
            # assign the pulse yData
            pulseDict['arrayData'] = tableDict[key]
            # if the pulse has x Data, assign it
            if tableDict['xData'] is None:
                # NOTE(review): without a time column xData is the number of samples (an int),
                # not an array of positions - confirm this is what pulsePipeline() expects
                pulseDict['xData'] = len(pulseDict['arrayData'])
            else:
                pulseDict['xData'] = tableDict['xData']
            # assign a unique ID to this pulse for later identification.
            pulseDict['uniqueID'] = key.replace(' ', '_') + '_' + uniqueID
            pulseDict['rawDataFileName'] = tableDict['fileName']

            # process the pulse
            pulseDict, plotDict = pulsePipeline(pulseDict, plotDict, multiplesOfMedianStdForRejection,
                                                conv_channels, trimBeforeMin, numOfExponents,
                                                calcFitForEachPulse, upperBoundAmp)
            listOfPulseDicts.append(pulseDict)

        if showTestPlots_Pulses:
            quickPlotter(plotDict=plotDict)
        if verbose:
            if IDindex % modLen == 0:
                print("Pulse operations are "
                      + str('%02.2f' % (IDindex * 100.0 / float(numOfDataDicts))) + " % complete.")
    return listOfPulseDicts
def _truncateAndFitCharFunc(charArray, legendLabel, xStep, xTruncateAfter_s, numOfExponents,
                            upperBoundAmp, plotDict):
    """
    Truncate one characteristic function, add it to the test plot and fit it.

    :return: (charPulseDict, keptLen, plotDict) where charPulseDict holds 'keptData',
             'keptXData', 'fittedCost' and one 'fittedAmpN'/'fittedTauN' pair per exponent
             (None when the fit returned no parameters), and keptLen is the number of
             samples that were kept.
    """
    keptLen = len(charArray)
    if keptLen * float(xStep) > xTruncateAfter_s:
        keptLen = int(numpy.round(xTruncateAfter_s / float(xStep)))
    charPulseDict = {
        'keptData': numpy.array(charArray[:keptLen]),
        # index * step always gives exactly keptLen samples, numpy.arange() with a
        # non-integer step can return one sample too many because of rounding
        'keptXData': numpy.arange(keptLen) * float(xStep),
    }
    plotDict = appendToTestPlots(plotDict, charPulseDict['keptData'], charPulseDict['keptXData'],
                                 legendLabel=legendLabel, fmt='None', markersize=4, alpha=1.0,
                                 ls='solid', lineWidth=1)
    # fit with a sum of exponential
    fittedAmpTau, charPulseDict['fittedCost'], _unused = fittingSumOfPowers(
        charPulseDict['keptData'], charPulseDict['keptXData'], numOfExponents, plotDict, upperBoundAmp)
    if fittedAmpTau is not None:
        for index, (amp, tau) in enumerate(fittedAmpTau):
            charPulseDict['fittedAmp' + str(index + 1)] = amp
            charPulseDict['fittedTau' + str(index + 1)] = tau
    else:
        for index in range(numOfExponents):
            charPulseDict['fittedAmp' + str(index + 1)] = None
            charPulseDict['fittedTau' + str(index + 1)] = None
    return charPulseDict, keptLen, plotDict


def _fittedOrOriginalCharFunc(charArray, charPulseDict, keptLen, numOfExponents, plotDict, failLegendLabel):
    """
    Return the fitted model of a characteristic function, or the input array if the fit failed.

    When any fitted amplitude or tau is None the kept data is added to the test plot
    with failLegendLabel and charArray is returned unchanged.

    :return: (array to use for the P-function, plotDict)
    """
    suffixes = [str(sumIndex + 1) for sumIndex in range(numOfExponents)]
    fitSucceeded = all(charPulseDict['fittedAmp' + suffix] is not None
                       and charPulseDict['fittedTau' + suffix] is not None
                       for suffix in suffixes)
    if fitSucceeded:
        fittedArray = numpy.zeros(keptLen)
        for suffix in suffixes:
            fittedArray += naturalPower(charPulseDict['keptXData'],
                                        charPulseDict['fittedAmp' + suffix],
                                        charPulseDict['fittedTau' + suffix])
        return fittedArray, plotDict
    plotDict = appendToTestPlots(plotDict, charPulseDict['keptData'], charPulseDict['keptXData'],
                                 legendLabel=failLegendLabel, fmt='None', markersize=4, alpha=1.0,
                                 ls='dashed', lineWidth=1)
    return charArray, plotDict


def calcP_funcForSI(charArray1, charArray2, charArrayTestPlotsFilename=None, useFittedFunction=True,
                    xStep=1.0e-8, xTruncateAfter_s=float('inf'), numOfExponents=2,
                    upperBoundAmp=float('inf'), verbose=True):
    """
    Calculate the P-function used for the shaping indicator (SI) from two characteristic functions.

    Both characteristic functions are truncated after xTruncateAfter_s, fitted with
    fittingSumOfPowers() and normalized to a sum of one over their common length. The
    P-function is (f1 - f2) / (f1 + f2) of the normalized functions.

    :param charArray1: first characteristic function, a sequence sampled every xStep
    :param charArray2: second characteristic function, a sequence sampled every xStep
    :param charArrayTestPlotsFilename: if not None, the test plot is set up to be saved
        under this file name
    :param useFittedFunction: if True, a characteristic function is replaced by its fitted
        model when the fit succeeded
    :param xStep: spacing of the samples on the x axis
    :param xTruncateAfter_s: samples beyond this x value are dropped
    :param numOfExponents: number of terms in the fitted sum
    :param upperBoundAmp: forwarded to fittingSumOfPowers()
    :param verbose: forwarded to initializeTestPlots()
    :return: (Pfunc, charPulseDict1, charPulseDict2), the dictionaries hold the kept data,
             the fit results and the 'integral' from calcIntegral() of each function
    """
    savePlot = charArrayTestPlotsFilename is not None
    plotDict = initializeTestPlots(doShow=False, verbose=verbose, doSave=savePlot,
                                   plotFileName=charArrayTestPlotsFilename,
                                   title='Characteristic Functions')

    # Calculations for Characteristic Function 1
    charPulseDict1, char1Len, plotDict = _truncateAndFitCharFunc(
        charArray1, 'Function 1', xStep, xTruncateAfter_s, numOfExponents, upperBoundAmp, plotDict)
    # Calculations for Characteristic Function 2
    charPulseDict2, char2Len, plotDict = _truncateAndFitCharFunc(
        charArray2, 'Function 2', xStep, xTruncateAfter_s, numOfExponents, upperBoundAmp, plotDict)

    # replace the real-data average with a fitted function for the shaping indicator (SI) calculation
    if useFittedFunction:
        charArray1, plotDict = _fittedOrOriginalCharFunc(
            charArray1, charPulseDict1, char1Len, numOfExponents, plotDict,
            'Function 1 FIT FAILED SHOWING AVERAGE PULSE')
        charArray2, plotDict = _fittedOrOriginalCharFunc(
            charArray2, charPulseDict2, char2Len, numOfExponents, plotDict,
            'FUNCTION 2 FIT FAILED SHOWING AVERAGE PULSE')

    # Calculate the shaping indicator SI
    minCharLen = min(char1Len, char2Len)
    # Normalize the characteristic functions to have the integral values to be equal to unity (one).
    # The float conversion makes this work for lists and avoids integer division of integer data.
    charArray1_forPfunc = numpy.asarray(charArray1[:minCharLen], dtype=float)
    charArray1_forPfunc = charArray1_forPfunc / numpy.sum(charArray1_forPfunc)
    charArray2_forPfunc = numpy.asarray(charArray2[:minCharLen], dtype=float)
    charArray2_forPfunc = charArray2_forPfunc / numpy.sum(charArray2_forPfunc)

    # The P-function calculation
    Pfunc = (charArray1_forPfunc - charArray2_forPfunc) / (charArray1_forPfunc + charArray2_forPfunc)
    plotDict = appendToTestPlots(plotDict, Pfunc, numpy.arange(minCharLen) * float(xStep),
                                 legendLabel='Calculated P(t)', fmt='None', markersize=4, alpha=1.0,
                                 ls='dotted', lineWidth=2)
    quickPlotter(plotDict)

    # calculate integrals
    charPulseDict1['integral'], _unused = calcIntegral(
        charPulseDict1['keptData'], charPulseDict1['keptXData'], plotDict)
    charPulseDict2['integral'], _unused = calcIntegral(
        charPulseDict2['keptData'], charPulseDict2['keptXData'], plotDict)
    return Pfunc, charPulseDict1, charPulseDict2
def listGaussFitter(spectrum, x, errFactor=1, numberOfIndexesToSmoothOver=1, showPlot_peakFinder=False,
                    showPlot_gaussFitters=False, verbose=False):
    """
    Find the peaks of a spectrum and fit a Gaussian to each of them, one after the other.

    The peaks come from peakFinder(). Each fitted Gaussian is subtracted from a working
    copy of the spectrum before the next peak is fitted, so that the next fit can't find
    it again. The spectrum that is passed in is not modified.

    :param spectrum: the spectrum values, a sequence of numbers
    :param x: the x values of the spectrum
    :param errFactor: forwarded to peakFinder()
    :param numberOfIndexesToSmoothOver: forwarded to peakFinder()
    :param showPlot_peakFinder: forwarded to peakFinder()
    :param showPlot_gaussFitters: if True, a plot dictionary with the raw data is handed to
        singleGaussFitter() for every peak and shown with quickPlotter() at the end
    :param verbose: print the guessed and fitted parameters of every peak
    :return: list of tuples, [(modelParams, paramsError), ] where
             modelParams = [amplitude, mean, sigma] and
             paramsError = [amplitude error, mean error, sigma error]
    """
    # apply the mariscotti peak finding algorithm
    guessParametersSet = peakFinder(spectrum, x,
                                    numberOfIndexesToSmoothOver=numberOfIndexesToSmoothOver,
                                    errFactor=errFactor,
                                    showPlot_peakFinder=showPlot_peakFinder,
                                    verbose=verbose)

    # plotting initialization and defaults
    if showPlot_gaussFitters:
        plotDict = {}
        plotDict['verbose'] = verbose
        plotDict['doShow'] = showPlot_gaussFitters
        # These can be a list or a single value, here we initialize a list
        # that starts with the plot values for the raw spectrum.
        plotDict['yData'] = [spectrum[:]]
        plotDict['xData'] = [x]
        plotDict['legendLabel'] = ['rawData']
        plotDict['fmt'] = ['None']
        plotDict['markersize'] = [5]
        plotDict['alpha'] = [1.0]
        plotDict['ls'] = ['solid']
        plotDict['lineWidth'] = [3]
        # These must be a single value
        plotDict['title'] = ''
        plotDict['xlabel'] = 'Channel Number'
        plotDict['legendAutoLabel'] = False
        plotDict['doLegend'] = True
        plotDict['legendLoc'] = 0
        plotDict['legendNumPoints'] = 3
        plotDict['legendHandleLength'] = 5
        plotDict['clearAtTheEnd'] = False
    else:
        plotDict = None

    # get the model parameters and error for all the found peaks in the list.
    modelInfo = []
    numOfFitsInList = len(guessParametersSet)
    formatStr = '%1.4f'
    # a float copy, so that the in-place subtraction below also works for lists and
    # integer arrays and never touches the caller's data
    spectrumForFitter = numpy.array(spectrum, dtype=float)
    for fitNum, guessParameters in enumerate(guessParametersSet):
        # NOTE(review): showPlot is always True here, also when showPlot_gaussFitters is
        # False and plotDict is None - confirm that singleGaussFitter() is meant to be
        # called this way
        modelParams, paramsError, plotDict = singleGaussFitter(
            spectrumForFitter, x, guessParameters,
            peakName=' peak ' + str(fitNum + 1), showPlot=True, plotDict=plotDict)
        # subtract the fit from the spectrum so that the next peak can't find it.
        spectrumForFitter -= gaussian(x, *modelParams)
        modelInfo.append((modelParams, paramsError))

        if verbose:
            print("fitting for the found peak " + str(fitNum + 1) + " of " + str(numOfFitsInList))
            print("in amplitude (guess, fitted, error) = (" +
                  formatStr % guessParameters[0] + ", " +
                  formatStr % modelParams[0] + ", " +
                  formatStr % paramsError[0] + ")")
            print("in mean      (guess, fitted, error) = (" +
                  formatStr % guessParameters[1] + ", " +
                  formatStr % modelParams[1] + ", " +
                  formatStr % paramsError[1] + ")")
            print("in sigma     (guess, fitted, error) = (" +
                  formatStr % guessParameters[2] + ", " +
                  formatStr % modelParams[2] + ", " +
                  formatStr % paramsError[2] + ")\n")

    if showPlot_gaussFitters:
        quickPlotter(plotDict=plotDict)
    return modelInfo