def plotStats(numBars,numTrials,trialStats):
    """Draw a grouped bar chart comparing fit statistics across trials and save it.

    For each trial ``t`` the row ``trialStats[t, :]`` is split into three parts:
    the first half of the leading ``numParams-1`` columns are plotted as bar
    heights (means), the second half as error bars (standard deviations,
    scaled by 0.2), and the last column is plotted as the R-squared value on a
    secondary axis.

    Relies on module-level names not passed as arguments: ``numParams``,
    ``trials`` (per-trial legend labels) and ``statsLabel`` (x tick labels).

    Args:
        numBars: number of x tick positions; must exceed the number of mean
            values, since the RSQ bar is placed at index ``numVals``.
        numTrials: number of rows of ``trialStats`` to plot.
        trialStats: 2-D array indexed as ``[trial, column]``.
    """
    f = pltUtil.pFigure()
    tickLabelX = np.arange(0,numBars)
    colorCycler = pltUtil.cycleColors()
    ax = plt.subplot(1,1,1)
    axRSQ = pltUtil.secondAxis(ax,'RSQ',[0,1.0])
    # Use float division: with integer arguments under Python 2, 1/(n*m)
    # truncates to 0, which collapses every trial onto the same x position
    # and gives the RSQ bars zero width.
    wid = 1.0/(numTrials*numBars)
    plt.title('Comparison of Surface Modeling Parameters')
    barDict = dict(elinewidth=4, ecolor='m')
    # XXX fix this to make it more general...
    # The column layout does not depend on the trial, so compute it once.
    numMeanStd = numParams-1
    middle = int(numMeanStd/2)
    for t in range(numTrials):
        RSQ = trialStats[t,-1]
        meanVals = trialStats[t,0:middle]
        stdVals = trialStats[t,middle:numMeanStd]
        numVals = len(meanVals)
        # shift each trial's group of bars sideways by one bar width
        xVals = tickLabelX+wid*t
        mColor = next(colorCycler)
        mLabel = trials[t]
        # NOTE(review): these bars use matplotlib's default width while the
        # RSQ bar below uses `wid` -- confirm whether width=wid was intended.
        ax.bar(xVals[0:numVals],meanVals,yerr=0.2*stdVals,
               color=mColor,error_kw = barDict,align='center',label=mLabel)
        axRSQ.bar(xVals[numVals],RSQ,width=wid,color=mColor,align='center',
                  label=mLabel)
    ax.legend(loc='upper left')
    ax.set_ylabel('Tau value (seconds)')
    # xVals here is the last trial's positions, i.e. the right-most group
    plt.xlim([-0.5,max(xVals)*1.1])
    plt.xticks(tickLabelX, statsLabel)
    pltUtil.saveFigure(f,"Comparison")
# Relative-error panel: plots the relative error of the exponential and
# logarithmic fits against `times` on a log-y axis, adds their mean values as
# dashed horizontal lines, then saves the figure.
# The names predict, CRTD, predictLog, yVals, times, numPlots, plotCount,
# xLabelStr and fig are defined before this view begins.
plotCount += 1
plt.subplot(numPlots,1,plotCount)
# relative error of the exponential fit, normalized by the measured CRTD
expErrRel=np.abs(predict-CRTD)/CRTD
plt.semilogy(times,expErrRel,'ro-', label='Error, Exponential Fit')
# relative error of the log-space fit, normalized by yVals
logErrRel = np.abs((predictLog-yVals)/yVals)
# NOTE: the title below is a fixed string; it is not derived from the
# averages computed further down.
plt.title('Logarithmic fit has lower average error than exponential fit')
# NaN entries cannot be plotted, so they are replaced with zero. NaN is the
# only value that is not equal to itself, which is what the comparison finds.
# NOTE(review): a nonzero value divided by zero gives inf rather than NaN and
# would not be caught here -- confirm whether that case can occur.
logErrRel[np.where(logErrRel != logErrRel)[0]] = 0
plt.semilogy(times,logErrRel,'kx-', label='Error, Linear fit')
# mean relative error of each fit, used as a first-order goodness measure
averageExp = np.mean(expErrRel)
averageLog = np.mean(logErrRel)
plt.axhline(averageExp,color='r',linestyle='--',
            label='Average Error, Exp: {:4.2f}'.format(averageExp))
plt.axhline(averageLog,color='k',linestyle='--',
            label='Average Error, Log: {:4.2f}'.format(averageLog))
plt.ylabel('|Residuals|/CRTD)')
plt.xlabel(xLabelStr)
plt.legend(loc='best')
plotCount += 1
plt.tight_layout()
# close=False is passed so the figure is not closed by the save call
# (presumably so the caller can keep using it -- verify against pltUtil)
pltUtil.saveFigure(fig,'../../output/bestFit',overrideIO=False,close=False)
def GetPhysicsMain(goodTimes,goodFRET,goodDiff):
    """Compute unfolding times from FRET distance traces and plot histograms.

    The FRET distances of all traces are pooled and split into two k-means
    clusters; the smaller centre is taken as the 'folded' level and the larger
    as the 'unfolded' level. ``getMinimumTime`` and ``getDifferentialTime``
    (defined elsewhere in this file) then turn those levels into a per-trace
    differential time. Two histograms (pooled distances with the cluster
    centres marked, and the differential times) are saved as figure 'tmp2'.

    Args:
        goodTimes: per-trace time values, passed through to ``getDistances``.
        goodFRET: per-trace FRET values, passed through to ``getDistances``.
        goodDiff: per-trace diffusion coefficients; subset on return.

    Returns:
        ``(diffTime, goodDiff)`` where ``goodDiff`` has been reduced with
        ``util.takeSubset`` using ``[nodeIdx, definedUnfoldingIdx]`` so that it
        lines up with ``diffTime``.
    """
    source = 'Step2::GetPhysics'
    util.ReportMessage("Starting",source)
    fig = plotUtil.pFigure()
    distances,times,definedDistancesIdx,nodeIdx = getDistances(goodFRET,goodTimes)
    # Flatten the per-trace distances to get the 'all time' distance
    # distribution; k-means on this finds the folded and unfolded states.
    flattenDistances = np.concatenate(distances)
    # use two clusters; folded and unfolded
    numClusters = 2
    # lots of iterations (this number seems to work well)
    # http://en.wikipedia.org/wiki/K-means_clustering
    numIters = int(1e3)
    clusters,ignore = cluster.kmeans(flattenDistances,numClusters,iter=numIters)
    # The cluster centres give the 'folded' and 'unfolded' levels; anything
    # between them is a fairly undefined state.
    foldedCenter = min(clusters)
    unfoldedCenter = max(clusters)
    clusters = [unfoldedCenter,foldedCenter]
    folded = getMinimumTime(distances,times,foldedCenter,False)
    unfolded = getMinimumTime(distances,times,unfoldedCenter,True)
    diffTime, definedUnfoldingIdx = getDifferentialTime(folded,unfolded)
    goodDiff = util.takeSubset(goodDiff, [nodeIdx,definedUnfoldingIdx])
    # NOTE(review): the three calls below act on the figure created at the top
    # of this function, which is replaced (and never saved) right afterwards.
    # 'nonposy' is also the y-axis spelling of this option although the call
    # sets the x scale -- confirm intent against the matplotlib version in use.
    plt.xscale('log', nonposy='clip')
    plt.xlabel('Time since protein (seconds)')
    plt.ylabel('FRET d distance (arb)')
    fig = plotUtil.pFigure()
    numPlots = 2
    plotCount = 1
    fretLabel = 'FRET d Distance (arb)'
    ax = plt.subplot(numPlots,1,plotCount)
    # Roughly 100 samples per bin. Floor division keeps the count an integer
    # on Python 3 as well, and max(1, ...) avoids a zero-bin histogram when
    # there are fewer than 100 samples.
    numDistanceBins = max(1,len(flattenDistances)//100)
    plotUtil.histogramPlot(ax,fretLabel,'# Proteins',
                           'FRET Distance histogram',flattenDistances,
                           numDistanceBins,True,True)
    # plot guiding lines for the two clusters we found
    normalClusters = plotUtil.normalizeTo(flattenDistances,clusters)
    plt.axvline(normalClusters[0])
    plt.axvline(normalClusters[1])
    plotCount += 1
    ax = plt.subplot(numPlots,1,plotCount)
    numTimeBins = max(1,len(diffTime)//100)
    # Argument order is (axis, x label, y label, title, data, bins, ...), as
    # in the call above; the x label and title were previously swapped here.
    plotUtil.histogramPlot(ax,'Unfolding time (seconds) ','# Proteins',
                           'Unfolding time distribution',diffTime,
                           numTimeBins,True,True)
    plotUtil.saveFigure(fig,'tmp2')
    # return the good unfolding times and differential coefficients
    return diffTime,goodDiff
def _fitDiffusionCoeff(traceTimes,traceMSD):
    """Linear-fit one trace's MSD against time; return (slope, R^2) or (0, 0).

    (0, 0) is returned when the trace has fewer than three points or when the
    fitted slope is negative, so the caller can filter on R^2 > 0.
    """
    # Check the length before touching element 0: an empty trace would
    # otherwise raise IndexError instead of being flagged as invalid.
    if (len(traceTimes) < 3):
        return 0.0,0.0
    # NOTE(review): the original comment said "multiply the times by four, per
    # the diffusion formulae" but the code scales by 2 -- confirm which factor
    # is intended before changing either.
    fitTimes = traceTimes*2
    # measure time from the first sample of the trace
    fitTimes -= fitTimes[0]
    # linear fit
    deg = 1
    polyVals = np.polyfit(fitTimes,traceMSD,deg)
    fitVals = np.poly1d(polyVals)(fitTimes)
    # correlate the fitted values with the data to get the fit quality
    slope, intercept, r_value, p_value, std_err = \
        stats.linregress(fitVals,traceMSD)
    # the diffusion coefficient is the slope, the first coefficient returned
    diffCoeff = polyVals[0]
    if (diffCoeff < 0):
        # ignore diffusion coefficients less than 0
        return 0.0,0.0
    return diffCoeff,r_value**2


def AnalyzeTraces(velX,velY,times,numTimes,fretRatio,MSD,frameRate):
    """Fit a diffusion coefficient per protein trace and plot summary histograms.

    Saves two figures: 'Protein_Distribution' (histogram of ``numTimes``) and
    'MSD' (histograms of the fitted slopes and of R^2 * 100 for traces with
    R^2 > 0). Finally calls ``plotUtil.comparisonPlot`` comparing all traces,
    traces with a valid fit, and traces with R^2 above 0.8.

    Args:
        velX, velY, fretRatio, frameRate: accepted but not used here.
        times: sequence of per-trace time arrays.
        numTimes: per-trace frame counts; its length sets the trace count.
        MSD: sequence of per-trace mean-squared-displacement arrays.

    Returns:
        ``(diffCoeffs, bestIndices)``: the fitted slope for every trace (0 for
        rejected traces) and the indices of traces whose R^2 exceeds 0.8.
    """
    util.ReportMessage("AnalyzeTraces")
    proteinYStr = '# Proteins'
    numProteins = len(numTimes)
    numBins = max(numTimes)
    fig = plotUtil.pFigure()
    titleStr = "Raw distribution of protein appearances"
    ax = fig.add_subplot(1,1,1)
    plotUtil.histogramPlot(ax,'Frame duration of protein',proteinYStr,
                           titleStr,numTimes,numBins)
    plotUtil.saveFigure(fig,"Protein_Distribution")
    fig = plotUtil.pFigure()
    # column 0 holds the fitted slope, column 1 the R^2 of the fit
    numStats = 2
    msdMatrix = np.zeros((numProteins,numStats))
    plotCount = 1
    numPlots = 2
    # kept from the original: sets up the first subplot on the current figure
    plt.subplot(numPlots,1,plotCount)
    for i in range(numProteins):
        msdMatrix[i,0],msdMatrix[i,1] = _fitDiffusionCoeff(times[i],MSD[i])
    rawRsqVals = msdMatrix[:,1]
    # rejected traces were left at R^2 == 0, so R^2 > 0 selects valid fits
    goodIndices = np.where(rawRsqVals > 0)[0]
    goodMsds = np.take(msdMatrix,goodIndices,axis=0)
    msdVals = goodMsds[:,0]
    # plot the histogram of MSDs
    ax = fig.add_subplot(numPlots,1,plotCount)
    # NOTE(review): the bin count is taken from the largest coefficient value
    # and this raises ValueError when no trace has a valid fit -- confirm.
    numBins = max(msdVals)
    plotUtil.histogramPlot(ax,'Diffusion Coeff (pixels^2/second)',
                           proteinYStr,
                           'Histogram of Protein Hiffusion Coeffs',
                           msdVals,numBins)
    plotCount+=1
    # RSQ is between 0 and 1; multiply by 100 and use 100 bins for a
    # 'simple' normalization
    RSqVals = goodMsds[:,1]*100
    numBinsRsq= 100
    ax = fig.add_subplot(numPlots,1,plotCount)
    plotUtil.histogramPlot(ax,'R Squared Coeff',proteinYStr,
                           'Histogram of Protein RSq',RSqVals,numBinsRsq)
    plotCount += 1
    # next, we plot a comparison of the 'raw', 'valid' (D with uncertainty),
    # and 'processed' (D fit with RSQ > cutoffRsq)
    cutoffRsq = 0.8
    plt.tight_layout()
    plotUtil.saveFigure(fig,"MSD")
    titleStr = proteinYStr
    xStr = 'Frames Appearing'
    xLimit = [1,max(numTimes)]
    bestIndices = np.where(rawRsqVals > cutoffRsq)[0]
    # make labels for each of the indices ('Raw' is assumed...)
    compLabel = ["All Proteins","Valid Diffusion Coeffs",
                 ("RSQ > {:.3f}".format(cutoffRsq))]
    comparisonIndices = [goodIndices,bestIndices]
    plotUtil.comparisonPlot(xStr,proteinYStr,titleStr,xLimit,numTimes,
                            comparisonIndices,compLabel)
    # XXX TODO: compare other options (e.g. x velocity, y velocity, etc)
    # XXX TODO: try for all files, need a better way to store
    # return all the diffusion coeffs, as well as the 'best' indices we found
    return msdMatrix[:,0],bestIndices