def _checkpointGen(filePath, orCall, force, unpack, useNpy, *args, **kwargs):
    """
    Load a cached result from 'filePath', or compute it and cache it.

    If 'filePath' exists and 'force' is false, the file is loaded and
    returned. Otherwise 'orCall(*args, **kwargs)' is called, its result is
    saved to 'filePath', and that result is returned.

    Args:
        filePath: where the checkpoint lives (or will be written)
        orCall: callable that produces the data when nothing is loaded
        force: if true, always call 'orCall' and overwrite the file
        unpack: forwarded to _npyLoad; not used on the pickle path
        useNpy: if true, use _npyLoad / _npySave; otherwise pickle in binary
        *args, **kwargs: passed through to 'orCall'
    Returns:
        the loaded data, or the freshly computed result of 'orCall'
    """
    if pGenUtil.isfile(filePath) and not force:
        if useNpy:
            return _npyLoad(filePath, unpack)
        # assume we pickle in binary. The 'with' guarantees the handle is
        # released even if unpickling raises.
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only point this at checkpoints you trust.
        with open(filePath, 'rb') as fh:
            return pickle.load(fh)
    # POST: nothing to load (or we were forced); regenerate.
    # make sure the output directory exists *before* the expensive call
    path = pGenUtil.getBasePath(filePath)
    pGenUtil.ensureDirExists(path)
    # POST: we can put our file here
    dataToSave = orCall(*args, **kwargs)
    if useNpy:
        _npySave(filePath, dataToSave)
    else:
        # open the file in binary format for writing
        with open(filePath, 'wb') as fh:
            pickle.dump(dataToSave, fh)
    return dataToSave
def GetOStretchByIndices(sep,force,idxStart,idxEnd):
    """
    Fits each region [idxStart[i], idxEnd[i]) of force vs separation and
    locates the overstretching point from the fitted regions.

    The first three regions are used: the fit of region 0 is intersected
    with region 1, and region 1 with region 2. The midpoint between those
    two intersections picks the overstretch location.

    Args:
        sep: separation array (indexed by the values in idxStart/idxEnd)
        force: force array, same indexing as sep
        idxStart: start index of each region; needs at least three entries
        idxEnd: end index (exclusive) of each region
    Returns:
        tuple of <fit parameters per region>, <x values per region>,
        <predicted y values per region>, <separation at the overstretch>,
        <fitted force of region 1 evaluated at that separation>
    """
    mParams = []
    mPredictedX = []
    mPredictedY = []
    # make (linear fits for each)
    for realIdxInit,realIdxFinal in zip(idxStart,idxEnd):
        toFitX = sep[realIdxInit:realIdxFinal]
        toFitY = force[realIdxInit:realIdxFinal]
        # get the parameters
        params,_,predictedOStretch = pGenUtil.GenFit(toFitX,toFitY)
        # add the values we need...
        mParams.append(params)
        mPredictedX.append(toFitX)
        mPredictedY.append(predictedOStretch)
    # POST: all parameters calculated
    # need to get the start of the delta L0
    L0Init = pGenUtil.lineIntersectParam(mParams[0],mParams[1])
    # get the end of the final
    L0Final = pGenUtil.lineIntersectParam(mParams[1],mParams[2])
    approxDelL0 = L0Final-L0Init
    # get the midpoint, to find the overstretching force
    midPoint = L0Init + 0.5 * approxDelL0
    # get the index of the midpoint
    idxBetween = np.argmin(np.abs(sep-midPoint))
    # if for some reason the data is very noisy, just use the mean index of
    # the transition region (list index 1) to get the indices...
    indexOStretch = 1
    if (idxBetween < idxStart[indexOStretch]):
        startTx = idxStart[indexOStretch]
        endTx = idxEnd[indexOStretch]
        # np.mean returns a float; array indices must be integers, so
        # truncate explicitly (a float index raises an IndexError)
        idxBetween = int(np.mean([startTx,endTx]))
    whereOStretch = sep[idxBetween]
    oStretchForce = np.polyval(mParams[1],whereOStretch)
    return mParams,mPredictedX,mPredictedY,whereOStretch,oStretchForce
def _checkpointGen(filePath,orCall,force,unpack,useNpy,*args,**kwargs):
    """
    Load a cached result from 'filePath', or compute it and cache it.

    If 'filePath' exists and 'force' is false, the file is loaded and
    returned. Otherwise 'orCall(*args,**kwargs)' is called, its result is
    saved to 'filePath', and that result is returned.

    Args:
        filePath: where the checkpoint lives (or will be written)
        orCall: callable that produces the data when nothing is loaded
        force: if true, always call 'orCall' and overwrite the file
        unpack: forwarded to _npyLoad; not used on the pickle path
        useNpy: if true, use _npyLoad / _npySave; otherwise pickle in binary
        *args, **kwargs: passed through to 'orCall'
    Returns:
        the loaded data, or the freshly computed result of 'orCall'
    """
    if pGenUtil.isfile(filePath) and not force:
        if (useNpy):
            return _npyLoad(filePath,unpack)
        # assume we pickle in binary. The 'with' guarantees the handle is
        # released even if unpickling raises.
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only point this at checkpoints you trust.
        with open(filePath,'rb') as fh:
            return pickle.load(fh)
    # POST: nothing to load (or we were forced); regenerate.
    # make sure the output directory exists *before* the expensive call
    path = pGenUtil.getBasePath(filePath)
    pGenUtil.ensureDirExists(path)
    # POST: we can put our file here
    dataToSave = orCall(*args,**kwargs)
    if (useNpy):
        _npySave(filePath,dataToSave)
    else:
        # open the file in binary format for writing
        with open(filePath,'wb') as fh:
            pickle.dump(dataToSave,fh)
    return dataToSave
def saveFile(filePath,dataToSave,useNpy):
    """
    Writes 'dataToSave' to 'filePath'.

    The directory given by pGenUtil.getBasePath(filePath) is passed to
    pGenUtil.ensureDirExists before anything is written.

    Args:
        filePath: destination file
        dataToSave: the object to persist
        useNpy: if true, delegate to _npySave. Otherwise, pickle in binary
        using the highest available protocol
    """
    pGenUtil.ensureDirExists(pGenUtil.getBasePath(filePath))
    if (useNpy):
        _npySave(filePath,dataToSave)
        return
    # binary pickle. XXX make protocol specifiable?
    with open(filePath,'wb') as outFile:
        pickle.dump(dataToSave,outFile,pickle.HIGHEST_PROTOCOL)
def saveFile(filePath, dataToSave, useNpy):
    """
    Writes 'dataToSave' to 'filePath'.

    The directory given by pGenUtil.getBasePath(filePath) is passed to
    pGenUtil.ensureDirExists before anything is written.

    Args:
        filePath: destination file
        dataToSave: the object to persist
        useNpy: if true, delegate to _npySave. Otherwise, pickle in binary
        using the highest available protocol
    """
    pGenUtil.ensureDirExists(pGenUtil.getBasePath(filePath))
    if useNpy:
        _npySave(filePath, dataToSave)
        return
    # binary pickle. XXX make protocol specifiable?
    with open(filePath, 'wb') as outFile:
        pickle.dump(dataToSave, outFile, pickle.HIGHEST_PROTOCOL)
def getTouchoffCalibration(timeAppr,forceAppr,mDerivApproach,isApproach):
    """
    Fits a 'constant' region and a 'touchoff' region of force vs time, and
    uses the two fits to locate the surface.

    Args:
        timeAppr: time array
        forceAppr: force array, indexed like timeAppr
        mDerivApproach: passed to getCrossIdxFromApproach to get the start
        and end indices of the touchoff region
        isApproach: if true, everything *before* the start index is taken
        as constant. Otherwise, everything *after* the end index is
    Returns:
        CalibrateObject built from the indices, slices, both fits, the
        surface time, and the index of the time sample closest to it
    """
    idxStart,idxEnd = getCrossIdxFromApproach(mDerivApproach)
    # the touchoff region is the same either way; only the side we treat
    # as constant depends on the direction
    touchoffSlice = np.s_[idxStart:idxEnd]
    constantSlice = np.s_[0:idxStart] if isApproach else np.s_[idxEnd:]
    # fit the constant part first, then the touchoff part
    paramsFirst,stdFirst,predFirst = pGenUtil.GenFit(
        timeAppr[constantSlice],forceAppr[constantSlice])
    paramsSecond,stdSecond,predSecond = pGenUtil.GenFit(
        timeAppr[touchoffSlice],forceAppr[touchoffSlice])
    # XXX get error estimate using standard deviations?
    timeSurface = pGenUtil.lineIntersectParam(paramsFirst,paramsSecond)
    # closest sample to the fitted surface time
    idxSurface = np.argmin(np.abs(timeAppr-timeSurface))
    return CalibrateObject(idxStart,idxEnd,
                           constantSlice,touchoffSlice,
                           paramsFirst,stdFirst,predFirst,
                           paramsSecond,stdSecond,predSecond,
                           timeSurface,idxSurface)
def DemoJilaOrLocal(demoName,localPath):
    """
    Gets the demo directories from the default (JILA-hosted) location,
    falling back to 'localPath' if the default input directory is missing.

    Args:
        demoName: see GetDemoInOut
        localPath: the fallback location. It is used directly as the input
        directory, and "<localPath>Output<demoName>/" as the output directory
    Returns:
        tuple of <inputDir>,<outputDir>
    Raises:
        IOError: if the fallback input directory does not exist either
    """
    inDir,outDir = GetDemoInOut(demoName,raiseOnError=False)
    if (pGenUtil.dirExists(inDir)):
        # found the default location; nothing else to do
        return inDir,outDir
    print("Warning: Couldn't connect to JILA's Network. Using local data.")
    # get "sanitary paths", which are OS-independent (in theory..)
    localBase = pGenUtil.ensureEnds(localPath,"/")
    inDir = pGenUtil.getSanitaryPath(localBase)
    outDir = pGenUtil.getSanitaryPath(localBase + "Output" + demoName +"/")
    pGenUtil.ensureDirExists(outDir)
    if (not pGenUtil.dirExists(inDir)):
        # whoops...
        raise IOError("Demo Directory {:s} not found anywhere.".format(inDir))
    return inDir,outDir
def DemoJilaOrLocal(demoName, localPath):
    """
    Gets the demo directories from the default (JILA-hosted) location,
    falling back to 'localPath' if the default input directory is missing.

    Args:
        demoName: see GetDemoInOut
        localPath: the fallback location. It is used directly as the input
        directory, and "<localPath>Output<demoName>/" as the output directory
    Returns:
        tuple of <inputDir>,<outputDir>
    Raises:
        IOError: if the fallback input directory does not exist either
    """
    inDir, outDir = GetDemoInOut(demoName, raiseOnError=False)
    if pGenUtil.dirExists(inDir):
        # found the default location; nothing else to do
        return inDir, outDir
    print("Warning: Couldn't connect to JILA's Network. Using local data.")
    # get "sanitary paths", which are OS-independent (in theory..)
    localBase = pGenUtil.ensureEnds(localPath, "/")
    inDir = pGenUtil.getSanitaryPath(localBase)
    outDir = pGenUtil.getSanitaryPath(localBase + "Output" + demoName + "/")
    pGenUtil.ensureDirExists(outDir)
    if not pGenUtil.dirExists(inDir):
        # whoops...
        raise IOError("Demo Directory {:s} not found anywhere.".format(inDir))
    return inDir, outDir
def filter(self):
    """
    Returns the filtered data, re-using the saved file when possible.

    If self._filePath exists and self._force is false, the file is loaded
    and its entries are passed as keyword arguments to CheckpointData.
    Otherwise the filter indices are computed with self.getFilterIdx() and
    handed to DataFilter.filterDataStatic together with the data, the file
    path and the force flag.

    Returns:
        a CheckpointData when loading from file, otherwise whatever
        DataFilter.filterDataStatic returns
    """
    force = self._force
    if (pGenUtil.isfile(self._filePath) and not force):
        # return a new checkpoint object from the data
        data = np.load(self._filePath)
        try:
            # '**data' reads every stored entry up front, so nothing refers
            # back to the open file once the constructor has its arguments
            return CheckpointData(**data)
        finally:
            # an .npz archive holds an open file handle until closed; other
            # return types of np.load may not have close(), hence getattr
            closeArchive = getattr(data,"close",None)
            if (closeArchive is not None):
                closeArchive()
    idx = self.getFilterIdx()
    return DataFilter.filterDataStatic(self._data,idx,
                                       self._filePath,force)
def run(fitter,fitterParams,fitterCoeff,dataClass,label,valid=0.05,
        train="train.csv",test="test.csv",profile=False,nTrials=1,
        force=True,forceFeat=True,plot=False):
    """
    Sets up the output directories for this run, gets the data object
    (from the cache or by calling getData), and runs 'analyze' on it.

    Args:
        fitter,fitterParams,fitterCoeff: passed on to 'analyze'
        dataClass: passed to getData (via the pipeline) and to 'analyze'
        label: label for this run (ie: SVM/Boost/LogisticRegression). Used
        as the sub-directory of the output directory
        valid: passed to getData
        train: name of the training file, under the input directory
        test: name of the test file, passed to 'analyze'
        profile: profiling output directory is only made if this and
        'plot' are both true
        nTrials,force,plot: passed on to 'analyze'
        forceFeat: the 'force' argument for the data pipeline
    Returns:
        whatever 'analyze' returns
    """
    inDir,cacheDir,outDir = getDirsFromCmdLine()
    # everything for this run lives under its label
    outDir = pGenUtil.ensureDirExists(outDir + label +"/")
    # make sure the predictions directory is there
    pGenUtil.ensureDirExists(outDir + "predictions")
    wantProfile = (profile and plot)
    profileDir = \
        pGenUtil.ensureDirExists(outDir + "profile") if wantProfile else None
    # single-stage pipeline: <cache file>, <function>, <function args...>
    dataStage = [cacheDir+'data.pkl',getData,dataClass,outDir,
                 inDir+train,valid,False,profileDir]
    dataObj = pCheckUtil.pipeline([dataStage],forceFeat)
    return analyze(dataObj,inDir,outDir,test,fitter,fitterParams,
                   fitterCoeff,label,dataClass,nTrials,force,plot)
def GetDemoInOut(demoName,baseDir=DemoDir(),raiseOnError=True):
    """
    Builds the input and output directories for a demo, and tries to make
    sure both exist.

    Args:
        demoName: the name of the demo; appended directly to baseDir
        baseDir: the base directory. The directories returned are the
        sanitized "<baseDir><demoName>/Input/" and
        "<baseDir><demoName>/Output/"
        raiseOnError: if true, an OSError while ensuring the directories
        exist is re-raised. Otherwise, just prints a warning
    Returns:
        tuple of <inputDir>,<outputDir>
    """
    fullBase = baseDir + demoName
    inputV,outputV = (pGenUtil.getSanitaryPath(fullBase + subDir)
                      for subDir in ("/Input/","/Output/"))
    try:
        for demoDir in (inputV,outputV):
            pGenUtil.ensureDirExists(demoDir)
    except OSError as e:
        if (raiseOnError):
            raise e
        # e.g. permissions, or something isn't mounted
        print("Warning, couldn't open demo directories based in " + fullBase +
              ". Most likely, not connected to JILA network")
    return inputV,outputV
def GetDemoInOut(demoName, baseDir=DemoDir(), raiseOnError=True):
    """
    Builds the input and output directories for a demo, and tries to make
    sure both exist.

    Args:
        demoName: the name of the demo; appended directly to baseDir
        baseDir: the base directory. The directories returned are the
        sanitized "<baseDir><demoName>/Input/" and
        "<baseDir><demoName>/Output/"
        raiseOnError: if true, an OSError while ensuring the directories
        exist is re-raised. Otherwise, just prints a warning
    Returns:
        tuple of <inputDir>,<outputDir>
    """
    fullBase = baseDir + demoName
    inputV, outputV = (pGenUtil.getSanitaryPath(fullBase + subDir)
                       for subDir in ("/Input/", "/Output/"))
    try:
        for demoDir in (inputV, outputV):
            pGenUtil.ensureDirExists(demoDir)
    except OSError as e:
        if raiseOnError:
            raise e
        # e.g. permissions, or something isn't mounted
        print("Warning, couldn't open demo directories based in " + fullBase +
              ". Most likely, not connected to JILA network")
    return inputV, outputV
def getDatabaseFile(fileName,extension=".hdf"):
    """
    Builds the path where a saved file with this name would live: the
    database folder followed by the file name with the extension applied.

    Nothing is checked on disk, so the path is *not* guaranteed to exist.

    Args:
        fileName: the name of the file (usually according to the
        "TraceData" table, field "FileTimSepFor")
        extension: the required extension, applied via pGenUtil.ensureEnds
    Returns:
        getDatabaseFolder() concatenated with the file name
    """
    nameWithExt = pGenUtil.ensureEnds(fileName,extension)
    folder = getDatabaseFolder()
    return folder + nameWithExt
def getDatabaseFile(fileName, extension=".hdf"):
    """
    Builds the path where a saved file with this name would live: the
    database folder followed by the file name with the extension applied.

    Nothing is checked on disk, so the path is *not* guaranteed to exist.

    Args:
        fileName: the name of the file (usually according to the
        "TraceData" table, field "FileTimSepFor")
        extension: the required extension, applied via pGenUtil.ensureEnds
    Returns:
        getDatabaseFolder() concatenated with the file name
    """
    nameWithExt = pGenUtil.ensureEnds(fileName, extension)
    folder = getDatabaseFolder()
    return folder + nameWithExt
def analyze(dataObj,dataDir,outDir,testFile,createFitter,fitterParams,
            fitterCoeff,label,dataClass,nTrials,force,plot):
    """
    Gets the accuracy statistics over all trials and parameters (from a
    checkpoint file under outDir, or by running getAllTrials), and
    optionally plots them.

    Args:
        dataObj: the (training) data object, passed to getAllTrials
        dataDir: directory holding 'testFile'
        outDir: output directory; predictions go in "<outDir>predictions/"
        testFile: name of the test file, loaded with getData(...,test=True)
        createFitter: takes in the current iteration 'i', and returns a
        fitter, e.g. "return LogisticRegression(C=[10,30,40][i])"
        fitterParams: called with no arguments; gives the values of the
        parameters used at each iteration
        fitterCoeff,dataClass,nTrials: passed to getAllTrials
        label: passed to plotAccuracies
        force: passed to pCheckUtil.getCheckpoint
        plot: if true, plot the accuracies versus the fit parameter
    Returns:
        tuple of <means>,<std>, as given by the checkpoint
    """
    predictDir = pGenUtil.ensureDirExists(outDir + "predictions/")
    testDat = getData(dataClass,outDir,dataDir + testFile,test=True)
    params = fitterParams()
    # the checkpoint name records the number of repeats and parameters
    cacheName = "accuracyTrials_{:d}repeats_{:d}params.pkl".format(
        nTrials,len(params))
    # everything after 'force' is handed to getAllTrials
    trialArgs = (params,outDir,predictDir,dataObj,testDat,nTrials,
                 fitterCoeff,createFitter,plot,dataClass)
    means,std = pCheckUtil.getCheckpoint(outDir+cacheName,getAllTrials,
                                         force,*trialArgs)
    if (plot):
        plotAccuracies(outDir,label,means,std,params)
    return means,std
def _checkpointGen(filePath, orCall, force, unpack, useNpy, *args, **kwargs):
    """
    Load a cached result from 'filePath', or compute it and cache it.

    Args:
        filePath: where the checkpoint lives (or will be written)
        orCall: callable that produces the data when nothing is loaded
        force: if true, always call 'orCall' and re-save
        unpack: not used by this function
        useNpy: passed to loadFile / saveFile to pick the file format
        *args, **kwargs: passed through to 'orCall'
    Returns:
        the loaded data, or the freshly computed result of 'orCall'
    """
    canLoad = pGenUtil.isfile(filePath) and not force
    if canLoad:
        return loadFile(filePath, useNpy)
    # POST: no usable file (or forced); run the real call
    result = orCall(*args, **kwargs)
    # save the data, so next time we can just load
    saveFile(filePath, result, useNpy)
    return result
def _checkpointGen(filePath,orCall,force,unpack,useNpy,*args,**kwargs):
    """
    Load a cached result from 'filePath', or compute it and cache it.

    Args:
        filePath: where the checkpoint lives (or will be written)
        orCall: callable that produces the data when nothing is loaded
        force: if true, always call 'orCall' and re-save
        unpack: not used by this function
        useNpy: passed to loadFile / saveFile to pick the file format
        *args, **kwargs: passed through to 'orCall'
    Returns:
        the loaded data, or the freshly computed result of 'orCall'
    """
    canLoad = pGenUtil.isfile(filePath) and not force
    if (canLoad):
        return loadFile(filePath,useNpy)
    # POST: no usable file (or forced); run the real call
    result = orCall(*args,**kwargs)
    # save the data, so next time we can just load
    saveFile(filePath,result,useNpy)
    return result
def savefig(figure,fileName,close=True,tight=True,**kwargs):
    """
    Saves 'figure' to 'fileName', defaulting to SVG if no extension is given.

    Args:
        figure: the figure to save; needs savefig() and get_dpi()
        fileName: where to save. If the base name has no ".", ".svg" is
        appended. Otherwise the existing extension picks the format
        close: if true, close the figure after saving
        tight: if true, call tight_layout before saving
        **kwargs: passed on to figure.savefig
    """
    if (tight):
        # 'pad' has to go by keyword: current matplotlib makes the
        # tight_layout arguments keyword-only, so the old positional 'True'
        # raises a TypeError. pad=1.0 is what 'True' used to evaluate to.
        # NOTE(review): this tightens the *current* pyplot figure, which is
        # only 'figure' if the caller hasn't switched figures; confirm
        plt.tight_layout(pad=1.0)
    baseName = util.getFileFromPath(fileName)
    if ("." not in baseName):
        formatStr = ".svg"
        fullName = fileName + formatStr
    else:
        _,formatStr = os.path.splitext(fileName)
        fullName = fileName
    # strip the leading "." to get the format name
    figure.savefig(fullName,format=formatStr[1:],
                   dpi=figure.get_dpi(),**kwargs)
    if (close):
        plt.close(figure)
def pipeline(objects, force=None):
    """
    Runs (or loads) a chain of checkpointed stages and returns the output
    of the last one.

    Each element of 'objects' is [<file>, <function>, <extra args>...]. Each
    stage takes the previous stage's output plus its own extra args:
        f1(f1_args), returning f2_chain
        f2(f2_chain, f2_args), returning f3_chain
        ...
        fN(fN_chain, fN_args), returning whatever.

    Args:
        objects: list of stages, as above. Files ending in '.pkl' are
        flagged as non-numpy; everything else as numpy
        force: parsed by _pipeListParser (default False) into one flag per
        stage. By default we don't force: just load whatever exists
    Returns:
        the output of the final stage
    """
    present = [pGenUtil.isfile(stage[pipe_fileIdx]) for stage in objects]
    nStages = len(objects)
    # one force flag per stage
    force = _pipeListParser(force, False, nStages)
    # how each stage is saved
    asNumpy = [not stage[pipe_fileIdx].endswith('.pkl') for stage in objects]
    # a stage can simply be loaded if its file is there and it isn't forced
    loadable = [
        exists and (not forced) for exists, forced in zip(present, force)
    ]
    if (False not in loadable):
        # nothing needs running: just load the last...
        return _pipeHelper(objects[-1], False, asNumpy[-1])
    # need to run at least one; start from the first that can't be loaded
    firstToRun = loadable.index(False)
    chained = None
    if (firstToRun != 0):
        # not at the start: load the 'most downstream' stage before it
        prev = firstToRun - 1
        chained = _pipeHelper(objects[prev], force[prev], asNumpy[prev],
                              chained)
    # POST: 'chained' is set up, if we need it.
    for i in range(firstToRun, nStages):
        chained = _pipeHelper(objects[i], force[i], asNumpy[i], chained)
    return chained
def savefig(figure, fileName, close=True, tight=True, **kwargs):
    """
    Saves 'figure' to 'fileName', defaulting to SVG if no extension is given.

    Args:
        figure: the figure to save; needs savefig() and get_dpi()
        fileName: where to save. If the base name has no ".", ".svg" is
        appended. Otherwise the existing extension picks the format
        close: if true, close the figure after saving
        tight: if true, call tight_layout before saving
        **kwargs: passed on to figure.savefig
    """
    if tight:
        # 'pad' has to go by keyword: current matplotlib makes the
        # tight_layout arguments keyword-only, so the old positional 'True'
        # raises a TypeError. pad=1.0 is what 'True' used to evaluate to.
        # NOTE(review): this tightens the *current* pyplot figure, which is
        # only 'figure' if the caller hasn't switched figures; confirm
        plt.tight_layout(pad=1.0)
    baseName = util.getFileFromPath(fileName)
    if "." not in baseName:
        formatStr = ".svg"
        fullName = fileName + formatStr
    else:
        _, formatStr = os.path.splitext(fileName)
        fullName = fileName
    # strip the leading "." to get the format name
    figure.savefig(fullName, format=formatStr[1:], dpi=figure.get_dpi(),
                   **kwargs)
    if close:
        plt.close(figure)
def pipeline(objects,force=None):
    """
    Runs (or loads) a chain of checkpointed stages and returns the output
    of the last one.

    Each element of 'objects' is [<file>,<function>,<extra args>...]. Each
    stage takes the previous stage's output plus its own extra args:
        f1(f1_args), returning f2_chain
        f2(f2_chain,f2_args), returning f3_chain
        ...
        fN(fN_chain,fN_args), returning whatever.

    Args:
        objects: list of stages, as above. Files ending in '.pkl' are
        flagged as non-numpy; everything else as numpy
        force: parsed by _pipeListParser (default False) into one flag per
        stage. By default we don't force: just load whatever exists
    Returns:
        the output of the final stage
    """
    present = [pGenUtil.isfile(stage[pipe_fileIdx]) for stage in objects]
    nStages = len(objects)
    # one force flag per stage
    force = _pipeListParser(force,False,nStages)
    # how each stage is saved
    asNumpy = [not stage[pipe_fileIdx].endswith('.pkl') for stage in objects]
    # a stage can simply be loaded if its file is there and it isn't forced
    loadable = [exists and (not forced)
                for exists,forced in zip(present,force)]
    if (False not in loadable):
        # nothing needs running: just load the last...
        return _pipeHelper(objects[-1],False,asNumpy[-1])
    # need to run at least one; start from the first that can't be loaded
    firstToRun = loadable.index(False)
    chained = None
    if (firstToRun != 0):
        # not at the start: load the 'most downstream' stage before it
        prev = firstToRun-1
        chained = _pipeHelper(objects[prev],force[prev],asNumpy[prev],
                              chained)
    # POST: 'chained' is set up, if we need it.
    for i in range(firstToRun,nStages):
        chained = _pipeHelper(objects[i],force[i],asNumpy[i],chained)
    return chained
# force floating point division. Can still use integer with // from __future__ import division # This file is used for importing the common utilities classes. import numpy as np import matplotlib.pyplot as plt # need to add the utilities class. Want 'home' to be platform independent # import the patrick-specific utilities import GenUtilities as pGenUtil import PlotUtilities as pPlotUtil import CheckpointUtilities as pCheckUtil from scipy.stats import norm outDir = "./out/" pGenUtil.ensureDirExists(outDir) mean = 0 stdev = 1 epsilon = stdev / 100 nPoints = 1000 normDist = norm(loc=mean, scale=stdev) offsets = np.linspace(mean - 3 * stdev, mean + 3 * stdev, nPoints) probability = 2 * (normDist.cdf( (offsets + epsilon - mean) / stdev) - normDist.cdf( (offsets - epsilon - mean) / stdev)) fig = pPlotUtil.figure() plt.plot(offsets,probability,'r-', label="mu = {:.1f}, sigma = {:.1f}, epsilon = {:.2f}".\ format(mean,stdev,epsilon)) plt.xlabel("offset for CDF, c0") plt.ylabel("Probability (arbitrary units) to land within epsilon of c0")
fig = pPlotUtil.figure() plt.subplot(1,2,1) plt.title("Mean of g(xBar)-g(mu) approaches 0",fontsize=fontsize) expMean = 0 plt.plot(nVals,means,'ko',label="Actual Mean") plt.axhline(expMean,color='b',linestyle='--', label="Expected Mean: {:.2g}".format(expMean)) plt.ylim(-min(means),max(means)*1.1) plt.xlabel("Value of n for binomial",fontsize=fontsize) plt.ylabel("Value of g(xBar)-g(mu)",fontsize=fontsize) plt.legend(fontsize=fontsize) pPlotUtil.tickAxisFont() plt.subplot(1,2,2) plt.semilogy(nVals,varReal,'ko',label="Actual Variance") plt.semilogy(nVals,varDist,'b--',label="Expected Variance") plt.title("Variance of g(xBar)-g(mu)\n approaches expected", fontsize=fontsize) plt.xlabel("Value of n for binomial",fontsize=fontsize) plt.ylabel("Value of g(xBar) variance",fontsize=fontsize) pPlotUtil.tickAxisFont() plt.legend(fontsize=fontsize) pPlotUtil.savefig(fig,outDir + "MeanVar") if __name__ == '__main__': _nVals = np.array([10,20,50,75,100,150,200,350,500,1000]) pGenUtil.ensureDirExists(outDir) _p=1/3. _nPoints = 1e5 dataMatrix = getBinomials(_nVals,_p,_nPoints) plotBinomials(dataMatrix,_nVals,_p)
def callIfNoFile(cls,toCall,fileN):
    """
    Loads 'fileN' with np.load if it exists; otherwise returns
    toCall(fileN).

    Args:
        toCall: callable taking the file name, used when there is no file
        fileN: the file to look for
    Returns:
        the loaded file contents, or the result of 'toCall'
    """
    if (not pGenUtil.isfile(fileN)):
        # nothing saved yet; let the caller's function deal with it
        return toCall(fileN)
    return np.load(fileN)
def __init__(self,data,mFile,force=False,frameRate=0.1,ext='.npz'):
    '''
    Stores the data object and the settings for this instance.

    Args:
        data: the data object
        mFile: the file path; 'ext' is applied with pGenUtil.ensureEnds
        force: stored as self._force
        frameRate: stored as self._frameRate
        ext: the extension the stored file path should end with
    '''
    self._data = data
    # stored path always carries the extension
    self._filePath = pGenUtil.ensureEnds(mFile,ext)
    self._force,self._frameRate = force,frameRate
if (generatePNGs): X,Y,c1,c2 = getAllStages(fileDict,condition,trial,mWorking, condNum,trialNum) saveAsSubplot(X,Y,c1,c2,allStageDir,vizFileFormat) # format the ffmpeg arguments as we want them # POST: all videos saved for this trial. make the movie generateMovie(allStageDir,condition,trialNum,vizFileFormat) def saveConditions(condition,condNum,conditionKeys,workDir,outDir): for j,trial in enumerate(conditionKeys): saveSingleTrial(workDir,outDir,condition,condNum,trial,j) if __name__ == '__main__': inDir,workDir,outDir = parseCmdLine() # next two must match, for the automatic video encoding to work gUtil.ensureDirExists(outDir) gUtil.ensureDirExists(workDir) # get all the files. returns a dictionary of dictionaries. # each key in the outer (first) dictionary is a condition # each key in the innter (second) dictionary is a trial for that condition fileDict = getCheckpointFileDict(inDir) # loop through each condition and trial conditionArr = fileDict.keys() processes= [] for i,condition in enumerate(conditionArr): print("Forking off a process for condition {:s}".format(condition)) func = saveConditions funcArgs = (condition,i,fileDict[condition].keys(),workDir,outDir) p = (Process(target=func, args=funcArgs)) processes.append(p) p.start()