def _checkpointGen(filePath,orCall,force,unpack,useNpy,*args,**kwargs):
    """
    Loads a cached result from 'filePath', or computes it and caches it.

    If the file at 'filePath' exists and 'force' is false, the file is loaded
    and its contents are returned. Otherwise 'orCall(*args,**kwargs)' is
    called, its result is saved to 'filePath', and that result is returned.

    Args:
        filePath: where the checkpoint lives (or will be written)
        orCall: callable producing the data when no usable checkpoint exists
        force: if true, any existing file is ignored; 'orCall' is always
        called and its result is saved again
        unpack: forwarded to _npyLoad when 'useNpy' is true. It is *not*
        used on the pickle path, where the object is returned as stored
        useNpy: if true, load/save through _npyLoad / _npySave; otherwise
        pickle in binary mode
        *args,**kwargs: passed straight through to 'orCall'
    Returns:
        the loaded data, or the freshly computed result of 'orCall'
    """
    if pGenUtil.isfile(filePath) and not force:
        if useNpy:
            return _npyLoad(filePath,unpack)
        # assume we pickle in binary. The context manager closes the handle
        # even when unpickling raises (the old open/close pair leaked it).
        # NOTE: unpickling can execute arbitrary code; only point this at
        # checkpoint files that this code wrote itself.
        with open(filePath,'rb') as fh:
            return pickle.load(fh)
    # couldn't find the file (or we were told to ignore it).
    # make sure the directory we are about to write into exists
    path = pGenUtil.getBasePath(filePath)
    pGenUtil.ensureDirExists(path)
    # POST: we can put our file here
    dataToSave = orCall(*args,**kwargs)
    if useNpy:
        _npySave(filePath,dataToSave)
    else:
        # open the file in binary format for writing
        with open(filePath,'wb') as fh:
            pickle.dump(dataToSave,fh)
    return dataToSave
def GetDemoInOut(demoName, baseDir=DemoDir(), raiseOnError=True):
    """
    Returns the demo input and output directories, given a path baseDir and
    name demoName, after asking pGenUtil.ensureDirExists to ensure both.

    The directories are "<baseDir><demoName>/Input/" and
    "<baseDir><demoName>/Output/" (each passed through
    pGenUtil.getSanitaryPath). Note that baseDir and demoName are joined by
    plain string concatenation, so baseDir must already end with a separator.

    Args:
        demoName: The name of the demo. Assumed to be the subdir under
        "baseDir" we want to use
        baseDir: the base directory. The default is whatever DemoDir()
        returned when this function was defined (it is evaluated once).
        raiseOnError: if true, an OSError while ensuring the directories is
        re-raised. Otherwise, just prints a warning that something went
        wrong and still returns the paths.
    Returns:
        tuple of <inputDir>,<outputDir>
    Raises:
        OSError: if a directory could not be ensured and raiseOnError is true
    """
    fullBase = baseDir + demoName
    inputV = pGenUtil.getSanitaryPath(fullBase + "/Input/")
    outputV = pGenUtil.getSanitaryPath(fullBase + "/Output/")
    try:
        pGenUtil.ensureDirExists(inputV)
        pGenUtil.ensureDirExists(outputV)
    except OSError:
        if raiseOnError:
            # bare raise re-raises the active exception, traceback intact
            raise
        print("Warning, couldn't open demo directories based in " + fullBase + ". Most likely, not connected to JILA network")
    return inputV, outputV
def _checkpointGen(filePath, orCall, force, unpack, useNpy, *args, **kwargs):
    """
    Returns the data checkpointed at 'filePath', creating the checkpoint
    from 'orCall' when it is missing or when 'force' is set.

    Args:
        filePath: location of the checkpoint file
        orCall: function called as orCall(*args, **kwargs) to generate the
        data when the checkpoint is not loaded
        force: if true, skip loading even when the file exists, call
        'orCall', and save its result
        unpack: handed to _npyLoad on the npy path only; the pickle path
        does not look at it and returns the object exactly as stored
        useNpy: true to go through _npyLoad / _npySave, false to use pickle
        on a file opened in binary mode
        *args, **kwargs: forwarded unchanged to 'orCall'
    Returns:
        the contents of the checkpoint if it was loaded, otherwise the value
        returned by 'orCall' (which has then also been saved)
    """
    haveCheckpoint = pGenUtil.isfile(filePath) and not force
    if haveCheckpoint:
        if useNpy:
            return _npyLoad(filePath, unpack)
        # assume we pickle in binary; 'with' guarantees the handle is closed
        # even if pickle.load raises, which open()/close() did not.
        # NOTE: pickle.load runs code embedded in the file -- only use this
        # on checkpoints produced by trusted code.
        with open(filePath, 'rb') as fh:
            return pickle.load(fh)
    # no usable checkpoint: make sure the target directory exists
    path = pGenUtil.getBasePath(filePath)
    pGenUtil.ensureDirExists(path)
    # POST: we can put our file here
    dataToSave = orCall(*args, **kwargs)
    if useNpy:
        _npySave(filePath, dataToSave)
    else:
        # open the file in binary format for writing
        with open(filePath, 'wb') as fh:
            pickle.dump(dataToSave, fh)
    return dataToSave
def DemoJilaOrLocal(demoName,localPath):
    """
    Finds the input/output directories for a demo, preferring the default
    (JILA-hosted) location and falling back to a local path.

    The default location is asked for first (via GetDemoInOut, with errors
    suppressed). If its input directory exists, that pair is returned.
    Otherwise a warning is printed, 'localPath' itself becomes the input
    directory and "<localPath>Output<demoName>/" becomes the output
    directory (which is ensured to exist).

    Args:
        demoName: see GetDemoInOut
        localPath: where to look for the input data, and where to put the
        output directory, if the default location can't be found.
    Returns:
        tuple of <inputDir>,<outputDir>
    Raises:
        IOError: if the local input directory doesn't exist either
    """
    remoteIn,remoteOut = GetDemoInOut(demoName,raiseOnError=False)
    if pGenUtil.dirExists(remoteIn):
        # the default location is reachable; nothing else to do
        return remoteIn,remoteOut
    print("Warning: Couldn't connect to JILA's Network. Using local data.")
    # build the local paths, run through getSanitaryPath
    localBase = pGenUtil.ensureEnds(localPath,"/")
    localIn = pGenUtil.getSanitaryPath(localBase)
    localOut = pGenUtil.getSanitaryPath(localBase + "Output" + demoName +"/")
    # the output directory is ensured before the input directory is checked
    pGenUtil.ensureDirExists(localOut)
    if pGenUtil.dirExists(localIn):
        return localIn,localOut
    # neither location has the input data
    raise IOError("Demo Directory {:s} not found anywhere.".format(localIn))
def GetDemoInOut(demoName,baseDir=DemoDir(),raiseOnError=True):
    """
    Builds the input and output directory paths for a demo and asks
    pGenUtil.ensureDirExists to ensure both of them.

    The paths are "<baseDir><demoName>/Input/" and
    "<baseDir><demoName>/Output/", each run through
    pGenUtil.getSanitaryPath. baseDir and demoName are concatenated as-is,
    so baseDir is expected to end with a path separator already.

    Args:
        demoName: The name of the demo. Assumed to be the subdir under
        "baseDir" we want to use
        baseDir: the base directory. Defaults to the value DemoDir() had at
        function-definition time (default arguments are evaluated once).
        raiseOnError: if true, an OSError hit while ensuring the directories
        propagates to the caller. Otherwise a warning is printed and the
        paths are returned anyway.
    Returns:
        tuple of <inputDir>,<outputDir>
    Raises:
        OSError: when ensuring a directory fails and raiseOnError is true
    """
    fullBase = baseDir + demoName
    inputV = pGenUtil.getSanitaryPath(fullBase + "/Input/")
    outputV = pGenUtil.getSanitaryPath(fullBase + "/Output/")
    try:
        pGenUtil.ensureDirExists(inputV)
        pGenUtil.ensureDirExists(outputV)
    except OSError:
        if raiseOnError:
            # re-raise the exception being handled without touching its
            # traceback
            raise
        print("Warning, couldn't open demo directories based in " + fullBase + ". Most likely, not connected to JILA network")
    return inputV,outputV
def saveFile(filePath, dataToSave, useNpy):
    """
    Saves 'dataToSave' to 'filePath', first ensuring the containing
    directory (as given by pGenUtil.getBasePath) exists.

    Args:
        filePath: where to write the data
        dataToSave: the object to write
        useNpy: if true, the write is delegated to _npySave. Otherwise the
        object is pickled, in binary mode, with pickle.HIGHEST_PROTOCOL
    Returns:
        None
    """
    pGenUtil.ensureDirExists(pGenUtil.getBasePath(filePath))
    if useNpy:
        _npySave(filePath, dataToSave)
        return
    # pickle path: binary file, newest protocol this interpreter supports
    # XXX make protocol specifiable?
    with open(filePath, 'wb') as outHandle:
        pickle.dump(dataToSave, outHandle, pickle.HIGHEST_PROTOCOL)
def saveFile(filePath,dataToSave,useNpy):
    """
    Writes 'dataToSave' to 'filePath'. The directory holding the file is
    ensured (pGenUtil.ensureDirExists) before anything is written.

    Args:
        filePath: destination of the saved data
        dataToSave: what to save
        useNpy: false pickles the data (binary file, pickle.HIGHEST_PROTOCOL);
        true hands the data to _npySave instead
    Returns:
        None
    """
    targetDir = pGenUtil.getBasePath(filePath)
    pGenUtil.ensureDirExists(targetDir)
    if not useNpy:
        # XXX make protocol specifiable?
        with open(filePath,'wb') as pickleFile:
            pickle.dump(dataToSave,pickleFile,pickle.HIGHEST_PROTOCOL)
    else:
        _npySave(filePath,dataToSave)
def run(fitter,fitterParams,fitterCoeff,dataClass,label,valid=0.05,
        train="train.csv",test="test.csv",profile=False,nTrials=1,
        force=True,forceFeat=True,plot=False):
    """
    Sets up the output directories for one labelled run, gets the data
    object (through pCheckUtil.pipeline, so possibly from cache), and hands
    everything to analyze.

    Args:
        fitter,fitterParams,fitterCoeff: passed on to analyze unchanged
        dataClass: passed to getData (through the pipeline) and to analyze
        label: the label for this run (ie: SVM/Boost/LogisticRegression);
        used as the name of the output subdirectory
        valid: passed to getData through the pipeline
        train: training file name, appended to the input directory
        test: test file name, passed to analyze
        profile,plot: a profile directory is only ensured when both are
        true; otherwise None is passed to getData in its place
        nTrials,force: passed on to analyze unchanged
        forceFeat: second argument of pCheckUtil.pipeline
    Returns:
        whatever analyze returns
    """
    inDir,cacheDir,baseOutDir = getDirsFromCmdLine()
    # the result of ensureDirExists is used as the directory path from here on
    outDir = pGenUtil.ensureDirExists(baseOutDir + label +"/")
    # return value isn't needed here; the call is kept so the directory is
    # still ensured at this point
    pGenUtil.ensureDirExists(outDir + "predictions")
    profileDir = None
    if (profile and plot):
        profileDir = pGenUtil.ensureDirExists(outDir + "profile")
    # a single pipeline step: the cache file, then the function to call and
    # its arguments
    dataStep = [cacheDir+'data.pkl',getData,dataClass,outDir,
                inDir+train,valid,False,profileDir]
    dataObj = pCheckUtil.pipeline([dataStep],forceFeat)
    return analyze(dataObj,inDir,outDir,test,fitter,fitterParams,
                   fitterCoeff,label,dataClass,nTrials,force,plot)
def analyze(dataObj,dataDir,outDir,testFile,createFitter,fitterParams,
            fitterCoeff,label,dataClass,nTrials,force,plot):
    """
    Gets the means and standard deviations from getAllTrials (checkpointed
    through pCheckUtil.getCheckpoint) and optionally plots them.

    Args:
        dataObj: the data object, passed on to getAllTrials
        dataDir: directory the test file lives in
        outDir: output directory; the checkpoint file and the "predictions/"
        subdirectory are placed under it
        testFile: name of the test file, appended to dataDir
        createFitter: takes in the current iteration 'i', and returns a
        fitter, e.g. "return LogisticRegression(C=[10,30,40][i])"
        fitterParams: called with no arguments; gives the value of the
        parameters used at each iter
        fitterCoeff: passed on to getAllTrials
        label: passed to plotAccuracies
        dataClass: passed to getData and to getAllTrials
        nTrials: number of repeats; part of the checkpoint file name
        force: passed to pCheckUtil.getCheckpoint
        plot: if true, plotAccuracies is called; also passed to getAllTrials
    Returns:
        tuple of <means>,<std>
    """
    predictionDir = pGenUtil.ensureDirExists(outDir + "predictions/")
    testData = getData(dataClass,outDir,dataDir + testFile,test=True)
    paramValues = fitterParams()
    # the checkpoint name encodes the number of repeats and of parameters
    checkpointName = "accuracyTrials_{:d}repeats_{:d}params.pkl".format(
        nTrials,len(paramValues))
    # everything after 'force' is forwarded by getCheckpoint to getAllTrials
    trialArgs = (paramValues,outDir,predictionDir,dataObj,testData,nTrials,
                 fitterCoeff,createFitter,plot,dataClass)
    means,std = pCheckUtil.getCheckpoint(outDir + checkpointName,
                                         getAllTrials,force,*trialArgs)
    # plot the accuracies versus the fit parameter.
    if plot:
        plotAccuracies(outDir,label,means,std,paramValues)
    return means,std
# NOTE(review): the statements down to pPlotUtil.savefig read like the tail of
# a plotting routine whose header is not visible here; nVals, means, varReal,
# varDist, fontsize and outDir are all defined outside this span. Confirm
# against the full file.
fig = pPlotUtil.figure()
# left panel: the means versus n, with a dashed line at the expected mean (0)
plt.subplot(1,2,1)
plt.title("Mean of g(xBar)-g(mu) approaches 0",fontsize=fontsize)
expMean = 0
plt.plot(nVals,means,'ko',label="Actual Mean")
plt.axhline(expMean,color='b',linestyle='--',
            label="Expected Mean: {:.2g}".format(expMean))
# NOTE(review): the lower limit is -min(means), not min(means). If the
# smallest mean is negative, the lower limit ends up above it and that point
# falls outside the axes -- confirm this is intended.
plt.ylim(-min(means),max(means)*1.1)
plt.xlabel("Value of n for binomial",fontsize=fontsize)
plt.ylabel("Value of g(xBar)-g(mu)",fontsize=fontsize)
plt.legend(fontsize=fontsize)
pPlotUtil.tickAxisFont()
# right panel: actual versus expected variance, on a log y axis
plt.subplot(1,2,2)
plt.semilogy(nVals,varReal,'ko',label="Actual Variance")
plt.semilogy(nVals,varDist,'b--',label="Expected Variance")
plt.title("Variance of g(xBar)-g(mu)\n approaches expected",
          fontsize=fontsize)
plt.xlabel("Value of n for binomial",fontsize=fontsize)
plt.ylabel("Value of g(xBar) variance",fontsize=fontsize)
pPlotUtil.tickAxisFont()
plt.legend(fontsize=fontsize)
# saved under the name "MeanVar" in outDir
pPlotUtil.savefig(fig,outDir + "MeanVar")

# script entry point: get the binomial data for a range of n, then plot it
if __name__ == '__main__':
    # the values of n to use
    _nVals = np.array([10,20,50,75,100,150,200,350,500,1000])
    pGenUtil.ensureDirExists(outDir)
    # the trailing '.' makes this a float division: p = 0.333...
    _p=1/3.
    # NOTE(review): 1e5 is a float, not an int -- confirm getBinomials
    # accepts a float here
    _nPoints = 1e5
    dataMatrix = getBinomials(_nVals,_p,_nPoints)
    plotBinomials(dataMatrix,_nVals,_p)
# force floating point division. Can still use integer with // from __future__ import division # This file is used for importing the common utilities classes. import numpy as np import matplotlib.pyplot as plt # need to add the utilities class. Want 'home' to be platform independent # import the patrick-specific utilities import GenUtilities as pGenUtil import PlotUtilities as pPlotUtil import CheckpointUtilities as pCheckUtil from scipy.stats import norm outDir = "./out/" pGenUtil.ensureDirExists(outDir) mean = 0 stdev = 1 epsilon = stdev / 100 nPoints = 1000 normDist = norm(loc=mean, scale=stdev) offsets = np.linspace(mean - 3 * stdev, mean + 3 * stdev, nPoints) probability = 2 * (normDist.cdf( (offsets + epsilon - mean) / stdev) - normDist.cdf( (offsets - epsilon - mean) / stdev)) fig = pPlotUtil.figure() plt.plot(offsets,probability,'r-', label="mu = {:.1f}, sigma = {:.1f}, epsilon = {:.2f}".\ format(mean,stdev,epsilon)) plt.xlabel("offset for CDF, c0") plt.ylabel("Probability (arbitrary units) to land within epsilon of c0")
# NOTE(review): the statements down to generateMovie read like the tail of a
# per-trial routine whose header is not visible here; generatePNGs,
# condition, trial, mWorking, condNum, trialNum, allStageDir and
# vizFileFormat are defined outside this span. The original nesting could not
# be recovered: generateMovie is assumed to run whether or not the PNGs were
# regenerated -- confirm against the full file.
if (generatePNGs):
    X,Y,c1,c2 = getAllStages(fileDict,condition,trial,mWorking,
                             condNum,trialNum)
    saveAsSubplot(X,Y,c1,c2,allStageDir,vizFileFormat)
# format the ffmpeg arguments as we want them
# POST: all videos saved for this trial. make the movie
generateMovie(allStageDir,condition,trialNum,vizFileFormat)

def saveConditions(condition,condNum,conditionKeys,workDir,outDir):
    """
    Calls saveSingleTrial once for every trial of a single condition.

    Args:
        condition: the condition being saved; passed to saveSingleTrial
        condNum: the index of this condition; passed to saveSingleTrial
        conditionKeys: iterable of the trials for this condition
        workDir,outDir: passed to saveSingleTrial unchanged
    """
    # j is the position of the trial within conditionKeys
    for j,trial in enumerate(conditionKeys):
        saveSingleTrial(workDir,outDir,condition,condNum,trial,j)

# script entry point: start one process per condition
if __name__ == '__main__':
    inDir,workDir,outDir = parseCmdLine()
    # next two must match, for the automatic video encoding to work
    gUtil.ensureDirExists(outDir)
    gUtil.ensureDirExists(workDir)
    # get all the files. returns a dictionary of dictionaries.
    # each key in the outer (first) dictionary is a condition
    # each key in the inner (second) dictionary is a trial for that condition
    fileDict = getCheckpointFileDict(inDir)
    # loop through each condition and trial
    conditionArr = fileDict.keys()
    processes= []
    for i,condition in enumerate(conditionArr):
        print("Forking off a process for condition {:s}".format(condition))
        func = saveConditions
        # NOTE(review): on Python 3 '.keys()' is a view object, which cannot
        # be pickled; this would matter if the process start method pickles
        # its arguments -- confirm the target platform.
        funcArgs = (condition,i,fileDict[condition].keys(),workDir,outDir)
        p = (Process(target=func, args=funcArgs))
        # the handle is kept in 'processes'; no join is visible in this span
        processes.append(p)
        p.start()