def _checkpointGen(filePath,orCall,force,unpack,useNpy,*args,**kwargs):
    """
    Loads a cached result from 'filePath' if possible; otherwise computes it
    with 'orCall', saves it to 'filePath', and returns it.

    Args:
        filePath: where the checkpoint is read from / written to
        orCall: callable invoked as orCall(*args,**kwargs) when the
        checkpoint is missing or 'force' is true
        force: if true, ignore any existing file; always call 'orCall' and
        overwrite the checkpoint
        unpack: forwarded to _npyLoad when 'useNpy' is true. It is not used
        for pickled checkpoints.
        useNpy: if true, load/save through _npyLoad/_npySave; otherwise the
        data is pickled in binary mode
        *args, **kwargs: passed through to 'orCall'
    Returns:
        the loaded checkpoint, or the freshly computed return of 'orCall'
    """
    if pGenUtil.isfile(filePath) and not force:
        if useNpy:
            return _npyLoad(filePath,unpack)
        # assume we pickle in binary. The context manager closes the handle
        # even if unpickling raises (e.g. truncated or corrupt checkpoint).
        # NOTE: unpickling can execute arbitrary code; only point this at
        # checkpoint files you trust.
        with open(filePath,'rb') as fh:
            return pickle.load(fh)
    # POST: no usable checkpoint (or we were forced to regenerate).
    # make sure the directory we will write into exists before doing the work
    path = pGenUtil.getBasePath(filePath)
    pGenUtil.ensureDirExists(path)
    # POST: we can put our file here
    dataToSave = orCall(*args,**kwargs)
    if useNpy:
        _npySave(filePath,dataToSave)
    else:
        # open the file in binary format for writing
        with open(filePath,'wb') as fh:
            pickle.dump(dataToSave,fh)
    return dataToSave
Beispiel #2
0
def GetDemoInOut(demoName, baseDir=DemoDir(), raiseOnError=True):
    """
    Builds the input and output directory paths for a demo and tries to make
    sure both directories exist.

    Args:
        demoName: name of the demo; appended directly to 'baseDir'

        baseDir: the base directory. The two paths are built from
        "<baseDir><demoName>/Input/" and "<baseDir><demoName>/Output/"
        (each passed through pGenUtil.getSanitaryPath). Note that the default
        is computed once, when this function is defined.

        raiseOnError: if true, an OSError from ensuring the directories
        exist is re-raised. Otherwise a warning is printed and the paths are
        still returned.
    Returns:
        tuple of <inputDir>,<outputDir>
    """
    demoRoot = baseDir + demoName
    inDir, outDir = [pGenUtil.getSanitaryPath(demoRoot + sub)
                     for sub in ("/Input/", "/Output/")]
    try:
        # input first, then output; stop at the first failure
        for demoDir in (inDir, outDir):
            pGenUtil.ensureDirExists(demoDir)
    except OSError as err:
        if raiseOnError:
            raise err
        warning = ("Warning, couldn't open demo directories based in " +
                   demoRoot +
                   ". Most likely, not connected to JILA network")
        print(warning)
    return inDir, outDir
def _checkpointGen(filePath, orCall, force, unpack, useNpy, *args, **kwargs):
    """
    Returns the checkpoint stored at 'filePath', creating it first if needed.

    If the file exists and 'force' is false, the file is loaded and returned.
    Otherwise 'orCall' is called, its result is saved to 'filePath', and the
    result is returned.

    Args:
        filePath: location of the checkpoint file
        orCall: function producing the data; called as
        orCall(*args, **kwargs)
        force: if true, always regenerate (and overwrite) the checkpoint
        unpack: handed to _npyLoad when 'useNpy' is true; it has no effect
        on the pickle path
        useNpy: selects _npyLoad/_npySave (true) or binary pickle (false)
        *args, **kwargs: forwarded to 'orCall'
    Returns:
        whatever was loaded from the file, or what 'orCall' returned
    """
    haveCheckpoint = pGenUtil.isfile(filePath) and not force
    if haveCheckpoint:
        if useNpy:
            return _npyLoad(filePath, unpack)
        # assume we pickle in binary. Using 'with' means the file handle is
        # released even when pickle.load fails part-way through.
        # NOTE: pickle.load can run arbitrary code from the file; only load
        # checkpoints from a trusted source.
        with open(filePath, 'rb') as fh:
            return pickle.load(fh)
    # couldn't use a file: make sure the target directory exists
    path = pGenUtil.getBasePath(filePath)
    pGenUtil.ensureDirExists(path)
    # POST: we can put our file here
    dataToSave = orCall(*args, **kwargs)
    if useNpy:
        _npySave(filePath, dataToSave)
    else:
        # open the file in binary format for writing
        with open(filePath, 'wb') as fh:
            pickle.dump(dataToSave, fh)
    return dataToSave
Beispiel #4
0
def DemoJilaOrLocal(demoName,localPath):
    """
    Returns demo input/output directories, preferring the default location
    used by GetDemoInOut and falling back to 'localPath'.

    Args:
        demoName: see GetDemoInOut

        localPath: local fallback directory. If the default input directory
        does not exist, 'localPath' itself is used as the input directory
        and "<localPath>Output<demoName>/" as the output directory (the
        output directory is created if needed).

    Returns:
        tuple of <inputDir>,<outputDir>

    Raises:
        IOError: if neither the default nor the local input directory exists
    """
    defaultIn,defaultOut = GetDemoInOut(demoName,raiseOnError=False)
    if pGenUtil.dirExists(defaultIn):
        # the default location is reachable; nothing else to do
        return defaultIn,defaultOut
    print("Warning: Couldn't connect to JILA's Network. Using local data.")
    # get "sanitary paths", which are OS-independent (in theory..)
    localRoot = pGenUtil.ensureEnds(localPath,"/")
    localIn = pGenUtil.getSanitaryPath(localRoot)
    localOut = pGenUtil.getSanitaryPath(localRoot + "Output" + demoName +"/")
    # the output directory is ensured before the input directory is checked
    pGenUtil.ensureDirExists(localOut)
    if pGenUtil.dirExists(localIn):
        return localIn,localOut
    # whoops... no input data in either place
    message = "Demo Directory {:s} not found anywhere.".format(localIn)
    raise IOError(message)
Beispiel #5
0
def GetDemoInOut(demoName,baseDir=DemoDir(),raiseOnError=True):
    """
    Returns the input and output directories for the demo 'demoName' under
    'baseDir', attempting to create them if they are missing.

    Args:
        demoName: the name of the demo; concatenated onto 'baseDir'

        baseDir: the base directory. Input and output directories are the
        sanitized forms of "<baseDir><demoName>/Input/" and
        "<baseDir><demoName>/Output/", resp. The default value is evaluated
        a single time, at function definition.

        raiseOnError: if true, an OSError hit while ensuring the directories
        exist propagates to the caller. If false, a warning is printed
        instead and the (possibly non-existent) paths are returned anyway.
    Returns:
        tuple of <inputDir>,<outputDir>
    """
    root = baseDir + demoName
    inputPath = pGenUtil.getSanitaryPath(root + "/Input/")
    outputPath = pGenUtil.getSanitaryPath(root + "/Output/")
    try:
        for needed in (inputPath,outputPath):
            pGenUtil.ensureDirExists(needed)
    except OSError as osErr:
        if raiseOnError:
            raise osErr
        # best effort: tell the user, but still hand back the paths
        print("Warning, couldn't open demo directories based in " + root +
              ". Most likely, not connected to JILA network")
    return inputPath,outputPath
Beispiel #6
0
def saveFile(filePath, dataToSave, useNpy):
    """
    Writes 'dataToSave' to 'filePath', ensuring the containing directory
    exists first.

    Args:
        filePath: destination file
        dataToSave: the object to store
        useNpy: if true, save via _npySave; otherwise pickle in binary mode
        with the highest available protocol
    """
    pGenUtil.ensureDirExists(pGenUtil.getBasePath(filePath))
    if useNpy:
        _npySave(filePath, dataToSave)
        return
    # binary pickle
    # XXX make protocol specifiable?
    with open(filePath, 'wb') as outFile:
        pickle.dump(dataToSave, outFile, pickle.HIGHEST_PROTOCOL)
def saveFile(filePath,dataToSave,useNpy):
    """
    Saves 'dataToSave' at 'filePath' after making sure its directory exists.

    Args:
        filePath: where to write
        dataToSave: what to write
        useNpy: true to delegate to _npySave; false to pickle (binary,
        pickle.HIGHEST_PROTOCOL)
    """
    targetDir = pGenUtil.getBasePath(filePath)
    pGenUtil.ensureDirExists(targetDir)
    if not useNpy:
        # open the file in binary format for writing
        # XXX make protocol specifiable?
        with open(filePath,'wb') as sink:
            pickle.dump(dataToSave,sink,pickle.HIGHEST_PROTOCOL)
    else:
        _npySave(filePath,dataToSave)
def run(fitter,fitterParams,fitterCoeff,dataClass,label,valid=0.05,
        train="train.csv",test="test.csv",profile=False,nTrials=1,
        force=True,forceFeat=True,plot=False):
    """
    Sets up the output directories for one labelled run, obtains the data
    object through pCheckUtil.pipeline, and hands everything to 'analyze'.

    Args:
        fitter, fitterParams, fitterCoeff: passed on to 'analyze' (as its
        createFitter, fitterParams and fitterCoeff arguments)
        dataClass: passed to getData (through the pipeline) and to 'analyze'
        label: name of this run (ie: SVM/Boost/LogisticRegression); used as
        a sub-directory of the output directory
        valid: passed to getData through the pipeline
        train: training file name, relative to the input directory
        test: test file name, passed to 'analyze'
        profile: a profile directory is only set up if both this and 'plot'
        are true
        nTrials, force, plot: passed on to 'analyze'
        forceFeat: second argument of pCheckUtil.pipeline
    Returns:
        whatever 'analyze' returns
    """
    inDir,cacheDir,baseOutDir = getDirsFromCmdLine()
    # add the label for this run; relies on ensureDirExists handing back
    # the path it was given
    outDir = pGenUtil.ensureDirExists(baseOutDir + label +"/")
    # called only for its effect on the predictions directory; the return
    # value is not needed here
    pGenUtil.ensureDirExists(outDir + "predictions")
    profileDir = (pGenUtil.ensureDirExists(outDir + "profile")
                  if (profile and plot) else None)
    # get the data object, by cache or otherwise
    dataStage = [cacheDir+'data.pkl',getData,dataClass,outDir,
                 inDir+train,valid,False,profileDir]
    dataObj = pCheckUtil.pipeline([dataStage],forceFeat)
    return analyze(dataObj,inDir,outDir,test,fitter,fitterParams,
                   fitterCoeff,label,dataClass,nTrials,force,plot)
def analyze(dataObj,dataDir,outDir,testFile,createFitter,fitterParams,
            fitterCoeff,label,dataClass,nTrials,force,plot):
    """
    Runs (or loads from a checkpoint) all trials and optionally plots the
    accuracies.

    Args:
        dataObj: the data object forwarded to getAllTrials
        dataDir: directory containing 'testFile'
        outDir: output directory; also where the checkpoint file is written
        testFile: test file name, relative to 'dataDir'
        createFitter: takes in the current iteration 'i', and returns a
        fitter, e.g. "return LogisticRegression(C=[10,30,40][i])"
        fitterParams: callable giving the values of the parameter used at
        each iteration
        fitterCoeff, dataClass, nTrials: forwarded to getAllTrials
        label: passed to plotAccuracies
        force: passed to pCheckUtil.getCheckpoint
        plot: if true, plotAccuracies is called; also forwarded to
        getAllTrials
    Returns:
        tuple of <means>,<std> as produced by the checkpointed getAllTrials
    """
    predictDir = pGenUtil.ensureDirExists(outDir + "predictions/")
    testDat = getData(dataClass,outDir,dataDir + testFile,test=True)
    params = fitterParams()
    # the checkpoint name encodes the trial count and the number of params
    checkpointName = "accuracyTrials_{:d}repeats_{:d}params.pkl".format(
        nTrials,len(params))
    trialArgs = (params,outDir,predictDir,dataObj,testDat,nTrials,
                 fitterCoeff,createFitter,plot,dataClass)
    means,std = pCheckUtil.getCheckpoint(outDir + checkpointName,
                                         getAllTrials,force,*trialArgs)
    # plot the accuracies versus the fit parameter.
    if plot:
        plotAccuracies(outDir,label,means,std,params)
    return means,std
Beispiel #10
0
    fig = pPlotUtil.figure()
    plt.subplot(1,2,1)
    plt.title("Mean of g(xBar)-g(mu) approaches 0",fontsize=fontsize)
    expMean = 0
    plt.plot(nVals,means,'ko',label="Actual Mean")
    plt.axhline(expMean,color='b',linestyle='--',
                label="Expected Mean: {:.2g}".format(expMean))
    plt.ylim(-min(means),max(means)*1.1)
    plt.xlabel("Value of n for binomial",fontsize=fontsize)
    plt.ylabel("Value of g(xBar)-g(mu)",fontsize=fontsize)
    plt.legend(fontsize=fontsize)
    pPlotUtil.tickAxisFont()
    plt.subplot(1,2,2)
    plt.semilogy(nVals,varReal,'ko',label="Actual Variance")
    plt.semilogy(nVals,varDist,'b--',label="Expected Variance")    
    plt.title("Variance of g(xBar)-g(mu)\n approaches expected",
              fontsize=fontsize)
    plt.xlabel("Value of n for binomial",fontsize=fontsize)
    plt.ylabel("Value of g(xBar) variance",fontsize=fontsize)
    pPlotUtil.tickAxisFont()
    plt.legend(fontsize=fontsize)
    pPlotUtil.savefig(fig,outDir + "MeanVar")

# Script entry point: generate binomial data for a range of n values and
# plot it.
if __name__ == '__main__':
    # the values of n handed to getBinomials / plotBinomials
    _nVals = np.array([10,20,50,75,100,150,200,350,500,1000])
    # 'outDir' is not defined in this block; it must already exist at module
    # level
    pGenUtil.ensureDirExists(outDir)
    # trailing '.' forces floating point division
    _p=1/3.
    # NOTE(review): 1e5 is a float. If getBinomials uses this as a count or
    # array size, an int may be required -- confirm.
    _nPoints = 1e5
    dataMatrix = getBinomials(_nVals,_p,_nPoints)
    plotBinomials(dataMatrix,_nVals,_p)
Beispiel #11
0
# force floating point division. Can still use integer with //
from __future__ import division
# This file is used for importing the common utilities classes.
import numpy as np
import matplotlib.pyplot as plt
# need to add the utilities class. Want 'home' to be platform independent
# import the patrick-specific utilities
import GenUtilities as pGenUtil
import PlotUtilities as pPlotUtil
import CheckpointUtilities as pCheckUtil

from scipy.stats import norm
# Script: plot, for a normal distribution, the probability mass within
# +/- epsilon of each offset c0 across +/- 3 standard deviations.
outDir = "./out/"
pGenUtil.ensureDirExists(outDir)

# parameters of the normal distribution
mean = 0
stdev = 1
# half-width of the window around each offset. Relies on true division
# (stdev is an int here); with integer division this would be 0.
epsilon = stdev / 100
nPoints = 1000
# frozen distribution: loc and scale are baked in, so its cdf takes raw
# (un-standardized) x values
normDist = norm(loc=mean, scale=stdev)
offsets = np.linspace(mean - 3 * stdev, mean + 3 * stdev, nPoints)
# The offsets are passed to the frozen cdf directly. Standardizing them by
# hand as well, i.e. (x - mean) / stdev, would apply loc/scale twice and is
# only correct when mean == 0 and stdev == 1.
# NOTE(review): the leading factor of 2 is kept as found; the y-axis is
# labelled as arbitrary units -- confirm it is intended.
probability = 2 * (normDist.cdf(offsets + epsilon) -
                   normDist.cdf(offsets - epsilon))

fig = pPlotUtil.figure()
plt.plot(offsets,probability,'r-',
         label="mu = {:.1f}, sigma = {:.1f}, epsilon = {:.2f}".\
         format(mean,stdev,epsilon))
plt.xlabel("offset for CDF, c0")
plt.ylabel("Probability (arbitrary units) to land within epsilon of c0")
    if (generatePNGs):
        X,Y,c1,c2 = getAllStages(fileDict,condition,trial,mWorking,
                                 condNum,trialNum)
        saveAsSubplot(X,Y,c1,c2,allStageDir,vizFileFormat)
    # format the ffmpeg arguments as we want them
    # POST: all videos saved for this trial. make the movie
    generateMovie(allStageDir,condition,trialNum,vizFileFormat)

def saveConditions(condition,condNum,conditionKeys,workDir,outDir):
    """
    Calls saveSingleTrial once for every trial in 'conditionKeys'.

    Args:
        condition: the condition the trials belong to
        condNum: index of the condition
        conditionKeys: iterable of trials; each trial is passed along with
        its position in the iteration
        workDir, outDir: forwarded unchanged to saveSingleTrial
    """
    for trialNum,trialKey in enumerate(conditionKeys):
        saveSingleTrial(workDir,outDir,condition,condNum,trialKey,trialNum)

# Script entry point: start one process per condition, each running
# saveConditions over that condition's trials.
if __name__ == '__main__':
    inDir,workDir,outDir = parseCmdLine()
    # next two must match, for the automatic video encoding to work
    gUtil.ensureDirExists(outDir)
    gUtil.ensureDirExists(workDir)
    # get all the files. returns a dictionary of dictionaries.
    # each key in the outer (first) dictionary is a condition
    # each key in the inner (second) dictionary is a trial for that condition
    fileDict = getCheckpointFileDict(inDir)
    # loop through each condition and trial
    conditionArr = fileDict.keys()
    # every started process is kept here. NOTE(review): nothing in the lines
    # shown joins these processes -- confirm whether that happens later.
    processes= []
    for i,condition in enumerate(conditionArr):
        print("Forking off a process for condition {:s}".format(condition))
        func = saveConditions
        # argument order matches saveConditions(condition,condNum,
        # conditionKeys,workDir,outDir)
        funcArgs = (condition,i,fileDict[condition].keys(),workDir,outDir)
        p = (Process(target=func, args=funcArgs))
        processes.append(p)
        p.start()