def testSingle(exampleArr,outcomeArr,theta,muArr,sigmaArr):
    """
    returns the percentage of examples for which the thresholded
    hypothesis(theta) equals the corresponding entry of the outcome array

    exampleArr -- example array; it is dotted with theta exactly as passed
    outcomeArr -- expected outcomes, compared element-wise with the 0/1
                  predictions
    theta      -- hypothesis parameters
    muArr, sigmaArr -- accepted but not used: no normalisation is applied
                  inside this function
                  NOTE(review): confirm callers pass already-normalised
                  examples
    """
    predicted = logReg.sigmoid(np.dot(exampleArr,theta))

    # threshold the hypothesis: above 0.5 -> 1, at or below 0.5 -> 0
    # (both masks are taken before any value is overwritten)
    isPositive = predicted > 0.5
    isNegative = predicted <= 0.5
    predicted[isPositive] = 1
    predicted[isNegative] = 0

    # percentage is relative to the number of rows in the example array
    numMatching = (predicted == outcomeArr).sum()
    return 100 * numMatching / float(exampleArr.shape[0])
def testAll(dataType):
    """
    tests one vs all classification over all categories

    For every category the labelled data file dataType+catName+'Data.csv'
    is read, each example is scored by the hypothesis of every category,
    and the example is predicted positive (1) when the category under test
    has the highest score, otherwise negative (0).  The percentage of
    predictions that agree with the labels in the file is printed for each
    category.

    dataType -- prefix of the parameter file name (dataType+'Theta.csv').
                The data type returned by loadResults replaces it when the
                data file names are built.

    Nothing is returned; results are printed.
    """

    # load all theta values (dataType is rebound to the loaded value here)
    readName = dataType+'Theta.csv'
    (numCat,numFeat,dataType,catNames,theta,mu,sigma) = loadResults(readName,loadForClassify=True)
    # NOTE(review): numFeat is treated here as the feature count without the
    # Xo column (Xcalc gets n+1 columns), while classifyImage allocates
    # numFeat entries including Xo -- confirm what loadResults returns.
    n = numFeat

    # single-argument print: same output as a print statement on python 2
    print('%s %s' % (theta.shape, mu.shape))

    for i in range(numCat):
        # load labelled digit data
        saveName = dataType+catNames[i]+'Data.csv'
        digitData = np.genfromtxt(saveName, delimiter=',')
        m = digitData.shape[0]

        # get X, y from data: first n columns are features, column n the label
        X = digitData[:,:n]
        y = digitData[:,n]

        # add Xo = 1 into X
        Xcalc = np.ones((m,n+1))
        Xcalc[:,1:] = X

        # generate hypothesis for each training example
        yCalcArr = np.zeros((m,numCat))
        for j in range(numCat):
            # normalise with the statistics belonging to classifier j, so
            # that mu/sigma/theta are paired the same way as in classifyImage
            XcalcCat = (Xcalc - mu[:,j]) / sigma[:,j]
            yCalcArr[:,j] = logReg.sigmoid(np.dot(XcalcCat,theta[:,j]))

        # an example is classified as category i when hypothesis i scores
        # highest (first maximum wins on ties)
        yCalc = (np.argmax(yCalcArr,axis=1) == i).astype(float)

        # compare calc Y to empirical Y
        correct = 100*(yCalc==y).sum()/float(m)
        print('category %s' % (catNames[i],))
        # the doubled spaces reproduce the output of the original
        # comma-separated print statement
        print('hypothesis generated by logistic regression produces  %s  percent \ncorrect classifications of training data' % (correct,))
def classifyImage(imgArr,imgType,parameterName):
    """
    one vs all classification of a single image

    imgArr        -- image data handed to buildTrainingExampleArray
    imgType       -- image type used to construct the ImageSpecs
    parameterName -- name of the parameter file inside 'learningData/'

    Returns the entry of catNames whose hypothesis gives the highest value
    (the first one if several are equal).

    Original author's notes: currently only works for digits, suits still
    have to be added; consider storing this information in the parameter
    files so that this can be automated more simply.
    """

    specs = ImageSpecs(imgType)

    # hypothesis function (theta values) and normalisation parameters
    paramPath = os.path.normpath('learningData/'+parameterName)
    loaded = loadResults(paramPath,loadForClassify=True)
    (numCat,numFeat,dataType,catNames,theta,mu,sigma) = loaded

    # input data for classification, derived from the image
    features = buildTrainingExampleArray(imgArr,specs,isRGB=False)

    # example vector with Xo = 1 in front of the image features
    # NOTE(review): numFeat entries are allocated including Xo, so the
    # feature array is expected to fit into numFeat-1 slots -- confirm
    example     = np.zeros(numFeat)
    example[0]  = 1
    example[1:] = features.copy()

    # one hypothesis value per category, each with its own normalisation
    scores = np.zeros(numCat)
    for cat in range(numCat):
        normalised  = (example - mu[:,cat]) / sigma[:,cat]
        scores[cat] = logReg.sigmoid(np.dot(normalised,theta[:,cat]))

    # index of the first category reaching the maximum hypothesis value
    winner = np.flatnonzero(scores == scores.max())[0]
    return catNames[winner]