def testSingle(exampleArr, outcomeArr, theta, muArr, sigmaArr):
    """
    returns the percentage of examples for which thresholding the
    hypothesis(theta) at 0.5 reproduces the value in the outcome array

    exampleArr -- example array, multiplied with theta via np.dot
    outcomeArr -- expected outcomes, compared element-wise with the
                  thresholded hypothesis
    theta      -- hypothesis parameters
    muArr      -- not used by this function
    sigmaArr   -- not used by this function
    """
    # NOTE(review): muArr / sigmaArr are accepted but never applied, so
    # exampleArr is presumably expected to be normalised already -- confirm
    # against the caller.
    probabilities = logReg.sigmoid(np.dot(exampleArr, theta))

    # snap every hypothesis value to a hard 1 / 0 decision, in place
    isPositive = probabilities > 0.5
    isNegative = probabilities <= 0.5
    probabilities[isPositive] = 1
    probabilities[isNegative] = 0

    exampleCount = float(exampleArr.shape[0])
    matches = (probabilities == outcomeArr).sum()
    return 100 * matches / exampleCount
def testAll(dataType):
    """
    tests one vs all classification over all categories

    Loads the fitted parameters from '<dataType>Theta.csv' through
    loadResults. For every category the labelled examples are read from
    '<dataType><category>Data.csv', each example is scored by all
    one vs all hypotheses, and the percentage of examples whose
    "best hypothesis is this category" decision equals the label stored in
    the last used column of the file is printed.

    dataType -- prefix of the parameter file name; the data file names are
                built from the dataType value returned by loadResults

    Nothing is returned, the results are only printed.
    """
    # load all theta values
    readName = dataType + 'Theta.csv'
    (numCat, numFeat, dataType, catNames, theta, mu, sigma) = loadResults(
        readName, loadForClassify=True)
    # NOTE(review): numFeat is used here as the number of feature columns
    # WITHOUT the bias term (mu / sigma / theta are indexed with n+1 rows),
    # whereas classifyImage sizes its bias-extended vector with numFeat
    # itself -- confirm which convention loadResults follows.
    n = numFeat
    # one pre-formatted argument, so the text is the same whether print is
    # a statement or a function
    print('%s %s' % (theta.shape, mu.shape))

    for i in range(numCat):
        # load labelled digit data
        saveName = dataType + catNames[i] + 'Data.csv'
        # genfromtxt yields a 1-D array for a file holding a single row;
        # force 2-D so the column slicing below keeps working
        digitData = np.atleast_2d(np.genfromtxt(saveName, delimiter=','))
        m = digitData.shape[0]

        # get X, y from data
        X = digitData[:, :n]
        y = digitData[:, n]

        # add Xo = 1 into X
        Xcalc = np.ones((m, n + 1))
        Xcalc[:, 1:] = X

        # generate hypothesis for each training example; every hypothesis
        # is fed data normalised with its OWN mu / sigma column, the same
        # pairing classifyImage uses
        yCalcArr = np.zeros((m, numCat))
        for j in range(numCat):
            Xnorm = (Xcalc - mu[:, j]) / sigma[:, j]
            yCalcArr[:, j] = logReg.sigmoid(np.dot(Xnorm, theta[:, j]))

        # index of the strongest hypothesis per example (first one on ties)
        bestEst = np.argmax(yCalcArr, axis=1)
        yCalc = (bestEst == i).astype(float)

        # compare calc Y to empirical Y
        correct = 100 * (yCalc == y).sum() / float(m)
        print('category %s' % (catNames[i],))
        print('hypothesis generated by logistic regression produces  %s'
              '  percent \ncorrect classifications of training data'
              % (correct,))
def classifyImage(imgArr, imgType, parameterName):
    """
    classifies a single image with the stored one vs all hypotheses

    imgArr        -- image data handed on to buildTrainingExampleArray
    imgType       -- handed to ImageSpecs to obtain the image specification
    parameterName -- name of the parameter file below 'learningData/'

    returns the entry of catNames whose hypothesis gives the highest value
    (the first one when several share the maximum)

    currently only works for digits, suits will have to be added too;
    consider adding this info to the parameter files so that this can be
    automated more simply
    """
    specs = ImageSpecs(imgType)

    # load hypothesis function (theta values)
    parameterPath = os.path.normpath('learningData/' + parameterName)
    (numCat, numFeat, dataType, catNames, theta, mu, sigma) = loadResults(
        parameterPath, loadForClassify=True)

    # input data for classification, taken from the image data
    features = buildTrainingExampleArray(imgArr, specs, isRGB=False)

    # Xo = 1 goes in front of the features
    withBias = np.ones(numFeat)
    withBias[1:] = features

    # one hypothesis value per category, each computed on the input
    # normalised with that category's own mu / sigma column
    scores = np.zeros(numCat)
    for cat in range(numCat):
        scaled = (withBias - mu[:, cat]) / sigma[:, cat]
        scores[cat] = logReg.sigmoid(np.dot(scaled, theta[:, cat]))

    winner = np.flatnonzero(scores == scores.max())[0]
    return catNames[winner]