def cross_Valid(egs, numCross, B): crossData = ut.dataCrossSplit(egs, numCross, False) fold = 0 errors = {} for x in B: errors[x] = [0] * numCross for i in range(numCross): data = crossData[i] print "# Fold", fold for b in B: print " ", b, "Bases:", err = calcError(data[0], data[1], b) print err errors[b][fold] = err fold = fold + 1 return errors
def cross_vad(examples, T, num_folds=10): data = ut.dataCrossSplit(examples, num_folds, False) errorRates = [] for i in range(num_folds): egs = data[i] classifier = LogitBoost(egs[0], T) # calculate error rate error = [0.0] * 2 for j in range(2): for x in egs[j]: if classifier(x) != x[0]: error[j] = error[j] + 1 error[j] = error[j] / len(egs[j]) print "Fold ", i, " trainingData errorRate: ", error[0], " testData errorRate:", error[1] errorRates.append(error) arr = np.array(errorRates) print "Train Mean ErrorRate:", np.mean(arr[:, 0]), " Test Mean ErrorRate:", np.mean(arr[:, 1]) print "Train StdVar ErrorRate:", np.sqrt(np.var(arr[:, 0])), " Test Mean ErrorRate:", np.sqrt(np.var(arr[:, 1]))
def cross_vad(examples, num_folds = 10): data = ut.dataCrossSplit(examples, num_folds, False) errorRates = [] for i in range(num_folds): egs = data[i] dt = DTree(SelectAtt) dt.training(egs[0], 1) # calculate error rate error = [0.] * 2 for j in range(2): for x in egs[j]: if dt.predict(x) != x[0]: error[j] = error[j] + 1 error[j] = error[j] / len(egs[j]) print "Fold ", i, " trainingData errorRate: ", error[0], " testData errorRate:", error[1] errorRates.append(error) arr = np.array(errorRates) print "Train Mean ErrorRate:", np.mean(arr[:,0]), " Test Mean ErrorRate:", np.mean(arr[:,1]) print "Train StdVar ErrorRate:", np.sqrt(np.var(arr[:,0])), " Test Mean ErrorRate:", np.sqrt(np.var(arr[:, 1]))
# -*- coding: utf-8 -*- """ Created on Wed Nov 20 22:55:53 2013 @author: jiecaoc """ import utilities as ut import classes as cls egs = ut.importRawData('Ionosphere.csv') raw = ut.dataCrossSplit(egs, 2, False) dt = cls.DTree(cls.SelectAtt) dt.training(raw[0][0]) c = 0 for eg in raw[0][1]: if eg[0] != dt.predict(eg): # print eg[0], dt.predict(eg) c = c + 1 print (c + .0) / len(egs)
for k in range(1, len(crossData[i][1]) + 1): for x in crossData[i][1][k]: totNum = totNum + 1 if cl.classify(x) != k: totError = totError + 1 return (totError + 0.) / totNum # check input if len(sys.argv) != 3: print "input error" exit() fileName = sys.argv[1] if fileName == "Iris.csv": subDim = 3 else: subDim = 9 num_cross = int(sys.argv[2]) # import data # since the date in the file are in order # need to make it random to run cross validation rawData = ut.makeDataRandom(ut.importRawData(fileName)) rawData = ut.importRawData(fileName) trainData = ut.dataCrossSplit(rawData, num_cross) #print tmp[1] print "Data: ", fileName print "Error rate for cross_validation:", cross_validation(trainData, subDim)
def leastSquare(trainData, K = 3): """ give training data, return f(x), defined as f(x) = W'x """ # construct Y Y = np.array([classEncode(x[0]) for x in trainData]) # construct X X = np.array([ x[1:] for x in trainData]) X_ = np.dot( np.linalg.inv( np.dot(X.transpose(), X) ), X.transpose() ) W_T = np.dot(X_, Y).transpose() return lambda x: np.dot(W_T, x) # check input if len(sys.argv) != 3: print "input error" exit() fileName = sys.argv[1] num_cross = int(sys.argv[2]) rawData = ut.makeDataRandom(ut.importRawData(fileName)) crossData = ut.dataCrossSplit(rawData, num_cross, False) print "Data: ", fileName print "Error rate for cross_validation:", cross_validation(crossData)