def cross_vad(examples, num_folds=10):
    """Cross-validate a decision tree and print per-fold and summary error rates.

    The examples are split with ``ut.dataCrossSplit(examples, num_folds, False)``.
    For every fold a fresh ``DTree(SelectAtt)`` is trained on element 0 of the
    fold and then evaluated on both element 0 (reported as training data) and
    element 1 (reported as test data). A sample ``x`` counts as an error when
    ``dt.predict(x) != x[0]``, i.e. the first item of a sample is treated as
    its label.

    Args:
        examples: Samples handed to ``ut.dataCrossSplit``.
            NOTE(review): the exact container type expected by the splitter
            is not visible here -- confirm against ``ut``.
        num_folds: Number of folds requested from the splitter and number of
            train/evaluate rounds performed.

    Returns:
        None. All results are written to standard output.

    Raises:
        ZeroDivisionError: If a training or test partition of a fold is empty.
    """
    # NOTE(review): the meaning of the trailing ``False`` flag is defined by
    # ut.dataCrossSplit and cannot be determined from this function.
    data = ut.dataCrossSplit(examples, num_folds, False)

    errorRates = []
    for i in range(num_folds):
        egs = data[i]

        dt = DTree(SelectAtt)
        # NOTE(review): the second argument (1) is passed through as-is; its
        # semantics are defined by DTree.training.
        dt.training(egs[0], 1)

        # Error rate = misclassified / total, for the training partition
        # first and the test partition second. float() keeps the division
        # fractional under Python 2 as well.
        error = [
            sum(1 for x in part if dt.predict(x) != x[0]) / float(len(part))
            for part in (egs[0], egs[1])
        ]

        # A single pre-formatted string in parentheses prints identically to
        # the former comma-separated print statement and is also valid under
        # Python 3.
        print("Fold  %s  trainingData errorRate:  %s  testData errorRate: %s"
              % (i, error[0], error[1]))
        errorRates.append(error)

    # Column 0 holds the training error rates, column 1 the test error rates.
    arr = np.array(errorRates)
    print("Train Mean ErrorRate: %s  Test Mean ErrorRate: %s"
          % (np.mean(arr[:, 0]), np.mean(arr[:, 1])))
    # Population standard deviation (ddof=0), same as sqrt(var). The second
    # label used to read "Test Mean ErrorRate" although the value printed is
    # the standard deviation.
    print("Train StdVar ErrorRate: %s  Test StdVar ErrorRate: %s"
          % (np.std(arr[:, 0]), np.std(arr[:, 1])))