def train_fold(X, Y, nFolds, nFeatures, depth, minLeaf):
    m = []
    errTr = []
    errTe = []
    print("NFOLD:", X.shape, Y.shape)
    for iFold in range(nFolds):
        Xtri, Xtei, Ytri, Ytei = ml.crossValidate(X, Y, nFolds, iFold)
        print(Xtri.shape, Ytri.shape, Xtei.shape, Ytei.shape)
        dt = ml.dtree.treeClassify(Xtri, Ytri, minLeaf=minLeaf,
                                   maxDepth=depth, nFeatures=nFeatures)
        #m.append(dt)
        Yteihat = dt.predict(Xtei)
        Ytrihat = dt.predict(Xtri)
        errTr.append(computeError(Ytri, Ytrihat))
        errTe.append(computeError(Ytei, Yteihat))
        if errTr[-1] > 0.32 and errTe[-1] > 0.32:
            print("High Err:", (nFeatures, depth, minLeaf),
                  "has high error; stopping nFold training on these parameters")
            break
    return (np.mean(errTr), np.mean(errTe), m)
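# train_fold assumes a computeError helper defined elsewhere. A minimal
# sketch (an assumption -- not necessarily the original definition),
# treating it as the mean misclassification rate:
def computeError(Y, Yhat):
    # Fraction of predictions that disagree with the true labels.
    return np.mean(np.asarray(Y) != np.asarray(Yhat))

# Example usage of train_fold (hypothetical parameter values):
# errTrain, errTest, models = train_fold(X, Y, nFolds=5, nFeatures=10,
#                                        depth=15, minLeaf=4)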
def DegreeCrossValidation(nFolds, degree, Xtr, Ytr):
    J = dict()
    XtrP = ml.transforms.fpoly(Xtr, degree, bias=False)
    XtrP, params = ml.transforms.rescale(XtrP)
    for iFold in range(nFolds):
        Xti, Xvi, Yti, Yvi = ml.crossValidate(XtrP, Ytr, nFolds, iFold)
        learner = ml.linear.linearRegress(Xti, Yti)
        J[iFold] = MSE(Yvi, learner.predict(Xvi))
    # Average the per-fold MSEs over nFolds (not a hardcoded 5).
    return sum(x[0] for x in J.values()) / nFolds
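# DegreeCrossValidation relies on an MSE helper not shown here. A minimal
# sketch, under the assumption that it returns a length-1 array (which
# would explain the x[0] indexing above):
def MSE(Y, Yhat):
    # Mean squared error, returned as a 1-element array.
    return np.array([np.mean((np.asarray(Y) - np.asarray(Yhat)) ** 2)])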
def hists():
    X, Y, Xte = r.init()
    Xtri, Xtei, Ytri, Ytei = ml.crossValidate(X, Y, 5, 0)
    # for i in range(0, 4):
    #     plt.subplot(1, 4, i+1)
    #     plt.hist(Xtei[:,i])
    # plt.show()
    # One histogram per feature: iterate over columns (shape[1]),
    # not over data points (shape[0]).
    for i in range(Xtei.shape[1]):
        plt.hist(Xtei[:, i])
        plt.show()
def scatter():
    X, Y, Xte = r.init()
    # for x in combinations(range(14), 2):
    #     #plt.subplot(1, 14, x[1])
    #     plt.scatter(X[:,x[0]], X[:,x[1]], c=Y)
    #     plt.show()
    # Plot everything with respect to feature 0.
    const_feature = 0
    Xtri, Xtei, Ytri, Ytei = ml.crossValidate(X, Y, 5, 0)
    for i in range(1, 14):
        if i != const_feature:
            plt.scatter(Xtei[:, const_feature], Xtei[:, i], c=Ytei)
            plt.show()
def predict2():
    X, Y, Xte = r.init()
    X, _ = ml.transforms.rescale(X)
    nFolds = 5
    errTr = []
    errTe = []
    # Candidate values for max_leaf_nodes (despite the name, "features"
    # here is the leaf-node budget, not a feature count).
    l = [2, 4, 6, 8, 10, 16, 32, 64, 100, 128, 150, 256, 328, 400,
         512, 768, 800, 1024, 1568, 2048]
    for features in l:
        dtc = DecisionTreeClassifier(max_leaf_nodes=features)
        tre = 0
        tee = 0
        for iFold in range(nFolds):
            print('Training for features', features, 'fold', iFold)
            Xtri, Xtei, Ytri, Ytei = ml.crossValidate(X, Y, nFolds, iFold)
            Ytri, Ytei = Ytri[:, np.newaxis], Ytei[:, np.newaxis]
            # print(Xtri.shape, Xtei.shape, Ytri.shape, Ytei.shape)
            dtc.fit(Xtri, Ytri)
            e1 = r.computeError(dtc.predict(Xtri)[:, np.newaxis], Ytri)
            tre += e1
            print('Training Error', e1)
            e1 = r.computeError(dtc.predict(Xtei)[:, np.newaxis], Ytei)
            print('Test Error', e1)
            tee += e1
        errTr.append(tre / nFolds)
        errTe.append(tee / nFolds)
        print('===== features:', features, 'Training:', errTr[-1], 'Test', errTe[-1])
    print('Training Error', errTr)
    print('Test Error', errTe)
    plt.plot(l, errTr, 'r.')
    plt.plot(l, errTe, 'b.')
    plt.show()
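# Since predict2 already uses scikit-learn's DecisionTreeClassifier, the
# manual fold loop could also be expressed with sklearn's built-in
# cross-validation utility. A sketch, assuming the same rescaled X and a
# 1-d label vector Y:
from sklearn.model_selection import cross_validate

def predict2_sklearn_cv(X, Y, max_leaf_nodes=64, nFolds=5):
    dtc = DecisionTreeClassifier(max_leaf_nodes=max_leaf_nodes)
    cv = cross_validate(dtc, X, Y, cv=nFolds, return_train_score=True)
    # Scores are accuracies, so error rate = 1 - accuracy.
    return 1 - cv['train_score'].mean(), 1 - cv['test_score'].mean()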
# %%
from sklearn.neural_network import MLPClassifier
""" solver = 'adam' for large data sets """
Test = X_test.shape[0]
predict = np.zeros((Test, 2))  # accumulates predicted class probabilities across folds
train = []
average = []
nFolds = 10
for iFold in range(nFolds):
    Xtr, Xva, Ytr, Yva = ml.crossValidate(X_data, Y_data, nFolds, iFold)
    neural_network = MLPClassifier(solver='adam', random_state=0)
    neural_network.fit(Xtr, Ytr)
    predict += neural_network.predict_proba(X_test)
    train.append(np.mean(neural_network.predict(Xtr) == Ytr))
    average.append(np.mean(neural_network.predict(Xva) == Yva))
    #print(Yhat)
# Note: these are accuracies (fraction correct), not error rates.
print("training accuracy: {}".format(np.mean(train)))
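# To turn the accumulated probabilities into test-set labels, the fold
# predictions can be averaged (a soft vote). A sketch -- the assumption
# that column index equals class label follows predict_proba's convention
# of ordering columns by neural_network.classes_:
predict /= nFolds
Yhat_test = np.argmax(predict, axis=1)
print("validation accuracy: {}".format(np.mean(average)))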
data = np.genfromtxt("data/curve80.txt", delimiter=None)
X = data[:, 0]        # first column is the feature
X = X[:, np.newaxis]  # code expects shape (M,N), so make it 2-dimensional
Y = data[:, 1]        # second column is the target
Xtr, Xte, Ytr, Yte = ml.splitData(X, Y, 0.75)  # split data set 75/25

nFolds = 5
degrees = [1, 3, 5, 7, 10, 18]
validationMSEs = []
for degree in degrees:
    J = []
    for iFold in range(nFolds):
        # Take the ith block as validation data.
        Xti, Xvi, Yti, Yvi = ml.crossValidate(Xtr, Ytr, nFolds, iFold)
        Yvi = Yvi[:, np.newaxis]
        XtiP = ml.transforms.fpoly(Xti, degree, bias=False)
        XtiP, params = ml.transforms.rescale(XtiP)
        learner = ml.linear.linearRegress(XtiP, Yti)
        # Apply the same expansion and rescaling to the validation fold.
        XviP, _ = ml.transforms.rescale(
            ml.transforms.fpoly(Xvi, degree, False), params)
        # Compute the validation error for this fold.
        YValPredP = learner.predict(XviP)
        valError = np.mean((YValPredP - Yvi) ** 2)
        J.append(valError)
    validationMSEs.append(np.mean(J))
plt.semilogy(degrees, validationMSEs, c='red')
plt.xticks(degrees, degrees)
plt.show()
test_error = []
cross_error = []
cross_fold = 5
degrees = range(1, 20, 3)
#degrees = (1, 3, 5, 7, 10, 18)
plt.figure(1, (17, 7))
plt.subplot(1, 2, 1)
plt.scatter(train_features, train_targets, color='b', label='training data')
plt.scatter(test_features, test_targets, color='r', label='test data')
for degree in degrees:
    # Cross-validate this degree.
    c_error_d = np.array([])
    for iFold in range(cross_fold):
        Xt, Xv, Yt, Yv = ml.crossValidate(train_features, train_targets,
                                          cross_fold, iFold)
        pXt, params = ml.transforms.rescale(ml.transforms.fpoly(Xt, degree, 0))
        pXv = ml.transforms.rescale(ml.transforms.fpoly(Xv, degree, 0), params)[0]
        learner = ml.linear.linearRegress(pXt, Yt)
        predicted_Yv = learner.predict(pXv).flatten()
        c_error_d = np.append(
            c_error_d,
            np.sum(np.power(predicted_Yv - Yv, 2)) / float(Yv.shape[0]))
    cross_error.append(np.mean(c_error_d))
    # Prepare data for the full training/test fit.
    poly_train_features, params = ml.transforms.rescale(
        ml.transforms.fpoly(train_features, degree, 0))
    poly_test_features = ml.transforms.rescale(
        ml.transforms.fpoly(test_features, degree, 0), params)[0]
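    # The snippet declares test_error but ends before filling it. A sketch
    # of the missing step, following the same mltools pattern: fit on the
    # full expanded training set and record the test MSE.
    learner = ml.linear.linearRegress(poly_train_features, train_targets)
    predicted_Yte = learner.predict(poly_test_features).flatten()
    test_error.append(np.mean((predicted_Yte - test_targets) ** 2))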
plt.semilogy([1, 3, 5, 7, 10, 18], mse_te, 'g-', linewidth=2)
plt.xlabel('Degree')
plt.ylabel('MSE')
plt.show()

''' Problem 2 '''
mse_cv = []
nFolds = 5
for degree in [1, 3, 5, 7, 10, 18]:
    params = (None, None)
    # Define a function "Phi(X)" which outputs the expanded and scaled
    # feature matrix; the names "degree" and "params" are captured by the
    # closure and looked up each time Phi is called.
    Phi = lambda X: ml.transforms.rescale(
        ml.transforms.fpoly(X, degree, False), params)[0]
    J = np.zeros(nFolds)
    for iFold in range(nFolds):
        # Take the ith data block as validation.
        Xti, Xvi, Yti, Yvi = ml.crossValidate(Xtr, Ytr, nFolds, iFold)
        # Train on Xti, Yti, the data for this fold.
        learner = ml.linear.linearRegress(Phi(Xti), Yti)
        # Now compute the MSE on Xvi, Yvi and save it.
        J[iFold] = learner.mse(Phi(Xvi), Yvi)
    mse_cv = np.append(mse_cv, np.mean(J))
plt.semilogy([1, 3, 5, 7, 10, 18], mse_cv, 'b-', linewidth=2)
plt.semilogy([1, 3, 5, 7, 10, 18], mse_te, 'g-', linewidth=2)
plt.xlabel('Degree')
plt.ylabel('MSE Cross Validation')
plt.show()
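# A side note on the Phi lambda above: Python closures capture names, not
# values, so "degree" and "params" are read at call time, not at definition
# time. A minimal illustration:
deg = 2
f = lambda x: x ** deg
deg = 3
print(f(2))  # prints 8, not 4 -- the closure sees the current value of deg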
Y = np.genfromtxt("data/trainY.txt", delimiter=',')
# also load features of the test data (to be predicted)
print(X.shape)
print(Y.shape)

nBag = 101
learners = np.array([2, 5, 10, 20, 25, 50])  # ensemble sizes to evaluate
classifiers = [None] * nBag  # allocate space for learners
errT = np.zeros((len(learners),))
nFolds = 10
errX = np.zeros((len(learners), nFolds))
for iFold in range(nFolds):
    Xti, Xvi, Yti, Yvi = ml.crossValidate(X, Y, nFolds, iFold)
    for i in range(nBag):
        Xi, Yi = ml.bootstrapData(Xti, Yti)
        # Train a model on the bootstrap sample Xi, Yi.
        classifiers[i] = ml.dtree.treeRegress(
            Xi, Yi, maxDepth=20, minParent=1024, nFeatures=60)
    for i in range(len(learners)):
        learnerNum = learners[i]
        # Allocate space for each model's error.
        predict = np.zeros((learnerNum,))
        for j in range(learnerNum):
            # Apply each regressor and record its individual RMSE.
            # Note: this averages per-tree RMSEs, not the RMSE of the
            # averaged ensemble prediction.
            predict[j] = np.sqrt(classifiers[j].mse(Xvi, Yvi))
        errX[i, iFold] = np.mean(predict)
errX = np.mean(errX, axis=1)
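# To measure the bagged ensemble's own error, the tree predictions should
# be averaged first and the RMSE computed on that average. A sketch,
# assuming treeRegress.predict returns an (m,)- or (m,1)-shaped array:
def ensemble_rmse(trees, Xv, Yv):
    # Mean prediction over the ensemble, then RMSE against the targets.
    Yhat = np.mean([t.predict(Xv).flatten() for t in trees], axis=0)
    return np.sqrt(np.mean((Yhat - Yv.flatten()) ** 2))

# e.g. errX[i, iFold] = ensemble_rmse(classifiers[:learnerNum], Xvi, Yvi)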
# constants
train_dp = features_train.shape[0]  # number of data points in the training data
test_dp = features_test.shape[0]    # number of data points in the testing data
cv_k = 5                            # k value for k-fold cross validation
degrees = range(2, 15, 3)  # polynomial degrees on which training/testing takes place

# train and test
error_cv = []    # MSE per polynomial degree from k-fold cross validation
error_test = []  # MSE per polynomial degree from testing
print_report('training starts...')
for degree in degrees:
    print_report('polynomial degree %d' % degree)
    # k-fold cross validation
    c_error_d = []  # MSE on each fold
    for k in range(0, cv_k):
        print_report('k-fold cross validation, fold %d' % k)
        x_train, x_test, y_train, y_test = ml.crossValidate(
            features_train, targets_train, cv_k, k)
        # Expand and rescale the training fold, then train on those features
        # (the original trained on the raw x_train by mistake).
        x_train, params = ml.transforms.rescale(
            ml.transforms.fpoly(x_train, degree, 0))
        x_test = ml.transforms.rescale(ml.transforms.fpoly(x_test, degree, 0),
                                       params)[0]
        learner = ml.linear.linearRegress(x_train, y_train)
        # Predict from the validation features, not the targets.
        y_predicted = learner.predict(x_test)
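        # The snippet ends here; a sketch of the natural next step,
        # matching the earlier snippets: record this fold's validation MSE.
        c_error_d.append(np.mean((y_predicted.flatten() - y_test.flatten()) ** 2))
    error_cv.append(np.mean(c_error_d))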