precision_p = 0
recall_p = 0
Loop_n = 1   # number of repetitions
fold_n = 10  # n-fold cross-validation: number of folds

for i in range(0, Loop_n):
    train = shuffle(data_train)
    x_columns = [x for x in train.columns if x not in [label, cardcol]]
    X = train[x_columns]
    y = train[label]
    X = np.array(X)
    y = np.array(y)
    kf = KFold(n_splits=fold_n)
    kf.get_n_splits(X)  # returns the number of folds (fold_n)
    for train_index, test_index in kf.split(X):
        print("TRAIN:", train_index, "TEST:", test_index)
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        rf_model = RandomForestClassifier(oob_score=True, random_state=10)
        time1 = time.time()
        rf_model.fit(X_train, y_train)
        time2 = time.time()
        print("rf_model used time: %f sec" % (time2 - time1))  # training time in seconds
        pred_test = rf_model.predict(X_test)
        temp_m = confusion_matrix(y_test, pred_test)
        # precision of the positive class: TP / (TP + FP)
        precision_p = precision_p + float(temp_m[1][1]) / float(temp_m[0][1] + temp_m[1][1])
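# A minimal, self-contained sketch (synthetic data stands in for data_train, label and
# cardcol, which are defined elsewhere) of how the per-fold precision accumulation above
# could be expressed with sklearn's cross_validate, which also reports recall in one call:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate

X_demo, y_demo = make_classification(n_samples=500, n_features=10, random_state=10)
rf_demo = RandomForestClassifier(oob_score=True, random_state=10)

# cv=10 mirrors fold_n above; scoring asks for both precision and recall per fold
scores = cross_validate(rf_demo, X_demo, y_demo, cv=10, scoring=("precision", "recall"))
print("avg precision: %.4f" % scores["test_precision"].mean())
print("avg recall:    %.4f" % scores["test_recall"].mean())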
np.mean((y_train - pred_train)**2)
np.mean((y_test - pred_test)**2)

## Use validation set approach and analyze train and test errors
# Compute RMSE using 10-fold cross-validation
from sklearn.model_selection import KFold

X = bos.drop('PRICE', axis=1)
y = bos.PRICE
X = np.array(X)
y = np.array(y)

kf = KFold(n_splits=10)  # define the split - into 10 folds
kf.get_n_splits(X)       # returns the number of splitting iterations in the cross-validator
xval_err = 0
for train_index, test_index in kf.split(X):
    # print('TRAIN:', train_index, 'TEST:', test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    lm.fit(X_train, y_train)
    p = lm.predict(X_test)
    e = p - y_test
    xval_err += np.dot(e, e)
rmse_10cv = np.sqrt(xval_err / len(X))
print(rmse_10cv)
################
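# A minimal sketch (synthetic regression data stands in for the `bos` frame and `lm`,
# which are defined elsewhere) of obtaining an approximately equivalent 10-fold RMSE
# through cross_val_score with the neg_mean_squared_error scorer:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score

X_demo, y_demo = make_regression(n_samples=506, n_features=13, noise=10.0, random_state=0)
lm_demo = LinearRegression()

# the scorer returns negative MSE per fold, so negate before averaging and taking the root
mse_folds = -cross_val_score(lm_demo, X_demo, y_demo, cv=10, scoring="neg_mean_squared_error")
print(np.sqrt(mse_folds.mean()))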
iris_rf.fit(irisX, irisY)

# Model Score (for a classifier, .score() returns mean accuracy on the given data)
print("The training-set score for the Random Forest model is: %.4f" % iris_rf.score(irisX, irisY))

# # K-Fold Cross Validation

# In[6]:

from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix

x = irisX
y = irisY
kf = KFold(n_splits=5, random_state=None, shuffle=True)
kf.get_n_splits(x)
for train_i, test_i in kf.split(x):
    print("TRAIN:", train_i, "TEST:", test_i)
    X_train, X_test = x[train_i], x[test_i]
    y_train, y_test = y[train_i], y[test_i]

# # 2. KFold Score
# We use cross-validation to obtain a better estimate of the test error and gauge the accuracy of our model. It is preferred over a single held-out validation set because it does not shrink the training data as much, which would otherwise increase the error.

# In[7]:

# K-Fold Score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn import metrics
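# A minimal sketch (assuming iris_rf is a RandomForestClassifier and irisX/irisY are the
# usual iris features and labels, loaded elsewhere) of how the helpers imported above
# might be used to produce a K-fold score and out-of-fold predictions:
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn import metrics

irisX_demo, irisY_demo = load_iris(return_X_y=True)
rf_demo = RandomForestClassifier(random_state=0)

# per-fold accuracy and its mean over 5 folds
scores = cross_val_score(rf_demo, irisX_demo, irisY_demo, cv=5)
print("5-fold accuracies:", scores, "mean:", scores.mean())

# out-of-fold predictions, scored against the true labels
oof_pred = cross_val_predict(rf_demo, irisX_demo, irisY_demo, cv=5)
print("out-of-fold accuracy:", metrics.accuracy_score(irisY_demo, oof_pred))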
mean_tpr /= n_classes
fpr["macro"] = all_fpr
tpr["macro"] = mean_tpr
roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

fff.append(all_fpr)
ttt.append(mean_tpr)
aucc.append(roc_auc["macro"])

# Compute average across folds
fff = np.array(fff)
ttt = np.array(ttt)
aucc = np.array(aucc)

# Collect every FPR grid point seen in any fold
all_fpr_folds = np.unique(np.concatenate([fff[j] for j in range(kf.get_n_splits())]))

# Then interpolate each fold's ROC curve at these points (interp is equivalent to np.interp)
tprs_interp = np.array([interp(all_fpr_folds, fff[j], ttt[j])
                        for j in range(kf.get_n_splits())])

# Finally average across folds and build a +/- one-std band, clipped to [0, 1]
mean_tpr_folds = tprs_interp.mean(axis=0)
std = tprs_interp.std(axis=0)
tprs_upper = np.minimum(mean_tpr_folds + std, 1)
tprs_lower = mean_tpr_folds - std
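# A minimal plotting sketch (matplotlib assumed available; synthetic placeholder arrays
# stand in for all_fpr_folds, mean_tpr_folds and the tprs_lower/tprs_upper band computed
# above) showing how the fold-averaged macro ROC curve and its std band could be drawn:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import auc

all_fpr_demo = np.linspace(0, 1, 50)
mean_tpr_demo = np.sqrt(all_fpr_demo)        # placeholder mean ROC curve
band = 0.05 * np.ones_like(all_fpr_demo)     # placeholder std across folds
upper_demo = np.minimum(mean_tpr_demo + band, 1)
lower_demo = mean_tpr_demo - band
auc_demo = auc(all_fpr_demo, mean_tpr_demo)

plt.plot(all_fpr_demo, mean_tpr_demo, label="macro-average ROC (AUC = %0.2f)" % auc_demo)
plt.fill_between(all_fpr_demo, lower_demo, upper_demo, alpha=0.2, label="+/- 1 std across folds")
plt.plot([0, 1], [0, 1], linestyle="--", color="grey")  # chance level
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.legend(loc="lower right")
plt.show()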