Example #1
import time

import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import KFold
from sklearn.utils import shuffle  # assumed source of shuffle() used below

# data_train (DataFrame) and the column names label / cardcol are assumed
# to be defined earlier in the original code
precision_p = 0  # accumulated precision over all folds
recall_p = 0     # accumulated recall over all folds

Loop_n = 1  # number of repetitions of the whole CV procedure
fold_n = 10  # n-fold cross-validation: number of folds

for i in range(0, Loop_n):
    train = shuffle(data_train)
    x_columns = [x for x in train.columns if x not in [label, cardcol]]
    X = train[x_columns]
    y = train[label]

    X = np.array(X)
    y = np.array(y)
    kf = KFold(n_splits=fold_n)
    kf.get_n_splits(X)  # returns the number of folds (here 10)
    for train_index, test_index in kf.split(X):
        print("TRAIN:", train_index, "TEST:", test_index)
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        rf_model = RandomForestClassifier(oob_score=True, random_state=10)
        time1 = time.time()
        rf_model.fit(X_train, y_train)
        time2 = time.time()
        print "rf_model used time: %f sec" % (time2 - time1)  #时间 second

        pred_test = rf_model.predict(X_test)
        temp_m = confusion_matrix(y_test, pred_test)
        precision_p += float(temp_m[1][1]) / float(temp_m[0][1] + temp_m[1][1])
        recall_p += float(temp_m[1][1]) / float(temp_m[1][0] + temp_m[1][1])
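
# A minimal follow-up sketch (assuming the variable names above): average the
# accumulated metrics over all Loop_n * fold_n fits
n_fits = Loop_n * fold_n
print("mean precision over %d fits: %.4f" % (n_fits, precision_p / n_fits))
print("mean recall over %d fits: %.4f" % (n_fits, recall_p / n_fits))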

Example #2

## Use the validation set approach and analyze train and test errors
# Train and test MSE from the hold-out fit; pred_train and pred_test come
# from an earlier validation-set split that is not shown in this snippet
np.mean((y_train - pred_train)**2)
np.mean((y_test - pred_test)**2)

# Compute RMSE using 10-fold cross-validation
import numpy as np
from sklearn.model_selection import KFold  # sklearn.cross_validation was removed in 0.20
from sklearn.linear_model import LinearRegression

lm = LinearRegression()

X = bos.drop('PRICE', axis=1)  # bos: Boston housing DataFrame loaded earlier
y = bos.PRICE
X = np.array(X)
y = np.array(y)

kf = KFold(n_splits=10) # Define the split into 10 folds
kf.get_n_splits(X) # returns the number of splitting iterations in the cross-validator

xval_err = 0
for train_index, test_index in kf.split(X):
    #print('TRAIN:', train_index, 'TEST:', test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    lm.fit(X_train, y_train)
    p = lm.predict(X_test)
    e = p-y_test
    xval_err += np.dot(e,e)
rmse_10cv = np.sqrt(xval_err/len(X))
print(rmse_10cv)
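
# For comparison with the CV estimate, a minimal sketch of the training RMSE
# (assuming the same lm, X, y as above)
lm.fit(X, y)
p_train = lm.predict(X)
rmse_train = np.sqrt(np.mean((p_train - y)**2))
print(rmse_train)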


Example #3
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier

# Setup assumed by this snippet (the original starts at the fit call)
iris = load_iris()
irisX, irisY = iris.data, iris.target
iris_rf = RandomForestClassifier()

iris_rf.fit(irisX, irisY)
# Model score: for a classifier, .score() returns the mean accuracy
print("The mean accuracy of the Random Forest model is: %.4f" %
      iris_rf.score(irisX, irisY))

# # K-Fold Cross Validation

# In[6]:

from sklearn.model_selection import KFold
from sklearn.metrics import confusion_matrix

x = irisX
y = irisY
kf = KFold(n_splits=5, random_state=None, shuffle=True)
kf.get_n_splits(x)
for train_i, test_i in kf.split(x):
    print("TRAIN:", train_i, "TEST:", test_i)
    X_train, X_test = x[train_i], x[test_i]
    y_train, y_test = y[train_i], y[test_i]
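
    # confusion_matrix is imported above but unused in the original snippet;
    # a hedged per-fold evaluation sketch (assuming iris_rf from above):
    iris_rf.fit(X_train, y_train)
    print(confusion_matrix(y_test, iris_rf.predict(X_test)))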

# # 2. KFold Score
# We use cross-validation to get a better estimate of the test error and thus
# gauge the accuracy of our model. It is preferred over a single validation set
# because it does not shrink the training data as much, which would inflate the
# error estimate. (A usage sketch follows the imports below.)

# In[7]:

# K-Fold Score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn import metrics
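
# The original snippet ends at the imports; a minimal usage sketch
# (assuming the x, y, iris_rf defined above):
scores = cross_val_score(iris_rf, x, y, cv=5)
print("Cross-validated accuracy: %.4f (+/- %.4f)" % (scores.mean(), scores.std()))
predicted = cross_val_predict(iris_rf, x, y, cv=5)
print("Accuracy of cross-val predictions: %.4f" % metrics.accuracy_score(y, predicted))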

Example #4
        # (snippet begins mid-function, inside the per-fold loop)
        mean_tpr /= n_classes  # macro-average: mean TPR across the classes

        fpr["macro"] = all_fpr
        tpr["macro"] = mean_tpr
        roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])

        fff.append(all_fpr)
        ttt.append(mean_tpr)
        aucc.append(roc_auc["macro"])

    # Compute the average across folds
    fff = np.array(fff)
    ttt = np.array(ttt)
    aucc = np.array(aucc)

    all_fpr_folds = np.unique(np.concatenate([fff[j] for j in range(kf.get_n_splits())]))
        
    # Then interpolate all per-fold ROC curves at these points
    interp_tprs = np.array([np.interp(all_fpr_folds, fff[j], ttt[j])
                            for j in range(kf.get_n_splits())])

    # Finally average across folds and compute the std band
    mean_tpr_folds = interp_tprs.mean(axis=0)
    std = interp_tprs.std(axis=0)  # pointwise std across folds

    tprs_upper = np.minimum(mean_tpr_folds + std, 1)
    tprs_lower = mean_tpr_folds - std
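
    # A hedged sketch of the usual next step: plot the macro-average ROC with
    # a +/- 1 std band (assumes matplotlib.pyplot is imported as plt)
    plt.plot(all_fpr_folds, mean_tpr_folds,
             label="macro-average ROC (mean AUC = %0.2f)" % aucc.mean())
    plt.fill_between(all_fpr_folds, tprs_lower, tprs_upper, alpha=0.2,
                     label="+/- 1 std. dev.")
    plt.xlabel("False Positive Rate")
    plt.ylabel("True Positive Rate")
    plt.legend(loc="lower right")
    plt.show()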