import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import log_loss
from sklearn.model_selection import train_test_split

data = pd.read_csv("D:/Sai/JavaDoc/Cousera/5/2/gbm-data.csv")

# The first column holds the class labels; the remaining columns are the features.
y = data[data.columns[0]].values
x = data[data.columns[1:]].values

x_train, x_test, y_train, y_test = train_test_split(x,
                                                    y,
                                                    test_size=0.33,
                                                    random_state=42)

learning_rates = [1, 0.5, 0.3, 0.2, 0.1]

for rate in learning_rates:
    clf = GradientBoostingClassifier(n_estimators=250,
                                     verbose=True,
                                     random_state=241,
                                     learning_rate=rate)
    clf.fit(x_train, y_train)

    # staged_decision_function yields the decision function after every
    # boosting iteration; map it to probabilities with the sigmoid
    # 1 / (1 + exp(-score)) and evaluate each stage with log loss.
    test_loss = [log_loss(y_test, 1 / (1 + np.exp(-score)))
                 for score in clf.staged_decision_function(x_test)]
    train_loss = [log_loss(y_train, 1 / (1 + np.exp(-score)))
                  for score in clf.staged_decision_function(x_train)]

    plt.figure()
    plt.title('learning_rate = %s' % rate)
    plt.plot(test_loss, 'r', linewidth=2)
    plt.plot(train_loss, 'g', linewidth=2)
    plt.legend(['test', 'train'])
print("MSE: %.4f" % mse)
print 
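
# <codecell>

# Sanity check (a sketch): the sigmoid of the final decision function should
# match predict_proba for the positive class, since that is the same
# transform applied to every stage in the loop above.
proba_manual = 1 / (1 + np.exp(-clf.decision_function(x_test)))
print(np.allclose(proba_manual, clf.predict_proba(x_test)[:, 1]))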

# <codecell>

params = clf.get_params()
params

# <codecell>

test_score = np.zeros((params['n_estimators'],), dtype=np.float64)
test_score

# <codecell>

for i, y_pred in enumerate(clf.staged_decision_function(x_test)):
    test_score[i] = clf.loss_(y_test, y_pred)
    
test_score
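
# <codecell>

# clf.loss_ is an internal attribute whose interface varies across
# scikit-learn versions; a version-independent sketch that tracks the same
# per-stage curve (up to a constant factor) uses staged_predict_proba with
# log_loss instead.
test_score_alt = np.array([log_loss(y_test, proba)
                           for proba in clf.staged_predict_proba(x_test)])
test_score_alt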

# <codecell>

plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.title('Deviance')
plt.plot(np.arange(params['n_estimators']) + 1, clf.train_score_, 'b-',
        label='Training Set Deviance')
plt.plot(np.arange(params['n_estimators']) + 1, test_score, 'r-',
        label='Test Set Deviance')
plt.legend(loc='upper right')
plt.xlabel('Boosting Iterations')
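# Label the y-axis to match the title and render the figure.
plt.ylabel('Deviance')
plt.show()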